Skip to content

Commit 73a15fe

Browse files
committed
Update bioawk
1 parent d5a80a3 commit 73a15fe

File tree

6 files changed

+159
-34
lines changed

6 files changed

+159
-34
lines changed

modules/nf-core/bioawk/main.nf

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
process BIOAWK {
2-
tag "$meta.id"
2+
tag "${meta.id}"
33
label 'process_single'
44

55
conda "${moduleDir}/environment.yml"
@@ -9,26 +9,39 @@ process BIOAWK {
99

1010
input:
1111
tuple val(meta), path(input)
12+
val suffix
13+
val zip_bool
1214

1315
output:
14-
tuple val(meta), path("*.gz"), emit: output
16+
tuple val(meta), path("${file_output}"), optional: true, emit: output
17+
tuple val(meta), path("*.gz"), optional: true, emit: gz_output
1518
tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions
1619
// WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions.
1720

1821
when:
1922
task.ext.when == null || task.ext.when
2023

2124
script:
22-
def args = task.ext.args ?: ''
23-
def prefix = task.ext.prefix ?: "${meta.id}"
24-
if ("${input}" == "${prefix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate."
25+
def args = task.ext.args ?: ''
26+
def prefix = task.ext.prefix ?: "${meta.id}"
27+
file_output = "${prefix}.${suffix}"
28+
// Refuse to run when an output name would collide with the staged input name.
// The shell redirect below writes ${file_output}; when zip_bool is "true", gzip
// then writes ${file_output}.gz, so that name has to be checked as well.
// zip_bool is compared as an interpolated string, the same way the shell test does.
def gzip_requested = "${zip_bool}" == "true"
if ("${input}" == "${file_output}" || (gzip_requested && "${input}" == "${file_output}.gz")) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate."
2529

2630
"""
2731
bioawk \
2832
$args \
2933
$input \
30-
> ${prefix}
34+
> ${file_output}
3135
32-
gzip ${prefix}
36+
if [ "${zip_bool}" = "true" ]; then
37+
gzip "${file_output}"
38+
fi
39+
40+
"""
41+
42+
stub:
// file_output is assigned only inside script:, which does not run for a stub,
// so the output name is derived here again in exactly the same way.
// It is assigned without `def` (as in script:) because the output declaration
// path("${file_output}") refers to it.
def prefix = task.ext.prefix ?: "${meta.id}"
file_output = "${prefix}.${suffix}"
"""
# Mirror the real script: produce either the gzipped or the plain output, not both.
if [ "${zip_bool}" = "true" ]; then
    echo "" | gzip > ${file_output}.gz
else
    touch ${file_output}
fi
"""
3447
}

modules/nf-core/bioawk/meta.yml

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,23 @@
1-
schema_version: 1.1.0
21
name: "bioawk"
3-
description: Bioawk is an extension to Brian Kernighan's awk, adding the support of several common biological data formats.
4-
2+
description: Bioawk is an extension to Brian Kernighan's awk, adding the support
3+
of several common biological data formats.
54
keywords:
65
- bioawk
76
- fastq
87
- fasta
98
- sam
109
- file manipulation
1110
- awk
12-
1311
tools:
1412
- bioawk:
1513
description: BWK awk modified for biological data
1614
homepage: https://github.com/lh3/bioawk
1715
documentation: https://github.com/lh3/bioawk
1816
tool_dev_url: https://github.com/lh3/bioawk
1917
licence:
20-
- Free software license (https://github.com/lh3/bioawk/blob/master/README.awk#L1)
18+
- Free software license
19+
(https://github.com/lh3/bioawk/blob/master/README.awk#L1)
2120
identifier: ""
22-
2321
input:
2422
- - meta:
2523
type: map
@@ -31,9 +29,28 @@ input:
3129
description: |
3230
Input biological sequence file (optionally gzipped) to be manipulated via the program specified in `$args`.
3331
pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}"
34-
32+
ontologies:
33+
- edam: http://edamontology.org/format_1930
34+
- edam: http://edamontology.org/format_3475
35+
- suffix:
36+
type: string
37+
description: The suffix to add to the output file name.
38+
- zip_bool:
39+
type: boolean
40+
description: Whether to gzip the output file.
41+
pattern: "true|false"
3542
output:
3643
output:
44+
- - meta:
45+
type: map
46+
description: |
47+
Groovy Map containing sample information
48+
e.g. [ id:'test', single_end:false ]
49+
- ${file_output}:
50+
type: file
51+
description: Manipulated version of the input sequence file.
52+
ontologies: []
53+
gz_output:
3754
- - meta:
3855
type: map
3956
description: |
@@ -43,32 +60,31 @@ output:
4360
type: file
4461
description: Manipulated and gzipped version of the input sequence file.
4562
pattern: "*.gz"
46-
63+
ontologies:
64+
- edam: http://edamontology.org/format_3989
4765
versions_bioawk:
48-
- - "${task.process}":
66+
- - ${task.process}:
4967
type: string
5068
description: The name of the process
51-
- "bioawk":
69+
- bioawk:
5270
type: string
5371
description: The name of the tool
5472
- "1.0":
5573
type: string
56-
description: The version of the tool
57-
74+
description: The expression to obtain the version of the tool
5875
topics:
5976
versions:
60-
- - "${task.process}":
77+
- - ${task.process}:
6178
type: string
6279
description: The name of the process
63-
- "bioawk":
80+
- bioawk:
6481
type: string
6582
description: The name of the tool
6683
- "1.0":
6784
type: string
68-
description: The version of the tool
69-
85+
description: The expression to obtain the version of the tool
7086
authors:
7187
- "@jfy133"
72-
7388
maintainers:
7489
- "@jfy133"
90+
schema_version: 1.1.0

modules/nf-core/bioawk/tests/main.nf.test

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ nextflow_process {
44
name "Test Process BIOAWK"
55
script "../main.nf"
66
process "BIOAWK"
7-
config "./nextflow.config"
87

98
tag "modules"
109
tag "modules_nfcore"
1110
tag "bioawk"
1211

13-
test("test-bioawk") {
12+
test("fasta bioawk fasta.gz") {
13+
config "./nextflow.config"
1414

1515
when {
1616
process {
@@ -19,6 +19,33 @@ nextflow_process {
1919
[ id:'test', single_end:false ], // meta map
2020
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
2121
]
22+
input[1] = "fa"
23+
input[2] = true
24+
25+
"""
26+
}
27+
}
28+
29+
then {
30+
assertAll(
31+
{ assert process.success },
32+
{ assert snapshot(process.out).match() }
33+
)
34+
}
35+
}
36+
37+
test("fasta bioawk tsv") {
38+
config "./nextflow_telomere_check.config"
39+
40+
when {
41+
process {
42+
"""
43+
input[0] = [
44+
[ id:'test', single_end:false ], // meta map
45+
// FIXME(review): absolute path on a site-local filesystem; with checkIfExists: true this test
// fails wherever that path is absent. Presumably the file should be loaded via
// params.modules_testdata_base_path like the test above — confirm, then regenerate the snapshot.
file('/lustre/scratch124/tol/teams/tola/users/dp24/nf-modules/modules/nf-core/bioawk/tests/telomere.fasta', checkIfExists: true)
46+
]
47+
input[1] = "tsv"
48+
input[2] = false
2249

2350
"""
2451
}

modules/nf-core/bioawk/tests/main.nf.test.snap

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"test-bioawk": {
2+
"fasta bioawk fasta.gz": {
33
"content": [
44
{
55
"0": [
@@ -12,7 +12,17 @@
1212
]
1313
],
1414
"1": [
15-
"versions.yml:md5,5fe88e58a71f10551df56518c35ba91a"
15+
16+
],
17+
"2": [
18+
[
19+
"BIOAWK",
20+
"bioawk",
21+
"1.0"
22+
]
23+
],
24+
"gz_output": [
25+
1626
],
1727
"output": [
1828
[
@@ -23,15 +33,68 @@
2333
"sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693"
2434
]
2535
],
26-
"versions": [
27-
"versions.yml:md5,5fe88e58a71f10551df56518c35ba91a"
36+
"versions_bioawk": [
37+
[
38+
"BIOAWK",
39+
"bioawk",
40+
"1.0"
41+
]
42+
]
43+
}
44+
],
45+
"meta": {
46+
"nf-test": "0.9.3",
47+
"nextflow": "25.10.2"
48+
},
49+
"timestamp": "2026-03-25T12:35:30.509942773"
50+
},
51+
"fasta bioawk tsv": {
52+
"content": [
53+
{
54+
"0": [
55+
56+
],
57+
"1": [
58+
[
59+
{
60+
"id": "test",
61+
"single_end": false
62+
},
63+
"telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a"
64+
]
65+
],
66+
"2": [
67+
[
68+
"BIOAWK",
69+
"bioawk",
70+
"1.0"
71+
]
72+
],
73+
"gz_output": [
74+
[
75+
{
76+
"id": "test",
77+
"single_end": false
78+
},
79+
"telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a"
80+
]
81+
],
82+
"output": [
83+
84+
],
85+
"versions_bioawk": [
86+
[
87+
"BIOAWK",
88+
"bioawk",
89+
"1.0"
90+
]
2891
]
2992
}
3093
],
3194
"meta": {
32-
"nf-test": "0.8.4",
33-
"nextflow": "24.04.4"
95+
"nf-test": "0.9.3",
96+
"nextflow": "25.10.2"
3497
},
35-
"timestamp": "2024-08-28T10:24:46.397249"
98+
"timestamp": "2026-03-25T12:39:43.207782527"
3699
}
37100
}
modules/nf-core/bioawk/tests/nextflow.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
process {
22
withName: BIOAWK {
33
ext.args = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'"
4-
ext.prefix = "sample_1.fa"
4+
ext.prefix = "sample_1"
55
}
66
}
modules/nf-core/bioawk/tests/nextflow_telomere_check.config

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
process {
2+
withName: BIOAWK {
3+
ext.args = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct < 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\t%d\t%.2f\t%s\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s}\'"
4+
ext.prefix = "telomere_summary"
5+
}
6+
}

0 commit comments

Comments
 (0)