Skip to content

Commit 690bb56

Browse files
pinin4fjords and claude authored
Add new module: telogator2 (#11033)
* Add new module: telogator2 Add nf-core module for telogator2, a tool for allele-specific telomere length estimation and TVR characterization from long-read sequencing data (ONT/PacBio). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * add telomere test data and temporarily point to fork - Add ONT telomere reads test (exercises real analysis path) - Keep PacBio no-telomere test (exercises graceful fallback) - Temporarily override modules_testdata_base_path to pinin4fjords/test-datasets#telogator2-test-data (revert before merge) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: combine fasta and fai into single reference channel Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: simplify process script and assert failure on no telomere reads Remove error-catching wrapper from telogator2 process script. When no telomere reads are found the tool now fails with a clear error message, which the no-telomere test asserts against. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: emit individual files instead of directory - tlens: tlens_by_allele.tsv (primary result) - plots: *.png (allele and violin plots, optional) - qc: qc directory (stats, read lengths, metadata) Also revert modules_testdata_base_path now that nf-core/test-datasets#1947 is merged. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: exclude non-deterministic rng.txt from snapshot The qc/rng.txt file contains a random seed that differs across runs. Assert qc output exists but don't snapshot it. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: handle non-deterministic telogator2 outputs in tests Set fixed random seed (--rng 42) via test config. Assert tlens header structure rather than md5 since TL values vary across runs due to minimap2 non-determinism. Assert plots and qc exist without snapshotting. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: address PR review feedback - Use module_args pattern for ext.args in test config - Snapshot output file names (not md5s) for non-deterministic outputs - Remove PNGs from stub (plots are optional) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: make plots a required output The two main plots (all_final_alleles.png, violin_atl.png) are always produced on a successful run. Remove optional flag and add them back to the stub. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * refactor: split QC directory into individual output channels Emit cmd, stats, qc_readlens, readlens, and rng as separate channels instead of a single qc directory. Touch all QC files in stub. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5765afd commit 690bb56

File tree

6 files changed

+470
-0
lines changed

6 files changed

+470
-0
lines changed
modules/nf-core/telogator2/environment.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the telogator2 module; main.nf points at this file
# through its `conda "${moduleDir}/environment.yml"` directive.
3+
# conda-forge is listed ahead of bioconda.
channels:
4+
- conda-forge
5+
- bioconda
6+
dependencies:
7+
# Exact version pin; the container tags in main.nf name the same 2.2.3 release.
- telogator2=2.2.3

modules/nf-core/telogator2/main.nf

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Nextflow process wrapping the telogator2 command-line tool.
// Runs telogator2 on one reads file, writing into a directory named after
// `prefix`, then emits the per-allele table, the PNG plots found directly in
// that directory, and each QC file under `${prefix}/qc` as its own channel.
process TELOGATOR2 {
2+
tag "${meta.id}"
3+
label 'process_medium'
4+
5+
// Software comes from the conda environment file shipped with the module or
// from a container. The image URL on the `?` branch is used when the engine is
// singularity and ext.singularity_pull_docker_container is not set; otherwise
// the image on the `:` branch is used.
conda "${moduleDir}/environment.yml"
6+
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
7+
? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ab/ab4d9d463b2866006f8cbca9fbe6f978b1803e41f2a97d9f4d3c14ff6d97822f/data'
8+
: 'community.wave.seqera.io/library/telogator2:2.2.3--01b2748e09721f3b' }"
9+
10+
input:
11+
// reads_index and fai are staged next to their files but are never named on
// the command line in the script section.
tuple val(meta), path(reads), path(reads_index)
12+
tuple val(meta2), path(fasta), path(fai)
13+
14+
output:
15+
// `prefix` is assigned without `def` in the script/stub sections, which is
// what lets the output paths below refer to it.
tuple val(meta), path("${prefix}/tlens_by_allele.tsv") , emit: tlens
16+
// Glob: collects every PNG directly under ${prefix}; the QC plot lives one
// level down in qc/ and is emitted separately as qc_readlens.
tuple val(meta), path("${prefix}/*.png") , emit: plots
17+
tuple val(meta), path("${prefix}/qc/cmd.txt") , emit: cmd
18+
tuple val(meta), path("${prefix}/qc/stats.txt") , emit: stats
19+
tuple val(meta), path("${prefix}/qc/qc_readlens.png") , emit: qc_readlens
20+
tuple val(meta), path("${prefix}/qc/readlens.npz") , emit: readlens
21+
tuple val(meta), path("${prefix}/qc/rng.txt") , emit: rng
22+
// Version tuple: process name, tool name, and the result of evaluating the
// version command; it is also sent to the `versions` topic.
tuple val("${task.process}"), val('telogator2'), eval("telogator2 --version | sed 's/telogator2 //'"), emit: versions_telogator2, topic: versions
23+
24+
when:
25+
// Runs unless task.ext.when is set to a falsy value.
task.ext.when == null || task.ext.when
26+
27+
script:
28+
// Extra command-line options come from task.ext.args (empty by default); the
// output directory name comes from task.ext.prefix, falling back to meta.id.
def args = task.ext.args ?: ''
29+
prefix = task.ext.prefix ?: "${meta.id}"
30+
// --ref is added only when `fasta` is truthy; an empty input leaves it off.
def ref_arg = fasta ? "--ref ${fasta}" : ""
31+
"""
32+
telogator2 \\
33+
-i ${reads} \\
34+
-o ${prefix} \\
35+
-p ${task.cpus} \\
36+
${ref_arg} \\
37+
${args}
38+
"""
39+
40+
stub:
41+
// Stub run: creates an empty placeholder for every declared output file
// (two top-level PNGs for the plots glob) without invoking telogator2.
prefix = task.ext.prefix ?: "${meta.id}"
42+
"""
43+
mkdir -p ${prefix}/qc
44+
touch ${prefix}/tlens_by_allele.tsv
45+
touch ${prefix}/all_final_alleles.png
46+
touch ${prefix}/violin_atl.png
47+
touch ${prefix}/qc/cmd.txt
48+
touch ${prefix}/qc/qc_readlens.png
49+
touch ${prefix}/qc/readlens.npz
50+
touch ${prefix}/qc/rng.txt
51+
touch ${prefix}/qc/stats.txt
52+
"""
53+
}
modules/nf-core/telogator2/meta.yml

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# Module metadata for telogator2: a short description, search keywords and the
# wrapped tool's reference information.
name: "telogator2"
2+
description: "Allele-specific telomere length estimation and TVR characterization from long reads"
3+
keywords:
4+
- bam
5+
- cram
6+
- genomics
7+
- telomere
8+
- long-read
9+
- pacbio
10+
- nanopore
11+
tools:
12+
- "telogator2":
13+
description: "A method for measuring allele-specific TL and characterizing telomere variant repeat (TVR) sequences from long reads"
14+
# homepage, documentation and tool_dev_url all point at the same GitHub repository.
homepage: "https://github.com/zstephens/telogator2"
15+
documentation: "https://github.com/zstephens/telogator2"
16+
tool_dev_url: "https://github.com/zstephens/telogator2"
17+
doi: "10.1093/bioinformatics/btae078"
18+
licence:
19+
- "MIT"
20+
# NOTE(review): presumably names the `args` variable defined in main.nf's script section; confirm against the meta schema.
args_id: "$args"
21+
identifier: "biotools:telogator2"
22+
# Two input tuples, in the order main.nf declares them:
# (meta, reads, reads_index) and (meta2, fasta, fai).
input:
23+
- - meta:
24+
type: map
25+
description: |
26+
Groovy Map containing sample information
27+
e.g. [ id:'test' ]
28+
- reads:
29+
type: file
30+
description: BAM or CRAM file of long reads (PacBio or ONT)
31+
pattern: "*.{bam,cram}"
32+
ontologies:
33+
- edam: http://edamontology.org/format_2572
34+
- reads_index:
35+
type: file
36+
description: Index file for the input BAM/CRAM
37+
pattern: "*.{bai,crai}"
38+
ontologies: []
39+
# Second tuple: the reference; both of its files are described as optional.
- - meta2:
40+
type: map
41+
description: |
42+
Groovy Map containing reference information
43+
e.g. [ id:'genome' ]
44+
- fasta:
45+
type: file
46+
description: Optional reference genome FASTA file
47+
pattern: "*.{fa,fasta,fa.gz}"
48+
ontologies:
49+
- edam: http://edamontology.org/format_1929
50+
- fai:
51+
type: file
52+
description: Optional FASTA index file
53+
pattern: "*.fai"
54+
ontologies: []
55+
# One entry per emit name in main.nf. Each file entry is keyed by the same path
# expression main.nf uses for that channel.
output:
56+
tlens:
57+
- - meta:
58+
type: map
59+
description: |
60+
Groovy Map containing sample information
61+
e.g. [ id:'test' ]
62+
- ${prefix}/tlens_by_allele.tsv:
63+
type: file
64+
description: TSV file with telomere length estimates per allele
65+
pattern: "*/tlens_by_allele.tsv"
66+
ontologies:
67+
- edam: http://edamontology.org/format_3475
68+
plots:
69+
- - meta:
70+
type: map
71+
description: |
72+
Groovy Map containing sample information
73+
e.g. [ id:'test' ]
74+
- ${prefix}/*.png:
75+
type: file
76+
description: PNG plots including allele visualizations and violin plots
77+
pattern: "*/*.png"
78+
ontologies:
79+
- edam: http://edamontology.org/format_3603
80+
# The remaining file channels (cmd, stats, qc_readlens, readlens, rng) all live
# under the qc/ subdirectory of the output directory.
cmd:
81+
- - meta:
82+
type: map
83+
description: |
84+
Groovy Map containing sample information
85+
e.g. [ id:'test' ]
86+
- ${prefix}/qc/cmd.txt:
87+
type: file
88+
description: Text file recording the telogator2 command that was run
89+
pattern: "*/qc/cmd.txt"
90+
ontologies: []
91+
stats:
92+
- - meta:
93+
type: map
94+
description: |
95+
Groovy Map containing sample information
96+
e.g. [ id:'test' ]
97+
- ${prefix}/qc/stats.txt:
98+
type: file
99+
description: Text file with QC statistics including read counts and telomere read filtering
100+
pattern: "*/qc/stats.txt"
101+
ontologies: []
102+
qc_readlens:
103+
- - meta:
104+
type: map
105+
description: |
106+
Groovy Map containing sample information
107+
e.g. [ id:'test' ]
108+
- ${prefix}/qc/qc_readlens.png:
109+
type: file
110+
description: PNG plot of read length distribution
111+
pattern: "*/qc/qc_readlens.png"
112+
ontologies:
113+
- edam: http://edamontology.org/format_3603
114+
readlens:
115+
- - meta:
116+
type: map
117+
description: |
118+
Groovy Map containing sample information
119+
e.g. [ id:'test' ]
120+
- ${prefix}/qc/readlens.npz:
121+
type: file
122+
description: Numpy compressed array of read length data
123+
pattern: "*/qc/readlens.npz"
124+
ontologies: []
125+
rng:
126+
- - meta:
127+
type: map
128+
description: |
129+
Groovy Map containing sample information
130+
e.g. [ id:'test' ]
131+
- ${prefix}/qc/rng.txt:
132+
type: file
133+
description: Text file recording the random seed used
134+
pattern: "*/qc/rng.txt"
135+
ontologies: []
136+
# Version tuple: process name, tool name, and the evaluated version command.
versions_telogator2:
137+
- - ${task.process}:
138+
type: string
139+
description: The name of the process
140+
- telogator2:
141+
type: string
142+
description: The name of the tool
143+
- "telogator2 --version | sed 's/telogator2 //'":
144+
type: eval
145+
description: The expression to obtain the version of the tool
146+
# Topic channels: the `versions` topic carries the same three-element tuple
# that the versions_telogator2 output describes.
topics:
147+
versions:
148+
- - ${task.process}:
149+
type: string
150+
description: The name of the process
151+
- telogator2:
152+
type: string
153+
description: The name of the tool
154+
- "telogator2 --version | sed 's/telogator2 //'":
155+
type: eval
156+
description: The expression to obtain the version of the tool
157+
# The same GitHub handle is listed as both author and maintainer.
authors:
158+
- "@pinin4fjords"
159+
maintainers:
160+
- "@pinin4fjords"
modules/nf-core/telogator2/tests/main.nf.test

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
// nf-core modules test telogator2
2+
// nf-test suite for the TELOGATOR2 process. Three cases: a real run on ONT
// reads, a PacBio run that is expected to fail, and a stub run.
nextflow_process {
3+
4+
name "Test Process TELOGATOR2"
5+
script "../main.nf"
6+
process "TELOGATOR2"
7+
config "./nextflow.config"
8+
tag "modules"
9+
tag "modules_nfcore"
10+
tag "telogator2"
11+
12+
// Real run on the ONT telomere BAM. No reference is supplied: input[1] carries
// empty lists for fasta and fai.
test("homo_sapiens - ont bam - telomere reads") {
13+
when {
14+
// NOTE(review): module_args is presumably forwarded to the process's ext.args
// by ./nextflow.config, which is not shown here; confirm.
params {
15+
module_args = '--rng 42'
16+
}
17+
process {
18+
"""
19+
input[0] = [
20+
[ id:'test_tel' ], // meta map
21+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/HG002_ont_telomere/HG002_ont_tel_sub.bam', checkIfExists: true),
22+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/HG002_ont_telomere/HG002_ont_tel_sub.bam.bai', checkIfExists: true)
23+
]
24+
input[1] = [ [ id:'genome' ], [], [] ]
25+
"""
26+
}
27+
}
28+
29+
// The tlens table is checked by its header line only. In the snapshot, tlens
// and plots are recorded by file name, while the QC channels and the versions
// outputs are recorded as emitted.
then {
30+
assertAll(
31+
{ assert process.success },
32+
{ assert path(process.out.tlens.get(0).get(1)).readLines().first() ==
33+
'#chr\tposition\tref_samp\tallele_id\tTL_p75\tread_TLs\tread_lengths\tread_mapq\ttvr_len\ttvr_consensus\tsupporting_reads' },
34+
{ assert process.out.plots },
35+
{ assert snapshot(
36+
file(process.out.tlens.get(0).get(1)).name,
37+
process.out.plots.collect { meta, pngs -> pngs.collect { png -> file(png).name } },
38+
process.out.cmd,
39+
process.out.stats,
40+
process.out.qc_readlens,
41+
process.out.readlens,
42+
process.out.rng,
43+
process.out.findAll { key, val -> key.startsWith('versions') }
44+
).match() }
45+
)
46+
}
47+
}
48+
49+
// Expected-failure case: with the PacBio test BAM the process must fail and
// its stdout must contain 'No telomere reads found'.
test("homo_sapiens - pacbio bam - no telomere reads") {
50+
when {
51+
params {
52+
module_args = '--rng 42'
53+
}
54+
process {
55+
"""
56+
input[0] = [
57+
[ id:'test' ], // meta map
58+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
59+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true)
60+
]
61+
input[1] = [ [ id:'genome' ], [], [] ]
62+
"""
63+
}
64+
}
65+
66+
then {
67+
assertAll(
68+
{ assert process.failed },
69+
{ assert process.stdout.toString().contains('No telomere reads found') }
70+
)
71+
}
72+
}
73+
74+
// Stub case: runs with -stub on the same PacBio inputs and only checks that
// the process succeeds and its sanitized outputs match the snapshot.
test("homo_sapiens - pacbio bam - stub") {
75+
76+
options "-stub"
77+
78+
when {
79+
params {
80+
module_args = '--rng 42'
81+
}
82+
process {
83+
"""
84+
input[0] = [
85+
[ id:'stub' ], // meta map
86+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
87+
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true)
88+
]
89+
input[1] = [ [ id:'genome' ], [], [] ]
90+
"""
91+
}
92+
}
93+
94+
then {
95+
assertAll(
96+
{ assert process.success },
97+
{ assert snapshot(sanitizeOutput(process.out)).match() }
98+
)
99+
}
100+
}
101+
}

0 commit comments

Comments
 (0)