From c54c32c4947df993231c364ac3fe19f2abb85eed Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Apr 2022 10:18:34 +0200 Subject: [PATCH 1/4] chore: update adapterremoval --- modules.json | 4 +- .../nf-core/modules/adapterremoval/main.nf | 44 ++++++++++++++----- .../nf-core/modules/adapterremoval/meta.yml | 14 +++--- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/modules.json b/modules.json index 9953edb..51f13ea 100644 --- a/modules.json +++ b/modules.json @@ -4,7 +4,7 @@ "repos": { "nf-core/modules": { "adapterremoval": { - "git_sha": "f0800157544a82ae222931764483331a81812012" + "git_sha": "879d42c5e28661fe0a5e744c9e2c515868f9e08a" }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -38,4 +38,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/adapterremoval/main.nf b/modules/nf-core/modules/adapterremoval/main.nf index 9d16b9c..0e17c05 100644 --- a/modules/nf-core/modules/adapterremoval/main.nf +++ b/modules/nf-core/modules/adapterremoval/main.nf @@ -12,15 +12,14 @@ process ADAPTERREMOVAL { path(adapterlist) output: - tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated - tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded - tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated - tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated - tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed - tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated - tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved - tuple val(meta), path('*.log') , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.truncated.fastq.gz") , optional: true, emit: singles_truncated + tuple val(meta), path("${prefix}.discarded.fastq.gz") , optional: true, emit: discarded + tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz") , optional: true, emit: paired_truncated + tuple val(meta), path("${prefix}.collapsed.fastq.gz") , optional: true, emit: collapsed + tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz") , optional: true, emit: collapsed_truncated + tuple val(meta), path("${prefix}.paired.fastq.gz") , optional: true, emit: paired_interleaved + tuple val(meta), path('*.settings') , emit: settings + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +37,19 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads ${task.cpus} \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") @@ -56,10 +64,24 @@ process ADAPTERREMOVAL { $adapterlist \\ --basename ${prefix} \\ --threads $task.cpus \\ - --settings ${prefix}.log \\ --seed 42 \\ --gzip + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + ensure_fastq '${prefix}.pair1.truncated.gz' + ensure_fastq '${prefix}.pair2.truncated.gz' + ensure_fastq '${prefix}.collapsed.gz' + ensure_fastq '${prefix}.collapsed.truncated.gz' + ensure_fastq '${prefix}.paired.gz' + cat <<-END_VERSIONS > versions.yml "${task.process}": adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") diff --git a/modules/nf-core/modules/adapterremoval/meta.yml b/modules/nf-core/modules/adapterremoval/meta.yml index 5faad04..77273f6 100644 --- a/modules/nf-core/modules/adapterremoval/meta.yml +++ b/modules/nf-core/modules/adapterremoval/meta.yml @@ -43,43 +43,43 @@ output: Adapter trimmed FastQ files of either single-end reads, or singleton 'orphaned' reads from merging of paired-end data (i.e., one of the pair was lost due to filtering thresholds). - pattern: "*.truncated.gz" + pattern: "*.truncated.fastq.gz" - discarded: type: file description: | Adapter trimmed FastQ files of reads that did not pass filtering thresholds. - pattern: "*.discarded.gz" + pattern: "*.discarded.fastq.gz" - pair1_truncated: type: file description: | Adapter trimmed R1 FastQ files of paired-end reads that did not merge with their respective R2 pair due to long templates. The respective pair is stored in 'pair2_truncated'. - pattern: "*.pair1.truncated.gz" + pattern: "*.pair1.truncated.fastq.gz" - pair2_truncated: type: file description: | Adapter trimmed R2 FastQ files of paired-end reads that did not merge with their respective R1 pair due to long templates. The respective pair is stored in 'pair1_truncated'. - pattern: "*.pair2.truncated.gz" + pattern: "*.pair2.truncated.fastq.gz" - collapsed: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair but were not trimmed. - pattern: "*.collapsed.gz" + pattern: "*.collapsed.fastq.gz" - collapsed_truncated: type: file description: | Collapsed FastQ of paired-end reads that successfully merged with their respective R1 pair and were trimmed of adapter due to sufficient overlap. - pattern: "*.collapsed.truncated.gz" + pattern: "*.collapsed.truncated.fastq.gz" - log: type: file description: AdapterRemoval log file - pattern: "*.log" + pattern: "*.settings" - versions: type: file description: File containing software versions From 0e9b2989e3fd59b6e05a77a8faf6ed89d6ddea03 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Apr 2022 10:53:04 +0200 Subject: [PATCH 2/4] refactor: remove superfluous ENSURE_FASTQ_EXTENSION --- .../local/shortread_adapterremoval.nf | 75 ++++--------------- 1 file changed, 16 insertions(+), 59 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index e522a1a..8c02f6f 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -5,11 +5,6 @@ Process short raw reads with AdapterRemoval include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' -include { - ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION1; - ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION2; - ENSURE_FASTQ_EXTENSION as ENSURE_FASTQ_EXTENSION3; -} from '../../modules/local/ensure_fastq_extension' workflow SHORTREAD_ADAPTERREMOVAL { @@ -36,34 +31,25 @@ workflow SHORTREAD_ADAPTERREMOVAL { if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { - ENSURE_FASTQ_EXTENSION1( - Channel.empty().mix( + ch_concat_fastq = Channel.empty() + .mix( ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, ADAPTERREMOVAL_PAIRED.out.singles_truncated, - ADAPTERREMOVAL_PAIRED.out.pair1_truncated, - ADAPTERREMOVAL_PAIRED.out.pair2_truncated + ADAPTERREMOVAL_PAIRED.out.paired_truncated ) - .map { meta, reads -> - meta.single_end = true - [meta, reads] - } - ) + .groupTuple() + .map { [it.head(), it.tail().flatten()] } - CAT_FASTQ( - ENSURE_FASTQ_EXTENSION1.out.reads - .groupTuple() - ) - - ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + CAT_FASTQ(ch_concat_fastq) ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads - .mix(ENSURE_FASTQ_EXTENSION2.out.reads) + .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { - ENSURE_FASTQ_EXTENSION1( - Channel.empty().mix( + ch_concat_fastq = Channel.empty() + .mix( ADAPTERREMOVAL_PAIRED.out.collapsed, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) @@ -71,54 +57,25 @@ workflow SHORTREAD_ADAPTERREMOVAL { meta.single_end = true [meta, reads] } - ) + .groupTuple() - CAT_FASTQ( - ENSURE_FASTQ_EXTENSION1.out.reads - .groupTuple() - ) - - ENSURE_FASTQ_EXTENSION2(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + CAT_FASTQ(ch_concat_fastq) ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads - .mix(ENSURE_FASTQ_EXTENSION2.out.reads) + .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) } else { - ENSURE_FASTQ_EXTENSION1( - ADAPTERREMOVAL_PAIRED.out.pair1_truncated - .map { meta, reads -> - meta.single_end = true - [meta, reads] - } - ) - - ENSURE_FASTQ_EXTENSION2( - ADAPTERREMOVAL_PAIRED.out.pair2_truncated - .map { meta, reads -> - meta.single_end = true - [meta, reads] - } - ) - - ENSURE_FASTQ_EXTENSION3(ADAPTERREMOVAL_SINGLE.out.singles_truncated) - - ch_adapterremoval_reads_prepped = ENSURE_FASTQ_EXTENSION1.out.reads - .join(ENSURE_FASTQ_EXTENSION2.out.reads) - .groupTuple() - .map { meta, pair1, pair2 -> - meta.single_end = false - [ meta, [ pair1, pair2 ].flatten() ] - } - .mix(ENSURE_FASTQ_EXTENSION3.out.reads) + ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated + .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) } ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) ch_multiqc_files = ch_multiqc_files.mix( - ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, - ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} + ADAPTERREMOVAL_PAIRED.out.settings.collect{it[1]}, + ADAPTERREMOVAL_SINGLE.out.settings.collect{it[1]} ) emit: From 59b2088aa183a58523f712ec5ee85cbdab8e1647 Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Apr 2022 11:16:43 +0200 Subject: [PATCH 3/4] style: apply prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 51f13ea..becb501 100644 --- a/modules.json +++ b/modules.json @@ -38,4 +38,4 @@ } } } -} \ No newline at end of file +} From 39e5e802c7af4e7e36927afa471801ff3b8aab6e Mon Sep 17 00:00:00 2001 From: "Moritz E. Beber" Date: Tue, 5 Apr 2022 11:29:39 +0200 Subject: [PATCH 4/4] refactor: make code more explicit, add comment --- subworkflows/local/shortread_adapterremoval.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 8c02f6f..906a33f 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -39,7 +39,9 @@ workflow SHORTREAD_ADAPTERREMOVAL { ADAPTERREMOVAL_PAIRED.out.paired_truncated ) .groupTuple() - .map { [it.head(), it.tail().flatten()] } + // Paired-end reads cause a nested tuple during grouping. + // We want to present a flat list of files to `CAT_FASTQ`. + .map { meta, fastq -> [meta, fastq.flatten()] } CAT_FASTQ(ch_concat_fastq)