From c4c93bd59d5390c7970dea72575ff9bf8fe1707b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 31 Mar 2022 15:31:45 +0200 Subject: [PATCH 1/8] Start working on adding adapterremoval --- modules.json | 5 +- .../nf-core/modules/adapterremoval/main.nf | 70 ++++++++++++++ .../nf-core/modules/adapterremoval/meta.yml | 90 ++++++++++++++++++ nextflow_schema.json | 26 ++++-- .../local/shortread_adapterremoval.nf | 91 +++++++++++++++++++ subworkflows/local/shortread_fastp.nf | 8 +- subworkflows/local/shortread_preprocessing.nf | 5 + 7 files changed, 281 insertions(+), 14 deletions(-) create mode 100644 modules/nf-core/modules/adapterremoval/main.nf create mode 100644 modules/nf-core/modules/adapterremoval/meta.yml create mode 100644 subworkflows/local/shortread_adapterremoval.nf diff --git a/modules.json b/modules.json index 34ef9f1..c4b2f12 100644 --- a/modules.json +++ b/modules.json @@ -3,6 +3,9 @@ "homePage": "https://github.com/nf-core/taxprofiler", "repos": { "nf-core/modules": { + "adapterremoval": { + "git_sha": "f0800157544a82ae222931764483331a81812012" + }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -32,4 +35,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/adapterremoval/main.nf b/modules/nf-core/modules/adapterremoval/main.nf new file mode 100644 index 0000000..9d16b9c --- /dev/null +++ b/modules/nf-core/modules/adapterremoval/main.nf @@ -0,0 +1,70 @@ +process ADAPTERREMOVAL { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' : + 'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }" + + input: + tuple val(meta), path(reads) + path(adapterlist) + + output: + tuple val(meta), path("${prefix}.truncated.gz") , optional: true, emit: singles_truncated + tuple val(meta), path("${prefix}.discarded.gz") , optional: true, emit: discarded + tuple val(meta), path("${prefix}.pair1.truncated.gz") , optional: true, emit: pair1_truncated + tuple val(meta), path("${prefix}.pair2.truncated.gz") , optional: true, emit: pair2_truncated + tuple val(meta), path("${prefix}.collapsed.gz") , optional: true, emit: collapsed + tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated + tuple val(meta), path("${prefix}.paired.gz") , optional: true, emit: paired_interleaved + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def list = adapterlist ? "--adapter-list ${adapterlist}" : "" + prefix = task.ext.prefix ?: "${meta.id}" + + if (meta.single_end) { + """ + AdapterRemoval \\ + --file1 $reads \\ + $args \\ + $adapterlist \\ + --basename ${prefix} \\ + --threads ${task.cpus} \\ + --settings ${prefix}.log \\ + --seed 42 \\ + --gzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") + END_VERSIONS + """ + } else { + """ + AdapterRemoval \\ + --file1 ${reads[0]} \\ + --file2 ${reads[1]} \\ + $args \\ + $adapterlist \\ + --basename ${prefix} \\ + --threads $task.cpus \\ + --settings ${prefix}.log \\ + --seed 42 \\ + --gzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") + END_VERSIONS + """ + } + +} diff --git a/modules/nf-core/modules/adapterremoval/meta.yml b/modules/nf-core/modules/adapterremoval/meta.yml new file mode 100644 index 0000000..5faad04 --- /dev/null +++ b/modules/nf-core/modules/adapterremoval/meta.yml @@ -0,0 +1,90 @@ +name: adapterremoval +description: Trim sequencing adapters and collapse overlapping reads +keywords: + - trimming + - adapters + - merging + - fastq +tools: + - adapterremoval: + description: The AdapterRemoval v2 tool for merging and clipping reads. + homepage: https://github.com/MikkelSchubert/adapterremoval + documentation: https://adapterremoval.readthedocs.io + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - adapterlist: + type: file + description: Optional text file containing list of adapters to look for for removal + with one adapter per line. Otherwise will look for default adapters (see + AdapterRemoval man page), or can be modified to remove user-specified + adapters via ext.args. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - singles_truncated: + type: file + description: | + Adapter trimmed FastQ files of either single-end reads, or singleton + 'orphaned' reads from merging of paired-end data (i.e., one of the pair + was lost due to filtering thresholds). + pattern: "*.truncated.gz" + - discarded: + type: file + description: | + Adapter trimmed FastQ files of reads that did not pass filtering + thresholds. + pattern: "*.discarded.gz" + - pair1_truncated: + type: file + description: | + Adapter trimmed R1 FastQ files of paired-end reads that did not merge + with their respective R2 pair due to long templates. The respective pair + is stored in 'pair2_truncated'. + pattern: "*.pair1.truncated.gz" + - pair2_truncated: + type: file + description: | + Adapter trimmed R2 FastQ files of paired-end reads that did not merge + with their respective R1 pair due to long templates. The respective pair + is stored in 'pair1_truncated'. + pattern: "*.pair2.truncated.gz" + - collapsed: + type: file + description: | + Collapsed FastQ of paired-end reads that successfully merged with their + respective R1 pair but were not trimmed. + pattern: "*.collapsed.gz" + - collapsed_truncated: + type: file + description: | + Collapsed FastQ of paired-end reads that successfully merged with their + respective R1 pair and were trimmed of adapter due to sufficient overlap. + pattern: "*.collapsed.truncated.gz" + - log: + type: file + description: AdapterRemoval log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@maxibor" + - "@jfy133" diff --git a/nextflow_schema.json b/nextflow_schema.json index adc5a73..ebb748a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -284,7 +294,11 @@ }, "shortread_clipmerge_tool": { "type": "string", - "default": "fastp" + "default": "fastp", + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -294,15 +308,15 @@ }, "shortread_clipmerge_adapter1": { "type": "string", - "default": null + "default": "None" }, "shortread_clipmerge_adapter2": { "type": "string", - "default": null + "default": "None" }, "shortread_clipmerge_minlength": { "type": "integer", "default": 15 } } -} +} \ No newline at end of file diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf new file mode 100644 index 0000000..b09356a --- /dev/null +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -0,0 +1,91 @@ +/* +Process short raw reads with AdapterRemoval +*/ + +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main' +include { CAT_FASTQ } from '../../modules/nf-core/modules/cat/fastq/main' + +workflow SHORTREAD_ADAPTERREMOVAL { + + take: + reads // [[meta], [reads]] + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + ch_input_for_adapterremoval = reads + .dump(tag: "pre_adapterremoval_branch") + .branch{ + single: it[0]['single_end'] == true + paired: it[0]['single_end'] == false + } + + ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) + ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + + if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { + ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed + .mix( + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, + ADAPTERREMOVAL_PAIRED.out.singles_truncated, + ADAPTERREMOVAL_PAIRED.out.pair1_truncated, + ADAPTERREMOVAL_PAIRED.out.pair2_truncated + ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + + [ meta_new, reads ] + } + .groupTuple(by: 0) + ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads + + ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + + } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { + ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed + .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + + [ meta_new, reads ] + } + .groupTuple(by: 0) + + ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads + + ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + + } else { + + ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated + .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) + .dump(tag: "pre-group") + .groupTuple(by: 0) + .dump(tag: "post-group") + .map { meta, pair1, pair2 -> + [ meta, [ pair1, pair2 ].flatten() ] + } + .dump(tag: "post-map") + + + ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe + .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + } + + ch_processed_reads = ch_adapterremoval_reads_prepped + + ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() ) + ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} ) + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index c2e435c..48817db 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP { ch_multiqc_files = Channel.empty() ch_input_for_fastp = reads - .dump(tag: "pre-fastp_branch") .branch{ single: it[0]['single_end'] == true paired: it[0]['single_end'] == false } - ch_input_for_fastp.single.dump(tag: "input_fastp_single") - ch_input_for_fastp.paired.dump(tag: "input_fastp_paired") - FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) // Last parameter here turns on merging of PE data FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs ) @@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP { ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first()) ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first()) - ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped") + ch_processed_reads = ch_fastp_reads_prepped ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} ) ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} ) - ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final") - emit: reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 7fba0c0..f0a1ed4 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -4,6 +4,7 @@ include { SHORTREAD_FASTP } from './shortread_fastp' +include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval' include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' workflow SHORTREAD_PREPROCESSING { @@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING { ch_processed_reads = SHORTREAD_FASTP ( reads ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) + } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads + ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) } else { ch_processed_reads = reads } From dcda815bfb03697b69f219f26af69708bd2d28b5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 09:50:08 +0200 Subject: [PATCH 2/8] Add final adapterremoval options to match fastp functionality --- conf/modules.config | 70 ++++++++++++++++--- nextflow.config | 2 +- nextflow_schema.json | 23 ++---- .../local/shortread_adapterremoval.nf | 10 ++- 4 files changed, 71 insertions(+), 34 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 71eaa75..dc8b138 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,18 +62,15 @@ process { ] } - withName: FASTP { - ext.prefix = { "${meta.id}_${meta.run_accession}" } + withName: FASTP_SINGLE { ext.args = [ - // collapsing options - option to retain singletons - params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", - !{ ${meta.single_end} } && params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : !{ ${meta.single_end} } ? "--detect_adapter_for_pe" : "", // filtering options "--length_required ${params.shortread_clipmerge_minlength}" ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ path: { "${params.outdir}/fastp" }, mode: 'copy', @@ -81,6 +78,61 @@ process { ] } + withName: FASTP_PAIRED { + ext.args = [ + // collapsing options - option to retain singletons + params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe", + // filtering options + "--length_required ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/fastp" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: ADAPTERREMOVAL_SINGLE { + ext.args = [ + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", + // filtering options + "--minlength ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/adapterremoval" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: ADAPTERREMOVAL_PAIRED { + ext.args = [ + // collapsing options + params.shortread_clipmerge_mergepairs ? "--collapse" : "", + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "", + // filtering options + "--minlength ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/adapterremoval" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -99,23 +151,23 @@ process { } withName: MALT_RUN { + ext.args = { "${meta.db_params}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: 'copy', pattern: '*.{rma6,tab,text,sam,log}' ] - ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: KRAKEN2_KRAKEN2 { + ext.args = { "${meta.db_params}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: 'copy', pattern: '*.{fastq.gz,txt}' ] - ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/nextflow.config b/nextflow.config index 6fde513..7be36a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,7 @@ params { shortread_clipmerge_tool = 'fastp' shortread_clipmerge_skipadaptertrim = false shortread_clipmerge_mergepairs = false - shortread_clipmerge_excludeunmerged = true + shortread_clipmerge_excludeunmerged = false shortread_clipmerge_adapter1 = null shortread_clipmerge_adapter2 = null shortread_clipmerge_minlength = 15 diff --git a/nextflow_schema.json b/nextflow_schema.json index ebb748a..fb2ca31 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -277,7 +267,7 @@ }, "shortread_clipmerge_excludeunmerged": { "type": "boolean", - "default": true + "default": false }, "longread_clip": { "type": "boolean" @@ -295,10 +285,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -319,4 +306,4 @@ "default": 15 } } -} \ No newline at end of file +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b09356a..b467a64 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -16,7 +16,6 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_multiqc_files = Channel.empty() ch_input_for_adapterremoval = reads - .dump(tag: "pre_adapterremoval_branch") .branch{ single: it[0]['single_end'] == true paired: it[0]['single_end'] == false @@ -36,11 +35,13 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 + meta_new['single_end'] = true [ meta_new, reads ] } .groupTuple(by: 0) + + ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) @@ -51,7 +52,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 + meta_new['single_end'] = true [ meta_new, reads ] } @@ -65,13 +66,10 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) - .dump(tag: "pre-group") .groupTuple(by: 0) - .dump(tag: "post-group") .map { meta, pair1, pair2 -> [ meta, [ pair1, pair2 ].flatten() ] } - .dump(tag: "post-map") ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe From b04cfec2c2ad651e498bf8b0a21108ccb51a1a0a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 11:13:36 +0200 Subject: [PATCH 3/8] Prettier linting --- CITATIONS.md | 18 +++++++++++++++++- modules.json | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 1018ccd..6e43999 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -13,9 +13,25 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. -* [Porechop](https://github.com/rrwick/Porechop) +- [fastp](https://doi.org/10.1093/bioinformatics/bty560) + + > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560. + +- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2) + + > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2. + +- [Porechop](https://github.com/rrwick/Porechop) + +- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0) + + > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. + +- [MALT](https://doi.org/10.1038/s41559-017-0446-6) + > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. ## Software packaging/containerisation tools diff --git a/modules.json b/modules.json index c4b2f12..dcfbd3f 100644 --- a/modules.json +++ b/modules.json @@ -35,4 +35,4 @@ } } } -} \ No newline at end of file +} From df4f13319fde9e29d1ed5f0780d60aedccde04e5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 11:13:57 +0200 Subject: [PATCH 4/8] More linting --- CITATIONS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CITATIONS.md b/CITATIONS.md index 6e43999..8f286b0 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -31,6 +31,7 @@ > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. - [MALT](https://doi.org/10.1038/s41559-017-0446-6) + > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. ## Software packaging/containerisation tools From 0d0b377dfb00b8ccd48b47be55b9bcba3dcbd582 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 11:19:00 +0200 Subject: [PATCH 5/8] Add additional test --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb6de77..a8078e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,10 @@ jobs: # Test latest edge release of Nextflow - NXF_VER: "" NXF_EDGE: "1" + parameters: + - "--shortread_clipmerge_tool fastp" + - "--shortread_clipmerge_tool adapterremoval" + steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -47,6 +51,6 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }} # From 14f2b7bd84956a2dadb6c2f69d39baf1c6f3e027 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 1 Apr 2022 15:34:05 +0200 Subject: [PATCH 6/8] Apply suggestions from code review Co-authored-by: Moritz E. Beber --- subworkflows/local/shortread_adapterremoval.nf | 18 ++++++++---------- subworkflows/local/shortread_preprocessing.nf | 4 ++-- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b467a64..3701f7e 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -17,14 +17,16 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_input_for_adapterremoval = reads .branch{ - single: it[0]['single_end'] == true - paired: it[0]['single_end'] == false + single: it[0].single_end + paired: !it[0].single_end } ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { + // due to the slightly ugly output implementation of the current AdapterRemoval2 module, each file + // has to be exported in a separate channel, and we must manually recombine when necessary ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, @@ -35,16 +37,14 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = true + meta_new.single_end = true [ meta_new, reads ] } - .groupTuple(by: 0) + .groupTuple() - ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - - ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed @@ -58,9 +58,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { } .groupTuple(by: 0) - ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads - - ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } else { diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index f0a1ed4..1d0caac 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -21,8 +21,8 @@ workflow SHORTREAD_PREPROCESSING { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads - ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) + ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) } else { ch_processed_reads = reads } From 03e832954f89e43a641033dcae059c1870c08b2e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 1 Apr 2022 20:55:33 +0200 Subject: [PATCH 7/8] Update subworkflows/local/shortread_adapterremoval.nf Co-authored-by: Moritz E. Beber --- subworkflows/local/shortread_adapterremoval.nf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 3701f7e..ac8ff0f 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -62,15 +62,12 @@ workflow SHORTREAD_ADAPTERREMOVAL { } else { - ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated + ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) - .groupTuple(by: 0) + .groupTuple() .map { meta, pair1, pair2 -> [ meta, [ pair1, pair2 ].flatten() ] } - - - ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } From 93aaa9eeacbf6211646853be0f4e18a94574e2f1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 21:36:22 +0200 Subject: [PATCH 8/8] Clean up AdapterRemoval and remove all debugging dumps --- subworkflows/local/db_check.nf | 2 -- subworkflows/local/input_check.nf | 4 --- subworkflows/local/longread_preprocessing.nf | 1 - .../local/shortread_adapterremoval.nf | 32 ++++++++++--------- workflows/taxprofiler.nf | 6 +--- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index bda5cfe..356915a 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -18,9 +18,7 @@ workflow DB_CHECK { parsed_samplesheet = DATABASE_CHECK ( dbsheet ) .csv .splitCsv ( header:true, sep:',' ) - .dump(tag: "db_split_csv_out") .map { create_db_channels(it) } - .dump(tag: "db_channel_prepped") ch_dbs_for_untar = parsed_samplesheet .branch { diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 4501386..ec404c2 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -12,7 +12,6 @@ workflow INPUT_CHECK { parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) - .dump(tag: "input_split_csv_out") .branch { fasta: it['fasta'] != '' nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE' @@ -21,17 +20,14 @@ workflow INPUT_CHECK { parsed_samplesheet.fastq .map { create_fastq_channel(it) } - .dump(tag: "fastq_channel_init") .set { fastq } parsed_samplesheet.nanopore .map { create_fastq_channel(it) } - .dump(tag: "fastq_nanopore_channel_init") .set { nanopore } parsed_samplesheet.fasta .map { create_fasta_channel(it) } - .dump(tag: "fasta_channel_init") .set { fasta } emit: diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf index 7c7c24b..a1515c7 100644 --- a/subworkflows/local/longread_preprocessing.nf +++ b/subworkflows/local/longread_preprocessing.nf @@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING { PORECHOP ( reads ) ch_processed_reads = PORECHOP.out.reads - .dump(tag: "pre_fastqc_check") .map { meta, reads -> def meta_new = meta.clone() diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index ac8ff0f..5e005db 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -24,9 +24,10 @@ workflow SHORTREAD_ADAPTERREMOVAL { ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) + // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file + // has to be exported in a separate channel, and we must manually recombine when necessary + if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { - // due to the slightly ugly output implementation of the current AdapterRemoval2 module, each file - // has to be exported in a separate channel, and we must manually recombine when necessary ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, @@ -43,22 +44,23 @@ workflow SHORTREAD_ADAPTERREMOVAL { } .groupTuple() - - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads + .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { - ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed - .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = true + ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed + .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = true - [ meta_new, reads ] - } - .groupTuple(by: 0) + [ meta_new, reads ] + } + .groupTuple(by: 0) - ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads + .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } else { @@ -68,7 +70,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, pair1, pair2 -> [ meta, [ pair1, pair2 ].flatten() ] } - .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) + .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) } ch_processed_reads = ch_adapterremoval_reads_prepped diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index af5c54d..ce89a91 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files." +if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -126,7 +127,6 @@ workflow TAXPROFILER { ch_input_for_profiling = ch_shortreads_preprocessed .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) - .dump(tag: "reads_plus_db_clean") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' @@ -141,9 +141,7 @@ workflow TAXPROFILER { // loading takes a long time, so we only want to run it once per database // TODO document somewhere we only accept illumina short reads for MALT? ch_input_for_malt = ch_input_for_profiling.malt - .dump(tag: "input_to_malt_prefilter") .filter { it[0]['instrument_platform'] == 'ILLUMINA' } - .dump(tag: "input_to_malt_postfilter") .map { it -> def temp_meta = [ id: it[2]['db_name']] + it[2] @@ -151,7 +149,6 @@ workflow TAXPROFILER { [ temp_meta, it[1], db ] } .groupTuple(by: [0,2]) - .dump(tag: "input_to_malt") .multiMap { it -> reads: [ it[0], it[1].flatten() ] @@ -160,7 +157,6 @@ workflow TAXPROFILER { // We can run Kraken2 one-by-one sample-wise ch_input_for_kraken2 = ch_input_for_profiling.kraken2 - .dump(tag: "input_to_kraken") .multiMap { it -> reads: [ it[0] + it[2], it[1] ]