From dcda815bfb03697b69f219f26af69708bd2d28b5 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 1 Apr 2022 09:50:08 +0200 Subject: [PATCH] Add final adapterremoval options to match fastp functionality --- conf/modules.config | 70 ++++++++++++++++--- nextflow.config | 2 +- nextflow_schema.json | 23 ++---- .../local/shortread_adapterremoval.nf | 10 ++- 4 files changed, 71 insertions(+), 34 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 71eaa75..dc8b138 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -62,18 +62,15 @@ process { ] } - withName: FASTP { - ext.prefix = { "${meta.id}_${meta.run_accession}" } + withName: FASTP_SINGLE { ext.args = [ - // collapsing options - option to retain singletons - params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", - !{ ${meta.single_end} } && params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : !{ ${meta.single_end} } ? "--detect_adapter_for_pe" : "", // filtering options "--length_required ${params.shortread_clipmerge_minlength}" ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ path: { "${params.outdir}/fastp" }, mode: 'copy', @@ -81,6 +78,61 @@ process { ] } + withName: FASTP_PAIRED { + ext.args = [ + // collapsing options - option to retain singletons + params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe", + // filtering options + "--length_required ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/fastp" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: ADAPTERREMOVAL_SINGLE { + ext.args = [ + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", + // filtering options + "--minlength ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/adapterremoval" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: ADAPTERREMOVAL_PAIRED { + ext.args = [ + // collapsing options + params.shortread_clipmerge_mergepairs ? "--collapse" : "", + // trimming options + params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "", + params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "", + params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "", + // filtering options + "--minlength ${params.shortread_clipmerge_minlength}" + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/adapterremoval" }, + mode: 'copy', + pattern: '*.fastq.gz' + ] + } + + withName: PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -99,23 +151,23 @@ process { } withName: MALT_RUN { + ext.args = { "${meta.db_params}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: 'copy', pattern: '*.{rma6,tab,text,sam,log}' ] - ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: KRAKEN2_KRAKEN2 { + ext.args = { "${meta.db_params}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: 'copy', pattern: '*.{fastq.gz,txt}' ] - ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/nextflow.config b/nextflow.config index 6fde513..7be36a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,7 @@ params { shortread_clipmerge_tool = 'fastp' shortread_clipmerge_skipadaptertrim = false shortread_clipmerge_mergepairs = false - shortread_clipmerge_excludeunmerged = true + shortread_clipmerge_excludeunmerged = false shortread_clipmerge_adapter1 = null shortread_clipmerge_adapter2 = null shortread_clipmerge_minlength = 15 diff --git a/nextflow_schema.json b/nextflow_schema.json index ebb748a..fb2ca31 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -277,7 +267,7 @@ }, "shortread_clipmerge_excludeunmerged": { "type": "boolean", - "default": true + "default": false }, "longread_clip": { "type": "boolean" @@ -295,10 +285,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -319,4 +306,4 @@ "default": 15 } } -} \ No newline at end of file +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b09356a..b467a64 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -16,7 +16,6 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_multiqc_files = Channel.empty() ch_input_for_adapterremoval = reads - .dump(tag: "pre_adapterremoval_branch") .branch{ single: it[0]['single_end'] == true paired: it[0]['single_end'] == false @@ -36,11 +35,13 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 + meta_new['single_end'] = true [ meta_new, reads ] } .groupTuple(by: 0) + + ch_adapterremoval_reads_prepped_pe = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) @@ -51,7 +52,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 + meta_new['single_end'] = true [ meta_new, reads ] } @@ -65,13 +66,10 @@ workflow SHORTREAD_ADAPTERREMOVAL { ch_adapterremoval_reads_prepped_pe = ADAPTERREMOVAL_PAIRED.out.pair1_truncated .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated ) - .dump(tag: "pre-group") .groupTuple(by: 0) - .dump(tag: "post-group") .map { meta, pair1, pair2 -> [ meta, [ pair1, pair2 ].flatten() ] } - .dump(tag: "post-map") ch_adapterremoval_reads_prepped = ch_adapterremoval_reads_prepped_pe