From 7b08c49cd6cb6471384a26c1202733cad0fe58ae Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:54:54 +0200 Subject: [PATCH 01/19] Re-add run merging and gonna let GHA see if it works >.> --- .github/workflows/ci.yml | 1 + nextflow.config | 2 ++ workflows/taxprofiler.nf | 25 +++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..7678645 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--run_merging" steps: - name: Check out pipeline code diff --git a/nextflow.config b/nextflow.config index 19cc823..1c69d36 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,6 +76,8 @@ params { shortread_complexityfilter_prinseqplusplus_dustscore = 0.5 save_complexityfiltered_reads = false + // run merging + run_merging = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 3b08402..61eda6e 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -125,13 +125,34 @@ workflow TAXPROFILER { ch_shortreads_filtered = ch_shortreads_preprocessed } + /* + STEP: Run merging + */ + + if ( params.run_merging ) { + ch_reads_for_cat = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['run_accession'].remove() + [ meta_new, reads ] + } + .groupTuple() + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) + + } else { + ch_reads_runmerged = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_filtered - .mix( ch_longreads_preprocessed ) + ch_input_for_profiling = ch_reads_runmerged .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 74c496f6af04e15e1625d7311791004a645b3a21 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:58:16 +0200 Subject: [PATCH 02/19] Fix CAT_FASTQ output --- workflows/taxprofiler.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 61eda6e..2d0c17d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,8 +140,7 @@ workflow TAXPROFILER { } .groupTuple() - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) - + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -247,6 +246,10 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } + if (params.run_merging){ + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) From 4d726a87e98f56bf1a4e0d52259d9e83f89539ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 12:01:07 +0200 Subject: [PATCH 03/19] Fix metadata removal --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2d0c17d..33d9725 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -135,7 +135,7 @@ workflow TAXPROFILER { .map { meta, reads -> def meta_new = meta.clone() - meta_new['run_accession'].remove() + meta_new.remove('run_accession') [ meta_new, reads ] } .groupTuple() From d130a72d74899c3fd85db3c1c751b7b6848fd031 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:09:23 +0200 Subject: [PATCH 04/19] Get this working --- .github/workflows/ci.yml | 1 + conf/modules.config | 6 +++--- subworkflows/local/shortread_fastp.nf | 4 ++-- workflows/taxprofiler.nf | 18 ++++++++++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7678645..53423cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,7 @@ jobs: - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" - "--run_merging" + - "--run_merging --shortread_clipmerge_mergepairs" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index b59850f..7602e3f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -163,7 +163,7 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +173,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -183,7 +183,7 @@ process { withName: METAPHLAN3 { publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 18baf17..04057b1 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,8 @@ workflow SHORTREAD_FASTP { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + meta_new['single_end'] = true + [ meta_new, reads.flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 33d9725..e04d4d6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -130,7 +130,8 @@ workflow TAXPROFILER { */ if ( params.run_merging ) { - ch_reads_for_cat = ch_shortreads_filtered + + ch_reads_for_cat_branch = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) .map { meta, reads -> @@ -139,8 +140,21 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + // we can't concate files if there is not a second run, we branch + // here to separate them out, and mix after + cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + skip: true + } + + ch_reads_for_cat_branch.cat.dump(tag: "for_catting") + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) From ca011ccc5b363ea9d6c1eaf44713a9c09e471f39 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:28:37 +0200 Subject: [PATCH 05/19] Fix cat_fastq naming logic? --- conf/modules.config | 19 +++++++++++++++---- nextflow.config | 1 + nextflow_schema.json | 38 +++++++++++++++++++++++++++++--------- workflows/taxprofiler.nf | 4 ---- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7602e3f..97e9510 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -161,9 +161,19 @@ process { ] } + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}-${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_runmerged_reads + ] + } + withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +183,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -182,12 +192,13 @@ process { } withName: METAPHLAN3 { + ext.args = { "${meta.db_params}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/nextflow.config b/nextflow.config index 1c69d36..d969ed9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,6 +78,7 @@ params { // run merging run_merging = false + save_runmerged_reads = false // MALT run_malt = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 6858409..0b4b4fb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -288,7 +298,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -309,8 +322,7 @@ "default": 15 }, "save_preprocessed_reads": { - "type": "boolean", - "default": false + "type": "boolean" }, "shortread_complexityfilter_tool": { "type": "string", @@ -333,15 +345,23 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", "default": 0.5 }, "save_complexityfiltered_reads": { - "type": "boolean", - "default": false + "type": "boolean" + }, + "run_merging": { + "type": "boolean" + }, + "save_runmerged_reads": { + "type": "boolean" } } -} +} \ No newline at end of file diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e04d4d6..bdb93ab 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,10 +140,6 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() - .map { - meta, reads -> - [ meta, reads.flatten() ] - } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after From a634814d848a68252d81b231e34942f5fa616c83 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:34:00 +0200 Subject: [PATCH 06/19] Formatting and fix fastp output --- nextflow_schema.json | 26 ++++--------------- .../local/shortread_adapterremoval.nf | 2 +- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b4b4fb..64836df 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -298,10 +288,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -345,10 +332,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -364,4 +348,4 @@ "type": "boolean" } } -} \ No newline at end of file +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b573be9..a7948e7 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq.flatten()] } + .map { meta, fastq -> [meta, [ fastq ].flatten()] } CAT_FASTQ(ch_concat_fastq) From 6c14f2b230a6c6df99e89c44ed8904cb0a5a7b59 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:44:52 +0200 Subject: [PATCH 07/19] Remove the flattening? --- subworkflows/local/shortread_adapterremoval.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a7948e7..735d3b8 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, [ fastq ].flatten()] } + .map { meta, fastq -> [meta, fastq] } CAT_FASTQ(ch_concat_fastq) From 35cb6e042acf3fbe34c9664bebb9c27ef0d96179 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:46:12 +0200 Subject: [PATCH 08/19] Flatten the right thing --- subworkflows/local/shortread_adapterremoval.nf | 2 +- subworkflows/local/shortread_fastp.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 735d3b8..b573be9 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq] } + .map { meta, fastq -> [meta, fastq.flatten()] } CAT_FASTQ(ch_concat_fastq) diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 04057b1..4626691 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -29,7 +29,7 @@ workflow SHORTREAD_FASTP { meta, reads -> def meta_new = meta.clone() meta_new['single_end'] = true - [ meta_new, reads.flatten() ] + [ meta_new, [ reads ].flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) From 8839fe22b8856fb1d69e11e2e1e0a4367cb97dc3 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 20:02:22 +0200 Subject: [PATCH 09/19] Fix output tuple for reads --- workflows/taxprofiler.nf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index bdb93ab..7d5f60f 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -149,7 +149,15 @@ workflow TAXPROFILER { ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + + [ meta, [ reads ].flatten() ] + } + + ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) } else { ch_reads_runmerged = ch_shortreads_filtered From afb66e445fcb2f9c60f4152e1184f5f8600a843c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 21:36:59 +0200 Subject: [PATCH 10/19] Append pairment to ID at profiling to prevent multiqc-level filename crash --- workflows/taxprofiler.nf | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7d5f60f..1da812d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -147,8 +147,6 @@ workflow TAXPROFILER { skip: true } - ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads .mix( ch_reads_for_cat_branch.skip ) .map { @@ -157,8 +155,6 @@ workflow TAXPROFILER { [ meta, [ reads ].flatten() ] } - ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) - } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -168,8 +164,15 @@ workflow TAXPROFILER { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] + // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], [ /2612.merged.fastq.gz ], ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = ch_reads_runmerged + .map { + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? '_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 989dc55ce107ae01331727eb9678a0d41e22de03 Mon Sep 17 00:00:00 2001 From: sofstam Date: Mon, 11 Apr 2022 17:42:38 +0200 Subject: [PATCH 11/19] Delete extra nf-core directory --- nf-core/modules/centrifuge/centrifuge/main.nf | 61 ----------------- .../modules/centrifuge/centrifuge/meta.yml | 66 ------------------- 2 files changed, 127 deletions(-) delete mode 100644 nf-core/modules/centrifuge/centrifuge/main.nf delete mode 100644 nf-core/modules/centrifuge/centrifuge/meta.yml diff --git a/nf-core/modules/centrifuge/centrifuge/main.nf b/nf-core/modules/centrifuge/centrifuge/main.nf deleted file mode 100644 index 3d23fc9..0000000 --- a/nf-core/modules/centrifuge/centrifuge/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -process CENTRIFUGE_CENTRIFUGE { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' : - 'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }" - - input: - tuple val(meta), path(reads) - path db - val save_unaligned - val save_aligned - val sam_format - - output: - tuple val(meta), path('*report.txt') , emit: report - tuple val(meta), path('*results.txt') , emit: results - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped - tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" - def unaligned = '' - def aligned = '' - if (meta.single_end) { - unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' - } else { - unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : '' - } - def sam_output = sam_format ? "--out-fmt 'sam'" : '' - """ - ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included - db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/.1.cf//'` - centrifuge \\ - -x \$db_name \\ - -p $task.cpus \\ - $paired \\ - --report-file ${prefix}.report.txt \\ - -S ${prefix}.results.txt \\ - $unaligned \\ - $aligned \\ - $sam_output \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') - END_VERSIONS - """ -} diff --git a/nf-core/modules/centrifuge/centrifuge/meta.yml b/nf-core/modules/centrifuge/centrifuge/meta.yml deleted file mode 100644 index a252c00..0000000 --- a/nf-core/modules/centrifuge/centrifuge/meta.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: centrifuge_centrifuge -description: Classifies metagenomic sequence data -keywords: - - classify - - metagenomics - - fastq - - db -tools: - - centrifuge: - description: Centrifuge is a classifier for metagenomic sequences. - homepage: https://ccb.jhu.edu/software/centrifuge/ - documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml - doi: 10.1101/gr.210641.116 - licence: ["GPL v3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - db: - type: directory - description: Path to directory containing centrifuge database files - - save_unaligned: - type: value - description: If true unmapped fastq files are saved - - save_aligned: - type: value - description: If true mapped fastq files are saved -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - report: - type: file - description: | - File containing a classification summary - pattern: "*.{report.txt}" - - results: - type: file - description: | - File containing classification results - pattern: "*.{results.txt}" - - fastq_unmapped: - type: file - description: Unmapped fastq files - pattern: "*.unmapped.fastq.gz" - - fastq_mapped: - type: file - description: Mapped fastq files - pattern: "*.mapped.fastq.gz" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@sofstam" - - "@jfy133" - - "@sateeshperi" From a5f4fc42d53fce99863a3d135b017b9e468722be Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 09:25:27 +0200 Subject: [PATCH 12/19] Fix run merging for unmerged PE data --- conf/modules.config | 2 +- workflows/taxprofiler.nf | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 97e9510..eb448bb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -162,7 +162,7 @@ process { } withName: CAT_FASTQ { - ext.prefix = { "${meta.id}-${meta.run_accession}" } + ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/run_merging/" }, mode: params.publish_dir_mode, diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1da812d..2eb7e8c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,7 +139,11 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple() + .groupTuple(by: 0) + .map { + meta, reads -> + [ meta, reads.flatten() ] + } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after @@ -151,7 +155,6 @@ workflow TAXPROFILER { .mix( ch_reads_for_cat_branch.skip ) .map { meta, reads -> - [ meta, [ reads ].flatten() ] } From 9f221f84cc66ca121a46d8f68db8f727dec523c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:12:17 +0200 Subject: [PATCH 13/19] Only supply single input channel to profiling, as these are merged into single input channel at run_merging --- subworkflows/local/profiling.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c74c583..07b6b72 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,8 +9,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta workflow PROFILING { take: - shortreads // [ [ meta ], [ reads ] ] - longreads // [ [ meta ], [ reads ] ] + reads // [ [ meta ], [ reads ] ] databases // [ [ meta ], path ] main: @@ -22,9 +21,9 @@ workflow PROFILING { */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = shortreads - .mix( longreads ) + ch_input_for_profiling = reads .combine(databases) + .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 2ef21c6ef3e46ae824ad5c99970b8a5250ef8e38 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:14:05 +0200 Subject: [PATCH 14/19] Fix input to profiling --- subworkflows/local/profiling.nf | 1 - workflows/taxprofiler.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 07b6b72..ac744aa 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,7 +23,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) - .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 0916cac..7c02f4c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -167,7 +167,7 @@ workflow TAXPROFILER { SUBWORKFLOW: PROFILING */ - PROFILING ( ch_reads_runmerged, ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* From 26399718b2440499f294ebc59d669dd78dd7cdb6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:46:03 +0200 Subject: [PATCH 15/19] Re-add pairment attachment --- subworkflows/local/profiling.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index ac744aa..b03b83e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,12 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? '_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 8d689141924a52cf72e666e76db1b575d541a341 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:47:44 +0200 Subject: [PATCH 16/19] Re-add operator name --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b03b83e..8a156c2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,7 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + .map { meta, reads -> def meta_new = meta.clone() pairtype = meta_new['single_end'] ? '_se' : '_pe' From a15c45b00cd18e2148baf7112f15c340261acdef Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:51:27 +0200 Subject: [PATCH 17/19] Put map in the rigt place --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 8a156c2..18de739 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -22,7 +22,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads - .combine(databases) .map { meta, reads -> def meta_new = meta.clone() @@ -30,6 +29,7 @@ workflow PROFILING { meta_new['id'] = meta_new['id'] + pairtype [meta_new, reads] } + .combine(databases) .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 16a3556bfcfddf8195bbcd486653325aaa1de4ee Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 08:26:08 +0200 Subject: [PATCH 18/19] Changes after code review --- workflows/taxprofiler.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7c02f4c..58671b3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,15 +139,16 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple(by: 0) + .groupTuple() .map { meta, reads -> [ meta, reads.flatten() ] } .branch { + meta, reads -> // we can't concatenate files if there is not a second run, we branch // here to separate them out, and mix back in after for efficiency - cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) skip: true } From dfeaa0d1fe79a6f59c27bdb481904b3bbb620234 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 12:00:28 +0200 Subject: [PATCH 19/19] Rename subworkflow parameters for consistency --- .github/workflows/ci.yml | 8 ++++---- conf/modules.config | 6 +++--- conf/test.config | 6 +++--- nextflow.config | 11 ++++++----- nextflow_schema.json | 22 +++++++++++----------- workflows/taxprofiler.nf | 16 ++++++++-------- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 53423cb..c373bc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,8 +29,8 @@ jobs: - NXF_VER: "" NXF_EDGE: "1" parameters: - - "--longread_clip false" - - "--shortread_clip false" + - "--perform_longread_clip false" + - "--perform_shortread_clipmerge false" - "--shortread_clipmerge_tool fastp" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs" @@ -39,8 +39,8 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" - - "--run_merging" - - "--run_merging --shortread_clipmerge_mergepairs" + - "--perform_runmerging" + - "--perform_runmerging --shortread_clipmerge_mergepairs" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index d93486f..42528de 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -173,7 +173,7 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -183,7 +183,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -193,7 +193,7 @@ process { withName: METAPHLAN3 { ext.args = { "${meta.db_params}" } - ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index 6e82300..923dda7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { run_malt = true run_metaphlan3 = true run_centrifuge = true - shortread_clipmerge = true - longread_clip = false - shortread_complexityfilter = true + perform_shortread_clipmerge = true + perform_longread_clip = false + perform_shortread_complexityfilter = true } diff --git a/nextflow.config b/nextflow.config index da8bbdb..b72b4f9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { databases = null // FASTQ preprocessing - shortread_clipmerge = false + perform_shortread_clipmerge = false shortread_clipmerge_tool = 'fastp' shortread_clipmerge_skipadaptertrim = false shortread_clipmerge_mergepairs = false @@ -63,11 +63,11 @@ params { shortread_clipmerge_adapter1 = null shortread_clipmerge_adapter2 = null shortread_clipmerge_minlength = 15 - longread_clip = false + perform_longread_clip = false save_preprocessed_reads = false // Complexity filtering - shortread_complexityfilter = false + perform_shortread_complexityfilter = false shortread_complexityfilter_tool = 'bbduk' shortread_complexityfilter_entropy = 0.3 shortread_complexityfilter_bbduk_windowsize = 50 @@ -77,8 +77,8 @@ params { save_complexityfiltered_reads = false // run merging - run_merging = false - save_runmerged_reads = false + perform_runmerging = false + save_runmerged_reads = false // MALT run_malt = false @@ -92,6 +92,7 @@ params { centrifuge_save_unaligned = false centrifuge_save_aligned = false centrifuge_sam_format = false + // metaphlan3 run_metaphlan3 = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index 4db7fa0..06bd94b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -262,15 +262,9 @@ "type": "string", "default": "None" }, - "shortread_clipmerge": { - "type": "boolean" - }, "shortread_clipmerge_excludeunmerged": { "type": "boolean" }, - "longread_clip": { - "type": "boolean" - }, "run_malt": { "type": "boolean" }, @@ -334,9 +328,6 @@ "shortread_complexityfilter_bbduk_mask": { "type": "boolean" }, - "shortread_complexityfilter": { - "type": "boolean" - }, "shortread_complexityfilter_entropy": { "type": "number", "default": 0.3 @@ -353,10 +344,19 @@ "save_complexityfiltered_reads": { "type": "boolean" }, - "run_merging": { + "save_runmerged_reads": { "type": "boolean" }, - "save_runmerged_reads": { + "perform_shortread_clipmerge": { + "type": "boolean" + }, + "perform_longread_clip": { + "type": "boolean" + }, + "perform_shortread_complexityfilter": { + "type": "boolean" + }, + "perform_runmerging": { "type": "boolean" } } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 58671b3..f086557 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -101,14 +101,14 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ - if ( params.shortread_clipmerge ) { + if ( params.perform_shortread_clipmerge ) { ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq } - if ( params.longread_clip ) { + if ( params.perform_longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } } else { @@ -119,7 +119,7 @@ workflow TAXPROFILER { SUBWORKFLOW: COMPLEXITY FILTERING */ - if ( params.shortread_complexityfilter ) { + if ( params.perform_shortread_complexityfilter ) { ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads } else { ch_shortreads_filtered = ch_shortreads_preprocessed @@ -129,7 +129,7 @@ workflow TAXPROFILER { STEP: Run merging */ - if ( params.run_merging ) { + if ( params.perform_runmerging ) { ch_reads_for_cat_branch = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -190,22 +190,22 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.shortread_clipmerge) { + if (params.perform_shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } - if (params.longread_clip) { + if (params.perform_longread_clip) { ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) } - if (params.shortread_complexityfilter){ + if (params.perform_shortread_complexityfilter){ ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } - if (params.run_merging){ + if (params.perform_runmerging){ ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) }