diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..c373bc8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,8 +29,8 @@ jobs: - NXF_VER: "" NXF_EDGE: "1" parameters: - - "--longread_clip false" - - "--shortread_clip false" + - "--perform_longread_clip false" + - "--perform_shortread_clipmerge false" - "--shortread_clipmerge_tool fastp" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs" @@ -39,6 +39,8 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--perform_runmerging" + - "--perform_runmerging --shortread_clipmerge_mergepairs" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index 2f5710e..42528de 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -161,9 +161,19 @@ process { ] } + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_runmerged_reads + ] + } + withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +183,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -182,12 +192,13 @@ process { } withName: METAPHLAN3 { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CENTRIFUGE_CENTRIFUGE { diff --git a/conf/test.config b/conf/test.config index 6e82300..923dda7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,7 @@ params { run_malt = true run_metaphlan3 = true run_centrifuge = true - shortread_clipmerge = true - longread_clip = false - shortread_complexityfilter = true + perform_shortread_clipmerge = true + perform_longread_clip = false + perform_shortread_complexityfilter = true } diff --git a/nextflow.config b/nextflow.config index b4a8d91..b72b4f9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { databases = null // FASTQ preprocessing - shortread_clipmerge = false + perform_shortread_clipmerge = false shortread_clipmerge_tool = 'fastp' shortread_clipmerge_skipadaptertrim = false shortread_clipmerge_mergepairs = false @@ -63,11 +63,11 @@ params { shortread_clipmerge_adapter1 = null shortread_clipmerge_adapter2 = null shortread_clipmerge_minlength = 15 - longread_clip = false + perform_longread_clip = false save_preprocessed_reads = false // Complexity filtering - shortread_complexityfilter = false + perform_shortread_complexityfilter = false shortread_complexityfilter_tool = 'bbduk' shortread_complexityfilter_entropy = 0.3 shortread_complexityfilter_bbduk_windowsize = 50 @@ -76,6 +76,9 @@ params { shortread_complexityfilter_prinseqplusplus_dustscore = 0.5 save_complexityfiltered_reads = false + // run merging + perform_runmerging = false + save_runmerged_reads = false // MALT run_malt = false @@ -89,6 +92,7 @@ params { centrifuge_save_unaligned = false centrifuge_save_aligned = false centrifuge_sam_format = false + // metaphlan3 run_metaphlan3 = false } diff --git a/nextflow_schema.json b/nextflow_schema.json index 2b115eb..06bd94b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -262,15 +262,9 @@ "type": "string", "default": "None" }, - "shortread_clipmerge": { - "type": "boolean" - }, "shortread_clipmerge_excludeunmerged": { "type": "boolean" }, - "longread_clip": { - "type": "boolean" - }, "run_malt": { "type": "boolean" }, @@ -321,8 +315,7 @@ "default": 15 }, "save_preprocessed_reads": { - "type": "boolean", - "default": false + "type": "boolean" }, "shortread_complexityfilter_tool": { "type": "string", @@ -335,9 +328,6 @@ "shortread_complexityfilter_bbduk_mask": { "type": "boolean" }, - "shortread_complexityfilter": { - "type": "boolean" - }, "shortread_complexityfilter_entropy": { "type": "number", "default": 0.3 @@ -352,8 +342,22 @@ "default": 0.5 }, "save_complexityfiltered_reads": { - "type": "boolean", - "default": false + "type": "boolean" + }, + "save_runmerged_reads": { + "type": "boolean" + }, + "perform_shortread_clipmerge": { + "type": "boolean" + }, + "perform_longread_clip": { + "type": "boolean" + }, + "perform_shortread_complexityfilter": { + "type": "boolean" + }, + "perform_runmerging": { + "type": "boolean" } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c74c583..18de739 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,8 +9,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta workflow PROFILING { take: - shortreads // [ [ meta ], [ reads ] ] - longreads // [ [ meta ], [ reads ] ] + reads // [ [ meta ], [ reads ] ] databases // [ [ meta ], path ] main: @@ -22,8 +21,14 @@ workflow PROFILING { */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = shortreads - .mix( longreads ) + ch_input_for_profiling = reads + .map { + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? '_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .combine(databases) .branch { malt: it[2]['tool'] == 'malt' diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 9fb9425..6fed2ae 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,8 @@ workflow SHORTREAD_FASTP { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + meta_new['single_end'] = true + [ meta_new, [ reads ].flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 4b9f927..f086557 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -101,14 +101,14 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ - if ( params.shortread_clipmerge ) { + if ( params.perform_shortread_clipmerge ) { ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq } - if ( params.longread_clip ) { + if ( params.perform_longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } } else { @@ -119,17 +119,56 @@ workflow TAXPROFILER { SUBWORKFLOW: COMPLEXITY FILTERING */ - if ( params.shortread_complexityfilter ) { + if ( params.perform_shortread_complexityfilter ) { ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads } else { ch_shortreads_filtered = ch_shortreads_preprocessed } + /* + STEP: Run merging + */ + + if ( params.perform_runmerging ) { + + ch_reads_for_cat_branch = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new.remove('run_accession') + [ meta_new, reads ] + } + .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + meta, reads -> + // we can't concatenate files if there is not a second run, we branch + // here to separate them out, and mix back in after for efficiency + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) + skip: true + } + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + [ meta, [ reads ].flatten() ] + } + + } else { + ch_reads_runmerged = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + } + /* SUBWORKFLOW: PROFILING */ - PROFILING ( ch_shortreads_filtered, ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* @@ -151,21 +190,25 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.shortread_clipmerge) { + if (params.perform_shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } - if (params.longread_clip) { + if (params.perform_longread_clip) { ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) } - if (params.shortread_complexityfilter){ + if (params.perform_shortread_complexityfilter){ ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } + if (params.perform_runmerging){ + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + } + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc ) // TODO create multiQC module for metaphlan