diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..a1ece72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,8 +29,8 @@ jobs: - NXF_VER: "" NXF_EDGE: "1" parameters: - - "--longread_clip false" - - "--shortread_clip false" + - "--perform_longread_clip false" + - "--perform_shortread_clipmerge false" - "--shortread_clipmerge_tool fastp" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged" - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs" @@ -39,6 +39,9 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--perform_runmerging" + - "--perform_runmerging --shortread_clipmerge_mergepairs" + - "--perform_shortread_complexityfilter false --perform_shortread_hostremoval" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index 2f5710e..ccd1748 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -132,6 +132,24 @@ process { ] } + withName: BOWTIE2_BUILD { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/build" }, + mode: params.publish_dir_mode, + pattern: '*.bt2' + ] + } + + withName: BOWTIE2_ALIGN { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/align" }, + mode: params.publish_dir_mode, + pattern: '*.{fastq.gz,bam}' + ] + } + withName: BBMAP_BBDUK { ext.args = [ "entropy=${params.shortread_complexityfilter_entropy}", @@ -161,9 +179,19 @@ process { ] } + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_runmerged_reads + ] + } + withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = { 
"${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +201,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -182,12 +210,13 @@ process { } withName: METAPHLAN3 { + ext.args = { "${meta.db_params}" } + ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CENTRIFUGE_CENTRIFUGE { diff --git a/conf/test.config b/conf/test.config index 6e82300..9fa5de8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,7 +28,9 @@ params { run_malt = true run_metaphlan3 = true run_centrifuge = true - shortread_clipmerge = true - longread_clip = false - shortread_complexityfilter = true + perform_shortread_clipmerge = true + perform_longread_clip = false + perform_shortread_complexityfilter = true + perform_shortread_hostremoval = true + shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/modules.json b/modules.json index e921454..18c0e69 100644 --- a/modules.json +++ b/modules.json @@ -9,6 +9,12 @@ "bbmap/bbduk": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "bowtie2/align": { + "git_sha": 
"e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bowtie2/build": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 0000000..7e8a965 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,77 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + + output: + tuple val(meta), path('*.bam') , emit: bam + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def unaligned = save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -U $reads \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } else { + def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 0000000..f80421e --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,51 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an 
ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 0000000..a4da62d --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 0000000..2da9a21 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. 
+ homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 8ddf365..bf3ca92 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,7 @@ params { databases = null // FASTQ preprocessing - shortread_clipmerge = false + perform_shortread_clipmerge = false shortread_clipmerge_tool = 'fastp' shortread_clipmerge_skipadaptertrim = false shortread_clipmerge_mergepairs = false @@ -63,11 +63,11 @@ params { shortread_clipmerge_adapter1 = null shortread_clipmerge_adapter2 = null shortread_clipmerge_minlength = 15 - longread_clip = false + perform_longread_clip = false save_preprocessed_reads = false // Complexity filtering - shortread_complexityfilter = false + perform_shortread_complexityfilter = false shortread_complexityfilter_tool = 'bbduk' shortread_complexityfilter_entropy = 0.3 shortread_complexityfilter_bbduk_windowsize = 50 @@ -76,6 +76,14 @@ params { shortread_complexityfilter_prinseqplusplus_dustscore = 0.5 save_complexityfiltered_reads = false + // run merging + perform_runmerging = false + save_runmerged_reads = false + + // Host Removal + perform_shortread_hostremoval = false + shortread_hostremoval_reference = null + shortread_hostremoval_index = null // MALT run_malt = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 2b115eb..cf0edab 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -262,15 +262,9 @@ "type": "string", "default": "None" }, - "shortread_clipmerge": { - "type": "boolean" - }, 
"shortread_clipmerge_excludeunmerged": { "type": "boolean" }, - "longread_clip": { - "type": "boolean" - }, "run_malt": { "type": "boolean" }, @@ -321,8 +315,7 @@ "default": 15 }, "save_preprocessed_reads": { - "type": "boolean", - "default": false + "type": "boolean" }, "shortread_complexityfilter_tool": { "type": "string", @@ -335,9 +328,6 @@ "shortread_complexityfilter_bbduk_mask": { "type": "boolean" }, - "shortread_complexityfilter": { - "type": "boolean" - }, "shortread_complexityfilter_entropy": { "type": "number", "default": 0.3 @@ -352,8 +342,33 @@ "default": 0.5 }, "save_complexityfiltered_reads": { - "type": "boolean", - "default": false + "type": "boolean" + }, + "save_runmerged_reads": { + "type": "boolean" + }, + "perform_shortread_clipmerge": { + "type": "boolean" + }, + "perform_longread_clip": { + "type": "boolean" + }, + "perform_shortread_complexityfilter": { + "type": "boolean" + }, + "perform_runmerging": { + "type": "boolean" + }, + "perform_shortread_hostremoval": { + "type": "boolean" + }, + "shortread_hostremoval_reference": { + "type": "string", + "default": null + }, + "shortread_hostremoval_index": { + "type": "string", + "default": null } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 59b0dc0..23b96e7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,8 +9,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta workflow PROFILING { take: - shortreads // [ [ meta ], [ reads ] ] - longreads // [ [ meta ], [ reads ] ] + reads // [ [ meta ], [ reads ] ] databases // [ [ meta ], path ] main: @@ -23,8 +22,14 @@ workflow PROFILING { */ // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = shortreads - .mix( longreads ) + ch_input_for_profiling = reads + .map { + meta, reads -> + def meta_new = meta.clone() + def pairtype = meta_new['single_end'] ? '_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .combine(databases) .branch { malt: it[2]['tool'] == 'malt' diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 9fb9425..6fed2ae 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,8 @@ workflow SHORTREAD_FASTP { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + meta_new['single_end'] = true + [ meta_new, [ reads ].flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf new file mode 100644 index 0000000..505f989 --- /dev/null +++ b/subworkflows/local/shortread_hostremoval.nf @@ -0,0 +1,34 @@ +// +// Remove host reads via alignment and export off-target reads +// + +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' + +workflow SHORTREAD_HOSTREMOVAL { + take: + reads // [ [ meta ], [ reads ] ] + reference // /path/to/fasta + index // /path/to/index + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( !params.shortread_hostremoval_index ) { + ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } else { + ch_bowtie2_index = index.first() + } + + BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true ) 
+ ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) + + emit: + reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 4b9f927..9fe8cc8 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -11,7 +11,9 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.databases, params.multiqc_config ] +def checkPathParamList = [ params.input, params.databases, params.shortread_hostremoval_reference, + params.shortread_hostremoval_index, params.multiqc_config + ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters @@ -20,6 +22,12 @@ if (params.databases) { ch_databases = file(params.databases) } else { exit 1, ' if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" +if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --perform_shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." 
} +if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } + +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } +if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -43,6 +51,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { PROFILING } from '../subworkflows/local/profiling' @@ -101,16 +110,17 @@ workflow TAXPROFILER { /* SUBWORKFLOW: PERFORM PREPROCESSING */ - if ( params.shortread_clipmerge ) { + if ( params.perform_shortread_clipmerge ) { ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads } else { ch_shortreads_preprocessed = INPUT_CHECK.out.fastq } - if ( params.longread_clip ) { + if ( params.perform_longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) } else { ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } @@ -119,17 +129,63 @@ workflow TAXPROFILER { SUBWORKFLOW: COMPLEXITY FILTERING */ - if ( params.shortread_complexityfilter ) { + if ( 
params.perform_shortread_complexityfilter ) { ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads } else { ch_shortreads_filtered = ch_shortreads_preprocessed } + /* + SUBWORKFLOW: HOST REMOVAL + */ + + if ( params.perform_shortread_hostremoval ) { + ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads + ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first()) + } else { + ch_shortreads_hostremoved = ch_shortreads_filtered + } + + if ( params.perform_runmerging ) { + + ch_reads_for_cat_branch = ch_shortreads_hostremoved + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new.remove('run_accession') + [ meta_new, reads ] + } + .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + meta, reads -> + // we can't concatenate files if there is not a second run, we branch + // here to separate them out, and mix back in after for efficiency + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) + skip: true + } + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + [ meta, [ reads ].flatten() ] + } + + } else { + ch_reads_runmerged = ch_shortreads_hostremoved + .mix( ch_longreads_preprocessed ) + } + /* SUBWORKFLOW: PROFILING */ - PROFILING ( ch_shortreads_filtered, ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* @@ -151,21 +207,30 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.shortread_clipmerge) { + if (params.perform_shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix( 
SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions ) } - if (params.longread_clip) { + if (params.perform_longread_clip) { ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) } - if (params.shortread_complexityfilter){ + if (params.perform_shortread_complexityfilter){ ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } + if (params.perform_shortread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions) + } + + if (params.perform_runmerging){ + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + } + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc ) // TODO create multiQC module for metaphlan