diff --git a/conf/modules.config b/conf/modules.config
index 29a5135..36bc626 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -52,12 +52,28 @@ process {
         ]
     }
 
+    withName: FASTQC_PROCESSED {
+        ext.args = '--quiet'
+        ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
+        publishDir = [
+            path: { "${params.outdir}/fastqc/processed" },
+            mode: 'copy',
+            pattern: '*.html'
+        ]
+    }
+
     withName: FASTP {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
-        // TODO also include option to NOT merge
-        ext.args = [
-            { ${meta.single_end} } == 0 ? "-m" : '',
-            params.shortread_excludeunmerged ? '' : "--include_unmerged"
-        ].join(' ').trim()
+        // Closure so the options are built per task, where `meta` (and thus meta.single_end) is in scope
+        ext.args = { [
+            // collapsing options
+            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
+            // trimming options
+            params.shortread_clipmerge_skiptrim ? "--disable_adapter_trimming" : "",
+            params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+            ( !meta.single_end && params.shortread_clipmerge_adapter2 ) ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : ( !meta.single_end ? "--detect_adapter_for_pe" : "" ),
+            // filtering options
+            "--length_required ${params.shortread_clipmerge_minlength}"
+        ].join(' ').trim() }
         publishDir = [
             path: { "${params.outdir}/fastp" },
diff --git a/nextflow.config b/nextflow.config
index cc77a99..a312d0c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,9 +55,15 @@ params {
     databases                  = null
 
     // FASTQ preprocessing
-    shortread_clipmerge        = false
-    shortread_excludeunmerged  = true
-    longread_clip              = false
+    shortread_clipmerge                  = false
+    shortread_clipmerge_tool             = 'fastp'
+    shortread_clipmerge_skiptrim         = false
+    shortread_clipmerge_mergepairs       = false
+    shortread_clipmerge_excludeunmerged  = true
+    shortread_clipmerge_adapter1         = null
+    shortread_clipmerge_adapter2         = null
+    shortread_clipmerge_minlength        = 15
+    longread_clip                        = false
 
     // MALT
     run_malt                   = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9527da4..0fa217f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -265,7 +265,7 @@
         "shortread_clipmerge": {
             "type": "boolean"
         },
-        "shortread_excludeunmerged": {
+        "shortread_clipmerge_excludeunmerged": {
             "type": "boolean",
             "default": true
         },
diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf
new file mode 100644
index 0000000..87aba25
--- /dev/null
+++ b/subworkflows/local/shortread_fastp.nf
@@ -0,0 +1,65 @@
+//
+// Clip adapters from (and optionally merge) short reads with fastp
+//
+
+
+include { FASTP as FASTP_SINGLE       } from '../../modules/nf-core/modules/fastp/main'
+include { FASTP as FASTP_PAIRED       } from '../../modules/nf-core/modules/fastp/main'
+
+workflow SHORTREAD_FASTP {
+    take:
+    reads // channel: [ val(meta), [ reads ] ]; meta must carry a boolean 'single_end'
+
+    main:
+    ch_versions           = Channel.empty()
+    ch_multiqc_files      = Channel.empty()
+
+    //
+    // STEP: Read clipping and merging
+    //
+    // TODO give option to retain singletons (probably fastp option likely)
+    // TODO move to subworkflow
+
+    ch_input_for_fastp = reads
+                            .dump(tag: "pre-fastp_branch")
+                            .branch{
+                                single: it[0]['single_end'] == true
+                                paired: it[0]['single_end'] == false
+                            }
+
+    ch_input_for_fastp.single.dump(tag: "input_fastp_single")
+    ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
+
+    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
+    FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
+
+    if ( params.shortread_clipmerge_mergepairs ) {
+        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
+                                    .mix( FASTP_SINGLE.out.reads )
+                                    .map {
+                                        meta, reads ->
+                                        def meta_new = meta.clone()
+                                        meta_new['single_end'] = 1
+                                        [ meta_new, reads ]
+                                    }
+    } else {
+        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
+                                    .mix( FASTP_SINGLE.out.reads )
+    }
+
+    ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
+    ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
+
+    ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
+
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
+
+    ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
+
+    emit:
+    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions          // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files
+}
+
diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf
index d996a76..c31289d 100644
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@@ -3,17 +3,16 @@
 //
 
 
-include { FASTP as FASTP_SINGLE       } from '../../modules/nf-core/modules/fastp/main'
-include { FASTP as FASTP_PAIRED       } from '../../modules/nf-core/modules/fastp/main'
-include { FASTQC as FASTQC_POST       } from '../../modules/nf-core/modules/fastqc/main'
+include { SHORTREAD_FASTP             } from './shortread_fastp'
+include { FASTQC as FASTQC_PROCESSED  } from '../../modules/nf-core/modules/fastqc/main'
 
 workflow SHORTREAD_PREPROCESSING {
     take:
     reads // file: /path/to/samplesheet.csv
 
     main:
-    ch_versions = Channel.empty()
-    ch_multiqc_files = Channel.empty()
+    ch_versions           = Channel.empty()
+    ch_multiqc_files      = Channel.empty()
 
     //
     // STEP: Read clipping and merging
@@ -22,50 +21,20 @@ workflow SHORTREAD_PREPROCESSING {
     // TODO give option to retain singletons (probably fastp option likely)
     // TODO move to subworkflow
 
-
-    if ( params.shortread_clipmerge ) {
-
-        ch_input_for_fastp = reads
-                            .dump(tag: "pre-fastp_branch")
-                            .branch{
-                                single: it[0]['single_end'] == true
-                                paired: it[0]['single_end'] == false
-                            }
-
-        ch_input_for_fastp.single.dump(tag: "input_fastp_single")
-        ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
-
-        FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
-        FASTP_PAIRED ( ch_input_for_fastp.paired, false, true )
-
-        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
-                                    .mix( FASTP_SINGLE.out.reads )
-                                    .map {
-                                        meta, reads ->
-                                        def meta_new = meta.clone()
-                                        meta_new['single_end'] = 1
-                                        [ meta_new, reads ]
-                                    }
-
-        FASTQC_POST ( ch_fastp_reads_prepped )
-
-        ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
-        ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
-
-        ch_processed_reads = ch_fastp_reads_prepped
-
-        ch_multiqc_files = ch_multiqc_files.mix( FASTQC_POST.out.zip.collect{it[1]} )
-        ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
-        ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
-
-        ch_multiqc_files.dump(tag: "preprocessing_mqc_final")
-
+    if ( params.shortread_clipmerge_tool == "fastp" ) {
+        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
+        ch_versions        = ch_versions.mix( SHORTREAD_FASTP.out.versions )
+        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
     } else {
         ch_processed_reads = reads
     }
 
+    //FASTQC_PROCESSED ( ch_processed_reads )
+    //ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
+    //ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
 
     emit:
+    // TODO: problem, this is being exported as a multi-channel output? This is why FASTQC is broken
     reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
     versions = ch_versions          // channel: [ versions.yml ]
     mqc      = ch_multiqc_files
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 29058e6..0a907bf 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -17,6 +17,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
+if (!params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -135,6 +136,7 @@ workflow TAXPROFILER {
 
     CAT_FASTQ ( ch_processed_for_combine.combine )
 
+    // TODO May need to flatten reads?
     ch_reads_for_profiling = ch_processed_for_combine.skip
                                 .dump(tag: "skip_combine")
                                 .mix( CAT_FASTQ.out.reads )