diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fb6de77..a8078e5 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,6 +28,10 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
+        parameters:
+          - "--shortread_clipmerge_tool fastp"
+          - "--shortread_clipmerge_tool adapterremoval"
+
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -47,6 +51,6 @@ jobs:
         # For example: adding multiple test runs with different parameters
         # Remember that you can parallelise this by using strategy.matrix
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
 #
diff --git a/CITATIONS.md b/CITATIONS.md
index 1018ccd..8f286b0 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -13,9 +13,26 @@
 - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
 
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
 
-* [Porechop](https://github.com/rrwick/Porechop)
+- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
+
+  > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. "Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor." Bioinformatics 34 (17): i884-90. doi: 10.1093/bioinformatics/bty560.
+
+- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
+
+  > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. "AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging." BMC Research Notes 9 (February): 88. doi: 10.1186/s13104-016-1900-2.
+
+- [Porechop](https://github.com/rrwick/Porechop)
+
+- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
+
+  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. "Improved Metagenomic Analysis with Kraken 2." Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
+
+- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
+
+  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. "Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico." Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
 
 ## Software packaging/containerisation tools
diff --git a/conf/modules.config b/conf/modules.config
index 71eaa75..dc8b138 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -62,18 +62,15 @@ process {
         ]
     }
 
-    withName: FASTP {
-        ext.prefix = { "${meta.id}_${meta.run_accession}" }
+    withName: FASTP_SINGLE {
         ext.args = [
-            // collapsing options - option to retain singletons
-            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
             // trimming options
             params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
             params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
-            !{ ${meta.single_end} } && params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : !{ ${meta.single_end} } ? "--detect_adapter_for_pe" : "",
             // filtering options
             "--length_required ${params.shortread_clipmerge_minlength}"
         ].join(' ').trim()
+        ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
             path: { "${params.outdir}/fastp" },
             mode: 'copy',
@@ -81,6 +78,61 @@ process {
         ]
     }
 
+    withName: FASTP_PAIRED {
+        ext.args = [
+            // collapsing options - option to retain singletons
+            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
+            // trimming options
+            params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
+            params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+            params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
+            // filtering options
+            "--length_required ${params.shortread_clipmerge_minlength}"
+        ].join(' ').trim()
+        ext.prefix = { "${meta.id}_${meta.run_accession}" }
+        publishDir = [
+            path: { "${params.outdir}/fastp" },
+            mode: 'copy',
+            pattern: '*.fastq.gz'
+        ]
+    }
+
+    withName: ADAPTERREMOVAL_SINGLE {
+        ext.args = [
+            // trimming options
+            params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+            params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+            // filtering options
+            "--minlength ${params.shortread_clipmerge_minlength}"
+        ].join(' ').trim()
+        ext.prefix = { "${meta.id}_${meta.run_accession}" }
+        publishDir = [
+            path: { "${params.outdir}/adapterremoval" },
+            mode: 'copy',
+            pattern: '*.fastq.gz'
+        ]
+    }
+
+    withName: ADAPTERREMOVAL_PAIRED {
+        ext.args = [
+            // collapsing options
+            params.shortread_clipmerge_mergepairs ? "--collapse" : "",
+            // trimming options
+            params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+            params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+            params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
+            // filtering options
+            "--minlength ${params.shortread_clipmerge_minlength}"
+        ].join(' ').trim()
+        ext.prefix = { "${meta.id}_${meta.run_accession}" }
+        publishDir = [
+            path: { "${params.outdir}/adapterremoval" },
+            mode: 'copy',
+            pattern: '*.fastq.gz'
+        ]
+    }
+
     withName: PORECHOP {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
@@ -99,23 +151,23 @@ process {
     }
 
     withName: MALT_RUN {
+        ext.args = { "${meta.db_params}" }
+        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/malt/${meta.db_name}" },
             mode: 'copy',
             pattern: '*.{rma6,tab,text,sam,log}'
         ]
-        ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
     }
 
     withName: KRAKEN2_KRAKEN2 {
+        ext.args = { "${meta.db_params}" }
+        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: 'copy',
             pattern: '*.{fastq.gz,txt}'
         ]
-        ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
     }
 
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
diff --git a/modules.json b/modules.json
index 34ef9f1..dcfbd3f 100644
--- a/modules.json
+++ b/modules.json
@@ -3,6 +3,9 @@
     "homePage": "https://github.com/nf-core/taxprofiler",
     "repos": {
         "nf-core/modules": {
+            "adapterremoval": {
+                "git_sha": "f0800157544a82ae222931764483331a81812012"
+            },
             "cat/fastq": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
diff --git a/modules/nf-core/modules/adapterremoval/main.nf b/modules/nf-core/modules/adapterremoval/main.nf
new file mode 100644
index 0000000..9d16b9c
--- /dev/null
+++ b/modules/nf-core/modules/adapterremoval/main.nf
@@ -0,0 +1,70 @@
+process ADAPTERREMOVAL {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
+        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path(adapterlist)
+
+    output:
+    tuple val(meta), path("${prefix}.truncated.gz")           , optional: true, emit: singles_truncated
+    tuple val(meta), path("${prefix}.discarded.gz")           , optional: true, emit: discarded
+    tuple val(meta), path("${prefix}.pair1.truncated.gz")     , optional: true, emit: pair1_truncated
+    tuple val(meta), path("${prefix}.pair2.truncated.gz")     , optional: true, emit: pair2_truncated
+    tuple val(meta), path("${prefix}.collapsed.gz")           , optional: true, emit: collapsed
+    tuple val(meta), path("${prefix}.collapsed.truncated.gz") , optional: true, emit: collapsed_truncated
+    tuple val(meta), path("${prefix}.paired.gz")              , optional: true, emit: paired_interleaved
+    tuple val(meta), path('*.log')                            , emit: log
+    path "versions.yml"                                       , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
+    prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (meta.single_end) {
+        """
+        AdapterRemoval \\
+            --file1 $reads \\
+            $args \\
+            $list \\
+            --basename ${prefix} \\
+            --threads ${task.cpus} \\
+            --settings ${prefix}.log \\
+            --seed 42 \\
+            --gzip
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
+        END_VERSIONS
+        """
+    } else {
+        """
+        AdapterRemoval \\
+            --file1 ${reads[0]} \\
+            --file2 ${reads[1]} \\
+            $args \\
+            $list \\
+            --basename ${prefix} \\
+            --threads ${task.cpus} \\
+            --settings ${prefix}.log \\
+            --seed 42 \\
+            --gzip
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
+        END_VERSIONS
+        """
+    }
+
+}
diff --git a/modules/nf-core/modules/adapterremoval/meta.yml b/modules/nf-core/modules/adapterremoval/meta.yml
new file mode 100644
index 0000000..5faad04
--- /dev/null
+++ b/modules/nf-core/modules/adapterremoval/meta.yml
@@ -0,0 +1,90 @@
+name: adapterremoval
+description: Trim sequencing adapters and collapse overlapping reads
+keywords:
+  - trimming
+  - adapters
+  - merging
+  - fastq
+tools:
+  - adapterremoval:
+      description: The AdapterRemoval v2 tool for merging and clipping reads.
+      homepage: https://github.com/MikkelSchubert/adapterremoval
+      documentation: https://adapterremoval.readthedocs.io
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end
+        data, respectively.
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+  - adapterlist:
+      type: file
+      description: |
+        Optional text file containing a list of adapters to search for and
+        remove, one adapter per line. Otherwise the default adapters are used
+        (see the AdapterRemoval man page); user-specified adapter sequences can
+        also be supplied via ext.args.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - singles_truncated:
+      type: file
+      description: |
+        Adapter trimmed FastQ files of either single-end reads, or singleton
+        'orphaned' reads from merging of paired-end data (i.e., one of the pair
+        was lost due to filtering thresholds).
+      pattern: "*.truncated.gz"
+  - discarded:
+      type: file
+      description: |
+        Adapter trimmed FastQ files of reads that did not pass filtering
+        thresholds.
+      pattern: "*.discarded.gz"
+  - pair1_truncated:
+      type: file
+      description: |
+        Adapter trimmed R1 FastQ files of paired-end reads that did not merge
+        with their respective R2 pair due to long templates. The respective pair
+        is stored in 'pair2_truncated'.
+      pattern: "*.pair1.truncated.gz"
+  - pair2_truncated:
+      type: file
+      description: |
+        Adapter trimmed R2 FastQ files of paired-end reads that did not merge
+        with their respective R1 pair due to long templates. The respective pair
+        is stored in 'pair1_truncated'.
+      pattern: "*.pair2.truncated.gz"
+  - collapsed:
+      type: file
+      description: |
+        Collapsed FastQ of paired-end reads that successfully merged with their
+        mate, but were not trimmed further after merging.
+      pattern: "*.collapsed.gz"
+  - collapsed_truncated:
+      type: file
+      description: |
+        Collapsed FastQ of paired-end reads that successfully merged with their
+        mate and were additionally trimmed after merging, due to sufficient
+        overlap.
+      pattern: "*.collapsed.truncated.gz"
+  - log:
+      type: file
+      description: AdapterRemoval log file
+      pattern: "*.log"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@maxibor"
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index 6fde513..7be36a6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -59,7 +59,7 @@ params {
     shortread_clipmerge_tool            = 'fastp'
     shortread_clipmerge_skipadaptertrim = false
     shortread_clipmerge_mergepairs      = false
-    shortread_clipmerge_excludeunmerged = true
+    shortread_clipmerge_excludeunmerged = false
     shortread_clipmerge_adapter1        = null
     shortread_clipmerge_adapter2        = null
     shortread_clipmerge_minlength       = 15
diff --git a/nextflow_schema.json b/nextflow_schema.json
index adc5a73..fb2ca31 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -267,7 +267,7 @@
         },
         "shortread_clipmerge_excludeunmerged": {
             "type": "boolean",
-            "default": true
+            "default": false
         },
         "longread_clip": {
             "type": "boolean"
@@ -284,7 +284,8 @@
         },
         "shortread_clipmerge_tool": {
             "type": "string",
-            "default": "fastp"
+            "default": "fastp",
+            "enum": ["fastp", "adapterremoval"]
         },
         "shortread_clipmerge_skipadaptertrim": {
             "type": "boolean"
@@ -294,11 +295,11 @@
         },
         "shortread_clipmerge_adapter1": {
             "type": "string",
-            "default": null
+            "default": "None"
         },
         "shortread_clipmerge_adapter2": {
             "type": "string",
-            "default": null
+            "default": "None"
         },
         "shortread_clipmerge_minlength": {
             "type": "integer",
diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf
index bda5cfe..356915a 100644
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@@ -18,9 +18,7 @@ workflow DB_CHECK {
     parsed_samplesheet = DATABASE_CHECK ( dbsheet )
         .csv
         .splitCsv ( header:true, sep:',' )
-        .dump(tag: "db_split_csv_out")
         .map { create_db_channels(it) }
-        .dump(tag: "db_channel_prepped")
 
     ch_dbs_for_untar = parsed_samplesheet
         .branch {
diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
index 4501386..ec404c2 100644
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@@ -12,7 +12,6 @@ workflow INPUT_CHECK {
     parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
         .csv
         .splitCsv ( header:true, sep:',' )
-        .dump(tag: "input_split_csv_out")
         .branch {
             fasta: it['fasta'] != ''
             nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
@@ -21,17 +20,14 @@ workflow INPUT_CHECK {
 
     parsed_samplesheet.fastq
         .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_channel_init")
         .set { fastq }
 
     parsed_samplesheet.nanopore
         .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_nanopore_channel_init")
         .set { nanopore }
 
     parsed_samplesheet.fasta
         .map { create_fasta_channel(it) }
-        .dump(tag: "fasta_channel_init")
         .set { fasta }
 
     emit:
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index 7c7c24b..a1515c7 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
     PORECHOP ( reads )
 
     ch_processed_reads = PORECHOP.out.reads
-        .dump(tag: "pre_fastqc_check")
         .map {
             meta, reads ->
             def meta_new = meta.clone()
diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
new file mode 100644
index 0000000..5e005db
--- /dev/null
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -0,0 +1,86 @@
+/*
+Process short raw reads with AdapterRemoval
+*/
+
+include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../modules/nf-core/modules/adapterremoval/main'
+include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../modules/nf-core/modules/adapterremoval/main'
+include { CAT_FASTQ                               } from '../../modules/nf-core/modules/cat/fastq/main'
+
+workflow SHORTREAD_ADAPTERREMOVAL {
+
+    take:
+    reads // [[meta], [reads]]
+
+    main:
+    ch_versions = Channel.empty()
+    ch_multiqc_files = Channel.empty()
+
+    ch_input_for_adapterremoval = reads
+        .branch {
+            single: it[0].single_end
+            paired: !it[0].single_end
+        }
+
+    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
+    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
+
+    // Due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
+    // has to be exported in a separate channel, and we must manually recombine them when necessary.
+
+    if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
+        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
+            .mix(
+                ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
+                ADAPTERREMOVAL_PAIRED.out.singles_truncated,
+                ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
+                ADAPTERREMOVAL_PAIRED.out.pair2_truncated
+            )
+            .map {
+                meta, reads ->
+                def meta_new = meta.clone()
+                meta_new.single_end = true
+
+                [ meta_new, reads ]
+            }
+            .groupTuple()
+
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+
+    } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
+        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
+            .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
+            .map {
+                meta, reads ->
+                def meta_new = meta.clone()
+                meta_new['single_end'] = true
+
+                [ meta_new, reads ]
+            }
+            .groupTuple(by: 0)
+
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+
+    } else {
+
+        ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
+            .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
+            .groupTuple()
+            .map { meta, pair1, pair2 ->
+                [ meta, [ pair1, pair2 ].flatten() ]
+            }
+            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+    }
+
+    ch_processed_reads = ch_adapterremoval_reads_prepped
+
+    ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
+    ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
+    ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )
+
+    emit:
+    reads    = ch_processed_reads // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions        // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files
+}
diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf
index c2e435c..48817db 100644
--- a/subworkflows/local/shortread_fastp.nf
+++ b/subworkflows/local/shortread_fastp.nf
@@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
     ch_multiqc_files = Channel.empty()
 
     ch_input_for_fastp = reads
-        .dump(tag: "pre-fastp_branch")
         .branch {
             single: it[0]['single_end'] == true
             paired: it[0]['single_end'] == false
         }
 
-    ch_input_for_fastp.single.dump(tag: "input_fastp_single")
-    ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
-
     FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
     // Last parameter here turns on merging of PE data
     FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
@@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
     ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
     ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
 
-    ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
+    ch_processed_reads = ch_fastp_reads_prepped
 
     ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
     ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
 
-    ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
-
     emit:
     reads    = ch_processed_reads // channel: [ val(meta), [ reads ] ]
     versions = ch_versions        // channel: [ versions.yml ]
diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf
index 7fba0c0..1d0caac 100644
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@@ -4,6 +4,7 @@
 
 include { SHORTREAD_FASTP            } from './shortread_fastp'
+include { SHORTREAD_ADAPTERREMOVAL   } from './shortread_adapterremoval'
 include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
 
 workflow SHORTREAD_PREPROCESSING {
@@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
         ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
         ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
         ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
+    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
+        ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
     } else {
         ch_processed_reads = reads
     }
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index af5c54d..ce89a91 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
 if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-end reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when pair merging is not turned on. Please specify --shortread_clipmerge_mergepairs"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -126,7 +127,6 @@ workflow TAXPROFILER {
     ch_input_for_profiling = ch_shortreads_preprocessed
         .mix( ch_longreads_preprocessed )
         .combine(DB_CHECK.out.dbs)
-        .dump(tag: "reads_plus_db_clean")
         .branch {
             malt:    it[2]['tool'] == 'malt'
             kraken2: it[2]['tool'] == 'kraken2'
@@ -141,9 +141,7 @@
 
     // loading takes a long time, so we only want to run it once per database
     // TODO document somewhere we only accept illumina short reads for MALT?
     ch_input_for_malt = ch_input_for_profiling.malt
-        .dump(tag: "input_to_malt_prefilter")
         .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
-        .dump(tag: "input_to_malt_postfilter")
         .map {
             it ->
                 def temp_meta = [ id: it[2]['db_name'] ] + it[2]
                 def db = it[2]['db_path']
                 [ temp_meta, it[1], db ]
         }
         .groupTuple(by: [0,2])
-        .dump(tag: "input_to_malt")
         .multiMap {
             it ->
                 reads: [ it[0], it[1].flatten() ]
                 db: it[2]
         }
 
     // We can run Kraken2 one-by-one sample-wise
     ch_input_for_kraken2 = ch_input_for_profiling.kraken2
-        .dump(tag: "input_to_kraken")
         .multiMap {
             it ->
                 reads: [ it[0] + it[2], it[1] ]
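
Usage sketch (not part of the patch; a minimal example assuming the repository's bundled test profile, using only parameters introduced in this diff): the three invocations below exercise the three branches of the new SHORTREAD_ADAPTERREMOVAL subworkflow, mirroring what the extended CI matrix above now runs.

    # Trim adapters only; paired-end reads stay as separate R1/R2 files (the else branch)
    nextflow run . -profile test,docker --outdir ./results \
        --shortread_clipmerge_tool adapterremoval

    # Additionally collapse overlapping pairs, retaining unmerged pairs and singletons
    nextflow run . -profile test,docker --outdir ./results \
        --shortread_clipmerge_tool adapterremoval \
        --shortread_clipmerge_mergepairs

    # Collapse overlapping pairs and keep only successfully merged reads
    nextflow run . -profile test,docker --outdir ./results \
        --shortread_clipmerge_tool adapterremoval \
        --shortread_clipmerge_mergepairs \
        --shortread_clipmerge_excludeunmerged

Note that passing --shortread_clipmerge_excludeunmerged without --shortread_clipmerge_mergepairs now exits with an error, per the guard added to workflows/taxprofiler.nf.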