Merge pull request #34 from nf-core/adapterremoval

Add AdapterRemoval as alternative trimmer/merger
2024-11-24 20:19:55 +00:00 · 2022-04-02 10:33:45 +02:00 · 2022-04-02 10:33:45 +02:00 · 1dfbcacf68
commit 1dfbcacf68
parent 323883bd3e 93aaa9eeac
15 changed files with 346 additions and 35 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -28,6 +28,10 @@ jobs:
          # Test latest edge release of Nextflow
          - NXF_VER: ""
            NXF_EDGE: "1"
        parameters:
          - "--shortread_clipmerge_tool fastp"
          - "--shortread_clipmerge_tool adapterremoval"
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v2
@ -47,6 +51,6 @@ jobs:
        # For example: adding multiple test runs with different parameters
        # Remember that you can parallelise this by using strategy.matrix
        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
 #
--- a/CITATIONS.md
+++ b/CITATIONS.md
@ -13,9 +13,26 @@
 - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
-* [Porechop](https://github.com/rrwick/Porechop)
+- [fastp](https://doi.org/10.1093/bioinformatics/bty560)
  > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
 - [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
  > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
 - [Porechop](https://github.com/rrwick/Porechop)
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
 ## Software packaging/containerisation tools
--- a/conf/modules.config
+++ b/conf/modules.config
@ -62,18 +62,15 @@ process {
        ]
    }
-    withName: FASTP {
+    withName: FASTP_SINGLE {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        ext.args   = [
            // collapsing options - option to retain singletons
            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
            // trimming options
            params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
            params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
            !{ ${meta.single_end} } && params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : !{ ${meta.single_end} } ? "--detect_adapter_for_pe" : "",
            // filtering options
            "--length_required ${params.shortread_clipmerge_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            path: { "${params.outdir}/fastp" },
            mode: 'copy',
@ -81,6 +78,61 @@ process {
        ]
    }
    // Settings for the process selected by the name FASTP_PAIRED.
    withName: FASTP_PAIRED {
        // The command-line string is assembled from option fragments joined with a
        // single space; options that are switched off contribute an empty string.
        ext.args   = [
            // collapsing options - option to retain singletons
            // (--include_unmerged is added unless shortread_clipmerge_excludeunmerged is set)
            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
            // trimming options
            params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
            params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
            // without a user-supplied second adapter, --detect_adapter_for_pe is passed instead
            params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
            // filtering options
            "--length_required ${params.shortread_clipmerge_minlength}"
        ].join(' ').trim()
        // Output prefix is evaluated per task from the sample id and run accession
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Copy files matching *.fastq.gz into <outdir>/fastp
        publishDir = [
            path: { "${params.outdir}/fastp" },
            mode: 'copy',
            pattern: '*.fastq.gz'
        ]
    }
    // Settings for the process selected by the name ADAPTERREMOVAL_SINGLE.
    withName: ADAPTERREMOVAL_SINGLE {
        // The command-line string is assembled from option fragments joined with a
        // single space; options that are switched off contribute an empty string.
        ext.args   = [
            // trimming options
            // skipping adapter trimming is expressed by passing empty adapter sequences
            params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
            // NOTE(review): if skipadaptertrim and adapter1 are both set, --adapter1 is emitted twice
            params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
            // filtering options
            "--minlength ${params.shortread_clipmerge_minlength}"
        ].join(' ').trim()
        // Output prefix is evaluated per task from the sample id and run accession
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Copy files matching *.fastq.gz into <outdir>/adapterremoval
        // NOTE(review): the ADAPTERREMOVAL module declares outputs such as
        // "<prefix>.truncated.gz" - confirm this pattern matches any of its files
        publishDir = [
            path: { "${params.outdir}/adapterremoval" },
            mode: 'copy',
            pattern: '*.fastq.gz'
        ]
    }
    // Settings for the process selected by the name ADAPTERREMOVAL_PAIRED.
    withName: ADAPTERREMOVAL_PAIRED {
        // The command-line string is assembled from option fragments joined with a
        // single space; options that are switched off contribute an empty string.
        ext.args   = [
            // collapsing options
            // (--collapse is only added when shortread_clipmerge_mergepairs is set)
            params.shortread_clipmerge_mergepairs ? "--collapse" : "",
            // trimming options
            // skipping adapter trimming is expressed by passing empty adapter sequences
            params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
            // NOTE(review): if skipadaptertrim is combined with adapter1/adapter2, the
            // corresponding flag is emitted twice
            params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
            params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
            // filtering options
            "--minlength ${params.shortread_clipmerge_minlength}"
        ].join(' ').trim()
        // Output prefix is evaluated per task from the sample id and run accession
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Copy files matching *.fastq.gz into <outdir>/adapterremoval
        // NOTE(review): the ADAPTERREMOVAL module declares outputs such as
        // "<prefix>.collapsed.gz" - confirm this pattern matches any of its files
        publishDir = [
            path: { "${params.outdir}/adapterremoval" },
            mode: 'copy',
            pattern: '*.fastq.gz'
        ]
    }
    withName: PORECHOP {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@ -99,23 +151,23 @@ process {
    }
    // Settings for the process selected by the name MALT_RUN.
    withName: MALT_RUN {
        // Tool arguments are read per task from the db_params entry of the meta map
        ext.args = { "${meta.db_params}" }
        // Output prefix combines sample id, run accession and database name
        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        // Copy result files with the listed extensions into <outdir>/malt/<db_name>
        publishDir = [
            path: { "${params.outdir}/malt/${meta.db_name}" },
            mode: 'copy',
            pattern: '*.{rma6,tab,text,sam,log}'
        ]
    }
    // Settings for the process selected by the name KRAKEN2_KRAKEN2.
    withName: KRAKEN2_KRAKEN2 {
        // Tool arguments are read per task from the db_params entry of the meta map
        ext.args = { "${meta.db_params}" }
        // Output prefix combines sample id, run accession and database name
        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        // Copy *.fastq.gz and *.txt outputs into <outdir>/kraken2/<db_name>
        publishDir = [
            path: { "${params.outdir}/kraken2/${meta.db_name}" },
            mode: 'copy',
            pattern: '*.{fastq.gz,txt}'
        ]
    }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
--- a/modules.json
+++ b/modules.json
@ -3,6 +3,9 @@
    "homePage": "https://github.com/nf-core/taxprofiler",
    "repos": {
        "nf-core/modules": {
            "adapterremoval": {
                "git_sha": "f0800157544a82ae222931764483331a81812012"
            },
            "cat/fastq": {
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
--- a/modules/nf-core/modules/adapterremoval/main.nf
+++ b/modules/nf-core/modules/adapterremoval/main.nf
@ -0,0 +1,70 @@
 // Runs AdapterRemoval on single- or paired-end reads.
 //
 // Inputs:
 //   meta        - sample map; meta.id is used as the default output prefix and
 //                 meta.single_end selects the single- vs paired-end command
 //   reads       - one file (single-end) or two files (paired-end)
 //   adapterlist - optional adapter list file; pass [] to omit --adapter-list
 // Outputs: every read output is optional; the settings log and versions.yml
 // are always emitted.
 process ADAPTERREMOVAL {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
    input:
    tuple val(meta), path(reads)
    path(adapterlist)
    output:
    tuple val(meta), path("${prefix}.truncated.gz")            , optional: true, emit: singles_truncated
    tuple val(meta), path("${prefix}.discarded.gz")            , optional: true, emit: discarded
    tuple val(meta), path("${prefix}.pair1.truncated.gz")      , optional: true, emit: pair1_truncated
    tuple val(meta), path("${prefix}.pair2.truncated.gz")      , optional: true, emit: pair2_truncated
    tuple val(meta), path("${prefix}.collapsed.gz")            , optional: true, emit: collapsed
    tuple val(meta), path("${prefix}.collapsed.truncated.gz")  , optional: true, emit: collapsed_truncated
    tuple val(meta), path("${prefix}.paired.gz")               , optional: true, emit: paired_interleaved
    tuple val(meta), path('*.log')                             , emit: log
    path "versions.yml"                                        , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    // Build the complete "--adapter-list <file>" option only when a file was
    // supplied. The command must interpolate this value, not the bare file
    // name, otherwise the file would be passed as a stray positional argument.
    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
    // 'prefix' is deliberately not declared with 'def': the output block refers to it
    prefix = task.ext.prefix ?: "${meta.id}"
    if (meta.single_end) {
        """
        AdapterRemoval  \\
            --file1 $reads \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    } else {
        """
        AdapterRemoval  \\
            --file1 ${reads[0]} \\
            --file2 ${reads[1]} \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --settings ${prefix}.log \\
            --seed 42 \\
            --gzip
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    }
 }
--- a/modules/nf-core/modules/adapterremoval/meta.yml
+++ b/modules/nf-core/modules/adapterremoval/meta.yml
@ -0,0 +1,90 @@
 name: adapterremoval
 description: Trim sequencing adapters and collapse overlapping reads
 keywords:
  - trimming
  - adapters
  - merging
  - fastq
 tools:
  - adapterremoval:
      description: The AdapterRemoval v2 tool for merging and clipping reads.
      homepage: https://github.com/MikkelSchubert/adapterremoval
      documentation: https://adapterremoval.readthedocs.io
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
  - adapterlist:
      type: file
      description: Optional text file containing list of adapters to look for and remove,
        with one adapter per line. Otherwise will look for default adapters (see
        AdapterRemoval man page), or can be modified to remove user-specified
        adapters via ext.args.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - singles_truncated:
      type: file
      description: |
        Adapter trimmed FastQ files of either single-end reads, or singleton
        'orphaned' reads from merging of paired-end data (i.e., one of the pair
        was lost due to filtering thresholds).
      pattern: "*.truncated.gz"
  - discarded:
      type: file
      description: |
        Adapter trimmed FastQ files of reads that did not pass filtering
        thresholds.
      pattern: "*.discarded.gz"
  - pair1_truncated:
      type: file
      description: |
        Adapter trimmed R1 FastQ files of paired-end reads that did not merge
        with their respective R2 pair due to long templates. The respective pair
        is stored in 'pair2_truncated'.
      pattern: "*.pair1.truncated.gz"
  - pair2_truncated:
      type: file
      description: |
        Adapter trimmed R2 FastQ files of paired-end reads that did not merge
        with their respective R1 pair due to long templates. The respective pair
        is stored in 'pair1_truncated'.
      pattern: "*.pair2.truncated.gz"
  - collapsed:
      type: file
      description: |
        Collapsed FastQ of paired-end reads that successfully merged with their
        respective R1 pair but were not trimmed.
      pattern: "*.collapsed.gz"
  - collapsed_truncated:
      type: file
      description: |
        Collapsed FastQ of paired-end reads that successfully merged with their
        respective R1 pair and were trimmed of adapter due to sufficient overlap.
      pattern: "*.collapsed.truncated.gz"
  - log:
      type: file
      description: AdapterRemoval log file
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@maxibor"
  - "@jfy133"
--- a/nextflow.config
+++ b/nextflow.config
@ -59,7 +59,7 @@ params {
    shortread_clipmerge_tool                = 'fastp'
    shortread_clipmerge_skipadaptertrim     = false
    shortread_clipmerge_mergepairs          = false
-    shortread_clipmerge_excludeunmerged     = true
+    shortread_clipmerge_excludeunmerged     = false
    shortread_clipmerge_adapter1            = null
    shortread_clipmerge_adapter2            = null
    shortread_clipmerge_minlength           = 15
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -267,7 +267,7 @@
        },
        "shortread_clipmerge_excludeunmerged": {
            "type": "boolean",
-            "default": true
+            "default": false
        },
        "longread_clip": {
            "type": "boolean"
@ -284,7 +284,8 @@
        },
        "shortread_clipmerge_tool": {
            "type": "string",
-            "default": "fastp"
+            "default": "fastp",
            "enum": ["fastp", "adapterremoval"]
        },
        "shortread_clipmerge_skipadaptertrim": {
            "type": "boolean"
@ -294,11 +295,11 @@
        },
        "shortread_clipmerge_adapter1": {
            "type": "string",
-            "default": null
+            "default": "None"
        },
        "shortread_clipmerge_adapter2": {
            "type": "string",
-            "default": null
+            "default": "None"
        },
        "shortread_clipmerge_minlength": {
            "type": "integer",
--- a/subworkflows/local/db_check.nf
+++ b/subworkflows/local/db_check.nf
@ -18,9 +18,7 @@ workflow DB_CHECK {
    parsed_samplesheet = DATABASE_CHECK ( dbsheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .dump(tag: "db_split_csv_out")
        .map { create_db_channels(it) }
        .dump(tag: "db_channel_prepped")
    ch_dbs_for_untar = parsed_samplesheet
        .branch {
--- a/subworkflows/local/input_check.nf
+++ b/subworkflows/local/input_check.nf
@ -12,7 +12,6 @@ workflow INPUT_CHECK {
    parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .dump(tag: "input_split_csv_out")
        .branch {
            fasta: it['fasta'] != ''
            nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
@ -21,17 +20,14 @@ workflow INPUT_CHECK {
    parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }
        .dump(tag: "fastq_channel_init")
        .set { fastq }
    parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }
        .dump(tag: "fastq_nanopore_channel_init")
        .set { nanopore }
    parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }
        .dump(tag: "fasta_channel_init")
        .set { fasta }
    emit:
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
    PORECHOP ( reads )
    ch_processed_reads = PORECHOP.out.reads
                                .dump(tag: "pre_fastqc_check")
                                .map {
                                        meta, reads ->
                                        def meta_new = meta.clone()
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@ -0,0 +1,86 @@
 /*
 Process short raw reads with AdapterRemoval
 */
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE       } from '../../modules/nf-core/modules/adapterremoval/main'
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED       } from '../../modules/nf-core/modules/adapterremoval/main'
 include { CAT_FASTQ                                     } from '../../modules/nf-core/modules/cat/fastq/main'
 // Trims (and optionally merges) short reads with AdapterRemoval.
 // Takes a channel of [ meta, reads ] and emits the processed reads, the
 // collected software versions and the AdapterRemoval logs for MultiQC.
 workflow SHORTREAD_ADAPTERREMOVAL {
    take:
    reads // [[meta], [reads]]
    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()
    // Route every sample to the single- or paired-end invocation via its meta map
    ch_input_for_adapterremoval = reads.branch { row ->
        single: row[0].single_end
        paired: !row[0].single_end
    }
    ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
    ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
    // The AdapterRemoval2 module exposes each output file on its own channel,
    // so the files belonging to one sample are recombined by hand below
    if ( params.shortread_clipmerge_mergepairs ) {
        // Merged reads are always kept ...
        ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
            .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
        // ... and the un-merged leftovers are added unless they were excluded
        if ( !params.shortread_clipmerge_excludeunmerged ) {
            ch_adapterremoval_for_cat = ch_adapterremoval_for_cat.mix(
                ADAPTERREMOVAL_PAIRED.out.singles_truncated,
                ADAPTERREMOVAL_PAIRED.out.pair1_truncated,
                ADAPTERREMOVAL_PAIRED.out.pair2_truncated
            )
        }
        // The concatenated result is a single file, so flag the sample as single-end
        ch_adapterremoval_for_cat = ch_adapterremoval_for_cat
            .map { meta, fastq -> [ meta + [ single_end: true ], fastq ] }
            .groupTuple()
        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
    } else {
        // No merging: pair the trimmed R1 and R2 files back up per sample
        ch_trimmed_pairs = ADAPTERREMOVAL_PAIRED.out.pair1_truncated
            .join( ADAPTERREMOVAL_PAIRED.out.pair2_truncated )
            .groupTuple()
            .map { meta, r1, r2 -> [ meta, [ r1, r2 ].flatten() ] }
        ch_adapterremoval_reads_prepped = ch_trimmed_pairs
            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
    }
    ch_processed_reads = ch_adapterremoval_reads_prepped
    ch_versions = ch_versions.mix(
        ADAPTERREMOVAL_SINGLE.out.versions.first(),
        ADAPTERREMOVAL_PAIRED.out.versions.first()
    )
    ch_multiqc_files = ch_multiqc_files
        .mix( ADAPTERREMOVAL_PAIRED.out.log.collect{ it[1] } )
        .mix( ADAPTERREMOVAL_SINGLE.out.log.collect{ it[1] } )
    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
    versions = ch_versions          // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
 }
--- a/subworkflows/local/shortread_fastp.nf
+++ b/subworkflows/local/shortread_fastp.nf
@ -14,15 +14,11 @@ workflow SHORTREAD_FASTP {
    ch_multiqc_files      = Channel.empty()
    ch_input_for_fastp = reads
                            .dump(tag: "pre-fastp_branch")
                            .branch{
                                single: it[0]['single_end'] == true
                                paired: it[0]['single_end'] == false
                            }
    ch_input_for_fastp.single.dump(tag: "input_fastp_single")
    ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
    // Last parameter here turns on merging of PE data
    FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
@ -46,13 +42,11 @@ workflow SHORTREAD_FASTP {
    ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first())
    ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first())
-    ch_processed_reads = ch_fastp_reads_prepped.dump(tag: "ch_fastp_reads_prepped")
+    ch_processed_reads = ch_fastp_reads_prepped
    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
    ch_multiqc_files.dump(tag: "preprocessing_fastp_mqc_final")
    emit:
    reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
    versions = ch_versions          // channel: [ versions.yml ]
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@ -4,6 +4,7 @@
 include { SHORTREAD_FASTP             } from './shortread_fastp'
 include { SHORTREAD_ADAPTERREMOVAL    } from './shortread_adapterremoval'
 include { FASTQC as FASTQC_PROCESSED       } from '../../modules/nf-core/modules/fastqc/main'
 workflow SHORTREAD_PREPROCESSING {
@ -18,6 +19,10 @@ workflow SHORTREAD_PREPROCESSING {
        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
        ch_versions        =  ch_versions.mix( SHORTREAD_FASTP.out.versions )
        ch_multiqc_files   =  ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
        ch_versions        = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
    } else {
        ch_processed_reads = reads
    }
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
 // Warn when pairs stay un-merged and MALT is requested: each mate is then profiled as its own file
 if (!params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 // Excluding un-merged reads is only meaningful when read merging is switched on
 if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot exclude unmerged reads when merging is not turned on. Please specify --shortread_clipmerge_mergepairs"
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -126,7 +127,6 @@ workflow TAXPROFILER {
    ch_input_for_profiling = ch_shortreads_preprocessed
            .mix( ch_longreads_preprocessed )
            .combine(DB_CHECK.out.dbs)
            .dump(tag: "reads_plus_db_clean")
            .branch {
                malt:    it[2]['tool'] == 'malt'
                kraken2: it[2]['tool'] == 'kraken2'
@ -141,9 +141,7 @@ workflow TAXPROFILER {
    // loading takes a long time, so we only want to run it once per database
    // TODO document somewhere we only accept illumina short reads for MALT?
    ch_input_for_malt =  ch_input_for_profiling.malt
                            .dump(tag: "input_to_malt_prefilter")
                            .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
                            .dump(tag: "input_to_malt_postfilter")
                            .map {
                                it ->
                                    def temp_meta =  [ id: it[2]['db_name']]  + it[2]
@ -151,7 +149,6 @@ workflow TAXPROFILER {
                                    [ temp_meta, it[1], db ]
                            }
                            .groupTuple(by: [0,2])
                            .dump(tag: "input_to_malt")
                            .multiMap {
                                it ->
                                    reads: [ it[0], it[1].flatten() ]
@ -160,7 +157,6 @@ workflow TAXPROFILER {
    // We can run Kraken2 one-by-one sample-wise
    ch_input_for_kraken2 =  ch_input_for_profiling.kraken2
                            .dump(tag: "input_to_kraken")
                            .multiMap {
                                it ->
                                    reads: [ it[0] + it[2], it[1] ]