Mirror of https://github.com/MillironX/taxprofiler.git (synced 2024-11-22 06:29:55 +00:00)

Add initial longread QC filtering - requires filtlong module update

Commit 031cb45934, parent 2bae9d58ee.

15 changed files with 233 additions and 74 deletions.
.github/workflows/ci.yml: 18 changes (vendored)

@@ -29,18 +29,18 @@ jobs:
          - NXF_VER: ""
            NXF_EDGE: "1"
        parameters:
-         - "--perform_longread_clip false"
+         - "--perform_longread_qc false"
-         - "--perform_shortread_clipmerge false"
+         - "--perform_shortread_qc false"
-         - "--shortread_clipmerge_tool fastp"
+         - "--shortread_qc_tool fastp"
-         - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+         - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
-         - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
+         - "--shortread_qc_tool fastp --shortread_qc_mergepairs"
-         - "--shortread_clipmerge_tool adapterremoval"
+         - "--shortread_qc_tool adapterremoval"
-         - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+         - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_excludeunmerged"
-         - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
+         - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
          - "--shortread_complexityfilter_tool bbduk"
          - "--shortread_complexityfilter_tool prinseq"
          - "--perform_runmerging"
-         - "--perform_runmerging --shortread_clipmerge_mergepairs"
+         - "--perform_runmerging --shortread_qc_mergepairs"
          - "--shortread_complexityfilter false --perform_shortread_hostremoval"

    steps:
@@ -56,6 +56,8 @@

 > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.

+- [FILTLONG](https://github.com/rrwick/Filtlong)
+
 ## Software packaging/containerisation tools

 - [Anaconda](https://anaconda.com)
@@ -51,10 +51,10 @@ process {
    withName: FASTP_SINGLE {
        ext.args = [
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
+           params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
            // filtering options
-           "--length_required ${params.shortread_clipmerge_minlength}"
+           "--length_required ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -68,13 +68,13 @@ process {
    withName: FASTP_PAIRED {
        ext.args = [
            // collapsing options - option to retain singletons
-           params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
+           params.shortread_qc_excludeunmerged ? '' : "--include_unmerged",
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--disable_adapter_trimming" : "",
+           params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter_sequence ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
-           params.shortread_clipmerge_adapter2 ? "--adapter_sequence_r2 ${params.shortread_clipmerge_adapter2}" : "--detect_adapter_for_pe",
+           params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
            // filtering options
-           "--length_required ${params.shortread_clipmerge_minlength}"
+           "--length_required ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -88,10 +88,10 @@ process {
    withName: ADAPTERREMOVAL_SINGLE {
        ext.args = [
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+           params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
            // filtering options
-           "--minlength ${params.shortread_clipmerge_minlength}"
+           "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -105,13 +105,13 @@ process {
    withName: ADAPTERREMOVAL_PAIRED {
        ext.args = [
            // collapsing options
-           params.shortread_clipmerge_mergepairs ? "--collapse" : "",
+           params.shortread_qc_mergepairs ? "--collapse" : "",
            // trimming options
-           params.shortread_clipmerge_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
+           params.shortread_qc_skipadaptertrim ? "--adapter1 '' --adapter2 ''" : "",
-           params.shortread_clipmerge_adapter1 ? "--adapter1 ${params.shortread_clipmerge_adapter1}" : "",
+           params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
-           params.shortread_clipmerge_adapter2 ? "--adapter2 ${params.shortread_clipmerge_adapter2}" : "",
+           params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
            // filtering options
-           "--minlength ${params.shortread_clipmerge_minlength}"
+           "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
@@ -132,6 +132,22 @@ process {
        ]
    }

+   withName: FILTLONG {
+       ext.args = [
+           "--min_length ${params.longread_qc_minlength}",
+           "--keep_percent ${params.longread_qc_keepbppercent}",
+           "--target_bases ${params.longread_qc_targetnbases}"
+       ]
+           .join(' ').trim()
+       ext.prefix = { "${meta.id}_${meta.run_accession}" }
+       publishDir = [
+           path: { "${params.outdir}/porechop" },
+           mode: params.publish_dir_mode,
+           pattern: '*.fastq.gz',
+           enabled: params.save_preprocessed_reads
+       ]
+   }
+
    withName: BOWTIE2_BUILD {
        publishDir = [
            path: { "${params.outdir}/bowtie2/build" },
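The `ext.args` blocks above all follow the same pattern: a list of ternary expressions joined with `.join(' ').trim()`, so parameters left unset simply drop out of the final command line. A minimal plain-Groovy sketch of how the new FILTLONG arguments resolve; the `params` map below only mirrors the defaults added to the pipeline configuration in this commit and is an illustration, not pipeline code:

```groovy
// Plain-Groovy sketch (not pipeline code): how the list-of-ternaries plus
// join(' ').trim() pattern collapses into a single argument string.
def params = [
    longread_qc_minlength    : 1000,       // mirrors defaults added in this commit
    longread_qc_keepbppercent: 90,
    longread_qc_targetnbases : 500000000
]

def filtlong_args = [
    "--min_length ${params.longread_qc_minlength}",
    "--keep_percent ${params.longread_qc_keepbppercent}",
    "--target_bases ${params.longread_qc_targetnbases}"
].join(' ').trim()

assert filtlong_args == '--min_length 1000 --keep_percent 90 --target_bases 500000000'
```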
@@ -24,8 +24,8 @@ params {
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
    input     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-   perform_shortread_clipmerge = true
+   perform_shortread_qc = true
-   perform_longread_clip = false
+   perform_longread_qc = false
    perform_shortread_complexityfilter = true
    perform_shortread_hostremoval = true
    shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
@@ -162,16 +162,16 @@ nf-core/taxprofiler offers four main preprocessing steps

 #### Read Processing

-Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_clipmerge` or `--perform_longread_clip` flags.
+Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.

 It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.

 There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.

-For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_clipmerge_adapter1` and `--shortread_clipmerge_adapter2`)
+For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`)
-By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_clipmerge_mergepairs` and `--shortread_clipmerge_excludeunmerged`).
+By default, paired-end merging is not activated and paired-end profiling is performed where supported otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to exclude unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_excludeunmerged`).
-You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_clipmerge_skipadaptertrim`).
+You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
-Both tools support length filtering of reads and can be tuned with `--shortread_clipmerge_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
+Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.

 There is currently one option for long-read Oxford Nanopore processing: `porechop`.
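As a rough illustration of the renamed flags described in the documentation hunk above, a hypothetical user configuration might look like the sketch below. The file name and values are placeholders, not an example taken from the docs:

```groovy
// Hypothetical qc.config, passed to the pipeline with `-c qc.config`;
// values are placeholders chosen for illustration only.
params {
    perform_shortread_qc         = true
    shortread_qc_tool            = 'fastp'
    shortread_qc_mergepairs      = true
    shortread_qc_excludeunmerged = false
    shortread_qc_minlength       = 15
    perform_longread_qc          = true
}
```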
@@ -36,6 +36,9 @@
        "fastqc": {
            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
        },
+       "filtlong": {
+           "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+       },
        "kaiju/kaiju": {
            "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
        },
modules/nf-core/modules/filtlong/main.nf: 36 additions (new file, generated)

@@ -0,0 +1,36 @@
+process FILTLONG {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
+        'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
+
+    input:
+    tuple val(meta), path(shortreads), path(longreads)
+
+    output:
+    tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads
+    path "versions.yml"                                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
+    """
+    filtlong \\
+        $short_reads \\
+        $args \\
+        $longreads \\
+        | gzip -n > ${prefix}_lr_filtlong.fastq.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
+    END_VERSIONS
+    """
+}
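The module expects a single input tuple of `[ meta, shortreads, longreads ]`. A minimal, hypothetical DSL2 sketch of calling it the way the long-read subworkflow further down does, i.e. with an empty list in the short-read slot; the include path, sample name, and file are made up for illustration:

```groovy
// Hypothetical DSL2 wrapper (illustrative only); the include path assumes the
// module sits where this commit installs it.
include { FILTLONG } from './modules/nf-core/modules/filtlong/main'

workflow {
    // One long-read sample; meta map and file path are placeholders.
    ch_longreads = Channel.of(
        [ [ id:'sample1', run_accession:'run1', single_end:true ], file('sample1_nanopore.fastq.gz') ]
    )

    // Empty list in the short-read slot, as done in LONGREAD_PREPROCESSING below.
    FILTLONG ( ch_longreads.map { meta, reads -> [ meta, [], reads ] } )
    FILTLONG.out.reads.view()
}
```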
modules/nf-core/modules/filtlong/meta.yml: 50 additions (new file, generated)

@@ -0,0 +1,50 @@
+name: filtlong
+description: Filtlong filters long reads based on quality measures or short read data.
+keywords:
+  - nanopore
+  - quality control
+  - QC
+  - filtering
+  - long reads
+  - short reads
+tools:
+  - filtlong:
+      description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
+      homepage: https://anaconda.org/bioconda/filtlong
+      documentation: None
+      tool_dev_url: https://github.com/rrwick/Filtlong
+      doi: ""
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - shortreads:
+      type: file
+      description: fastq file
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+  - longreads:
+      type: file
+      description: fastq file
+      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Filtered (compressed) fastq file
+      pattern: "*.fastq.gz"
+
+authors:
+  - "@d4straub"
@@ -55,16 +55,23 @@ params {
    databases = null

    // FASTQ preprocessing
-   perform_shortread_clipmerge = false
+   perform_shortread_qc = false
-   shortread_clipmerge_tool = 'fastp'
+   shortread_qc_tool = 'fastp'
-   shortread_clipmerge_skipadaptertrim = false
+   shortread_qc_skipadaptertrim = false
-   shortread_clipmerge_mergepairs = false
+   shortread_qc_mergepairs = false
-   shortread_clipmerge_excludeunmerged = false
+   shortread_qc_excludeunmerged = false
-   shortread_clipmerge_adapter1 = null
+   shortread_qc_adapter1 = null
-   shortread_clipmerge_adapter2 = null
+   shortread_qc_adapter2 = null
-   shortread_clipmerge_minlength = 15
+   shortread_qc_minlength = 15
-   perform_longread_clip = false
-   save_preprocessed_reads = false
+   perform_longread_qc = false
+   longread_qc_run_clip = false
+   longread_qc_run_filter = false
+   longread_qc_minlength = 1000
+   longread_qc_keepbppercent = 90
+   longread_qc_targetnbases = 500000000
+
+   save_preprocessed_reads = false

    // Complexity filtering
    perform_shortread_complexityfilter = false
@@ -185,6 +192,7 @@ profiles {
    }
}
+
// Load igenomes.config if required

if (!params.igenomes_ignore) {
    includeConfig 'conf/igenomes.config'
} else {
@@ -262,7 +262,7 @@
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_excludeunmerged": {
+       "shortread_qc_excludeunmerged": {
            "type": "boolean"
        },
        "run_malt": {
@@ -291,26 +291,26 @@
            "type": "boolean",
            "description": "Enable MetaPhlAn for taxonomic profiling"
        },
-       "shortread_clipmerge_tool": {
+       "shortread_qc_tool": {
            "type": "string",
            "default": "fastp",
            "enum": ["fastp", "adapterremoval"]
        },
-       "shortread_clipmerge_skipadaptertrim": {
+       "shortread_qc_skipadaptertrim": {
            "type": "boolean"
        },
-       "shortread_clipmerge_mergepairs": {
+       "shortread_qc_mergepairs": {
            "type": "boolean"
        },
-       "shortread_clipmerge_adapter1": {
+       "shortread_qc_adapter1": {
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_adapter2": {
+       "shortread_qc_adapter2": {
            "type": "string",
            "default": "None"
        },
-       "shortread_clipmerge_minlength": {
+       "shortread_qc_minlength": {
            "type": "integer",
            "default": 15
        },
@@ -347,10 +347,10 @@
        "save_runmerged_reads": {
            "type": "boolean"
        },
-       "perform_shortread_clipmerge": {
+       "perform_shortread_qc": {
            "type": "boolean"
        },
-       "perform_longread_clip": {
+       "perform_longread_qc": {
            "type": "boolean"
        },
        "perform_shortread_complexityfilter": {
@@ -397,6 +397,24 @@
            "type": "string",
            "default": "tsv",
            "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"]
+       },
+       "longread_qc_run_clip": {
+           "type": "boolean"
+       },
+       "longread_qc_run_filter": {
+           "type": "boolean"
+       },
+       "longread_qc_minlength": {
+           "type": "integer",
+           "default": 1000
+       },
+       "longread_qc_keepbppercent": {
+           "type": "integer",
+           "default": 90
+       },
+       "longread_qc_targetnbases": {
+           "type": "integer",
+           "default": 500000000
        }
    }
}
@@ -4,6 +4,7 @@

include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
include { PORECHOP } from '../../modules/nf-core/modules/porechop/main'
+include { FILTLONG } from '../../modules/nf-core/modules/filtlong/main'

workflow LONGREAD_PREPROCESSING {
    take:
@@ -13,21 +14,43 @@ workflow LONGREAD_PREPROCESSING {
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()

-   PORECHOP ( reads )
-
-   ch_processed_reads = PORECHOP.out.reads
-       .map {
-           meta, reads ->
-               def meta_new = meta.clone()
-               meta_new['single_end'] = 1
-               [ meta_new, reads ]
-       }
-
-   FASTQC_PROCESSED ( PORECHOP.out.reads )
-   ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+   if ( params.longread_qc_run_clip && !params.longread_qc_run_filter ) {
+       PORECHOP ( reads )
+
+       ch_processed_reads = PORECHOP.out.reads
+           .map {
+               meta, reads ->
+                   def meta_new = meta.clone()
+                   meta_new['single_end'] = 1
+                   [ meta_new, reads ]
+           }
+       ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+
+   } else if ( !params.longread_qc_run_clip && params.longread_qc_run_filter ) {
+
+       ch_processed_reads = FILTLONG ( reads.map{ meta, reads -> [meta, [], reads ]} )
+       ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+
+   } else {
+       PORECHOP ( reads )
+       ch_clipped_reads = PORECHOP.out.reads
+           .map {
+               meta, reads ->
+                   def meta_new = meta.clone()
+                   meta_new['single_end'] = 1
+                   [ meta_new, reads ]
+           }
+
+       ch_processed_reads = FILTLONG ( ch_clipped_reads.map{ meta, reads -> [meta, [], reads ]} ).reads
+
+       ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
+       ch_versions = ch_versions.mix(FILTLONG.out.versions.first())
+
+   }
+
+   FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

    emit:
    reads = ch_processed_reads // channel: [ val(meta), [ reads ] ]
    versions = ch_versions // channel: [ versions.yml ]
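The new control flow picks one of three paths from the two booleans: clip only (Porechop), filter only (Filtlong), or, in every other case (including both flags unset), Porechop followed by Filtlong. A standalone Groovy sketch of just that branch selection; illustrative only, since the real code operates on Nextflow channels:

```groovy
// Standalone sketch of the branch selection above (not pipeline code).
String longreadQcPath(boolean runClip, boolean runFilter) {
    if      ( runClip && !runFilter ) return 'porechop only'
    else if ( !runClip && runFilter ) return 'filtlong only'
    else                              return 'porechop then filtlong'
}

assert longreadQcPath(true,  false) == 'porechop only'
assert longreadQcPath(false, true ) == 'filtlong only'
assert longreadQcPath(true,  true ) == 'porechop then filtlong'
assert longreadQcPath(false, false) == 'porechop then filtlong' // the else branch also covers neither flag
```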
@@ -29,7 +29,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
     * has to be exported in a separate channel and we must manually recombine when necessary.
     */

-   if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
+   if ( params.shortread_qc_mergepairs && !params.shortread_qc_excludeunmerged ) {

        ch_concat_fastq = Channel.empty()
            .mix(

@@ -54,7 +54,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
        ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
            .mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated)

-   } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
+   } else if ( params.shortread_qc_mergepairs && params.shortread_qc_excludeunmerged ) {

        ch_concat_fastq = Channel.empty()
            .mix(
@@ -21,9 +21,9 @@ workflow SHORTREAD_FASTP {

    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
    // Last parameter here turns on merging of PE data
-   FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
+   FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_qc_mergepairs )

-   if ( params.shortread_clipmerge_mergepairs ) {
+   if ( params.shortread_qc_mergepairs ) {
        ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged
            .map {
                meta, reads ->
@@ -15,11 +15,11 @@ workflow SHORTREAD_PREPROCESSING {
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()

-   if ( params.shortread_clipmerge_tool == "fastp" ) {
+   if ( params.shortread_qc_tool == "fastp" ) {
        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
        ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
-   } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+   } else if ( params.shortread_qc_tool == "adapterremoval" ) {
        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads
        ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
@@ -19,8 +19,11 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
// Check mandatory parameters
if (params.input    ) { ch_input     = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
-if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
-if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
+
+if (params.shortread_qc_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] MALT does not accept uncollapsed paired-short reads. Pairs will be profiled as separate files."
+if (params.shortread_qc_excludeunmerged && !params.shortread_qc_mergepairs) exit 1, "ERROR: [nf-core/taxprofiler] cannot include unmerged reads when merging not turned on. Please specify --shortread_qc_mergepairs"
+
+if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) & !params.perform_longread_qc ) exit 1, "ERROR: [nf-core/taxprofiler] --longread_qc_run_clip or --longread_qc_run_filter requested but quality-control not turned on. Please specify --perform_long_qc"

if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "ERROR: [nf-core/taxprofiler] --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }
@@ -110,14 +113,14 @@ workflow TAXPROFILER {
    /*
        SUBWORKFLOW: PERFORM PREPROCESSING
    */
-   if ( params.perform_shortread_clipmerge ) {
+   if ( params.perform_shortread_qc ) {
        ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
        ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
    } else {
        ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
    }

-   if ( params.perform_longread_clip ) {
+   if ( params.perform_longread_qc ) {
        ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
            .map { it -> [ it[0], [it[1]] ] }
        ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
@@ -211,11 +214,11 @@ workflow TAXPROFILER {
    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))

-   if (params.perform_shortread_clipmerge) {
+   if (params.perform_shortread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }

-   if (params.perform_longread_clip) {
+   if (params.perform_longread_qc) {
        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
    }
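The new guard in the parameter checks ties the long-read sub-flags to the master switch. A standalone Groovy sketch of the same check with hypothetical values; the committed guard uses a single `&`, the sketch below uses `&&`:

```groovy
// Standalone sketch (not pipeline code) of the long-read QC parameter guard.
def params = [ perform_longread_qc: false, longread_qc_run_clip: true, longread_qc_run_filter: false ]

if ( (params.longread_qc_run_clip || params.longread_qc_run_filter) && !params.perform_longread_qc ) {
    System.err.println 'ERROR: --longread_qc_run_clip/--longread_qc_run_filter requested but --perform_longread_qc is not set'
}
```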