From b055df5ea076e2d82071eba9e3e692ce9d43e976 Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Sat, 2 Apr 2022 17:02:05 +0200
Subject: [PATCH 01/12] Add bbduk complexity (entropy-based) filtering

---
 CITATIONS.md                                  |  2 +
 conf/modules.config                           | 15 +++--
 modules.json                                  |  3 +
 modules/nf-core/modules/bbmap/bbduk/main.nf   | 43 ++++++++++++
 modules/nf-core/modules/bbmap/bbduk/meta.yml  | 52 +++++++++++++++
 nextflow.config                               |  7 ++
 nextflow_schema.json                          | 21 +++++-
 subworkflows/local/longread_preprocessing.nf  |  8 +--
 .../local/shortread_adapterremoval.nf         |  6 +-
 .../local/shortread_complexityfiltering.nf    | 28 ++++++++
 subworkflows/local/shortread_fastp.nf         | 10 +--
 subworkflows/local/shortread_preprocessing.nf |  6 +-
 workflows/taxprofiler.nf                      | 66 ++++++++++++-------
 13 files changed, 222 insertions(+), 45 deletions(-)
 create mode 100644 modules/nf-core/modules/bbmap/bbduk/main.nf
 create mode 100644 modules/nf-core/modules/bbmap/bbduk/meta.yml
 create mode 100644 subworkflows/local/shortread_complexityfiltering.nf

diff --git a/CITATIONS.md b/CITATIONS.md
index 8f286b0..0eb8141 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -26,6 +26,8 @@
 
 - [Porechop](https://github.com/rrwick/Porechop)
 
+- [BBTools](http://sourceforge.net/projects/bbmap/)
+
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
 
   > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
diff --git a/conf/modules.config b/conf/modules.config
index dc8b138..601f915 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -132,7 +132,6 @@ process {
         ]
     }
 
-
     withName: PORECHOP {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         publishDir = [
@@ -142,11 +141,17 @@ process {
         ]
     }
 
-    withName: CAT_FASTQ {
+    withName: BBMAP_BBDUK {
+        ext.args =  [
+                "entropy=${params.shortread_complexityfilter_bbduk_entropy}",
+                "entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
+                params.shortread_complexityfilter_bbduk_mask ?  "entropymask=t" : "entropymask=f"
+            ].join(' ').trim()
+        ext.prefix = { "${meta.id}-${meta.run_accession}" }
         publishDir = [
-            path: { "${params.outdir}/prepared_sequences" },
-            mode: 'copy',
-            pattern: '*.fastq.gz'
+            path: { "${params.outdir}/bbduk/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{fastq.gz,log}'
         ]
     }
 
diff --git a/modules.json b/modules.json
index dcfbd3f..64e6c3c 100644
--- a/modules.json
+++ b/modules.json
@@ -6,6 +6,9 @@
             "adapterremoval": {
                 "git_sha": "f0800157544a82ae222931764483331a81812012"
             },
+            "bbmap/bbduk": {
+                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            },
             "cat/fastq": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
diff --git a/modules/nf-core/modules/bbmap/bbduk/main.nf b/modules/nf-core/modules/bbmap/bbduk/main.nf
new file mode 100644
index 0000000..0ae005e
--- /dev/null
+++ b/modules/nf-core/modules/bbmap/bbduk/main.nf
@@ -0,0 +1,43 @@
+process BBMAP_BBDUK {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
+        'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path contaminants
+
+    output:
+    tuple val(meta), path('*.fastq.gz'), emit: reads
+    tuple val(meta), path('*.log')     , emit: log
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def raw      = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
+    def trimmed  = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
+    def contaminants_fa = contaminants ? "ref=$contaminants" : ''
+    """
+    maxmem=\$(echo \"$task.memory\"| sed 's/ GB/g/g')
+    bbduk.sh \\
+        -Xmx\$maxmem \\
+        $raw \\
+        $trimmed \\
+        threads=$task.cpus \\
+        $args \\
+        $contaminants_fa \\
+        &> ${prefix}.bbduk.log
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bbmap: \$(bbversion.sh)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/bbmap/bbduk/meta.yml b/modules/nf-core/modules/bbmap/bbduk/meta.yml
new file mode 100644
index 0000000..6abd3d9
--- /dev/null
+++ b/modules/nf-core/modules/bbmap/bbduk/meta.yml
@@ -0,0 +1,52 @@
+name: bbmap_bbduk
+description: Adapter and quality trimming of sequencing reads
+keywords:
+  - trimming
+  - adapter trimming
+  - quality trimming
+tools:
+  - bbmap:
+      description: BBMap is a short read aligner, as well as various other bioinformatic tools.
+      homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
+      documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
+      tool_dev_url: None
+      doi: ""
+      licence: ["UC-LBL license (see package)"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - contaminants:
+      type: file
+      description: |
+        Reference files containing adapter and/or contaminant sequences for sequence kmer matching
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: The trimmed/modified fastq reads
+      pattern: "*fastq.gz"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - log:
+      type: file
+      description: Bbduk log file
+      pattern: "*bbduk.log"
+
+authors:
+  - "@MGordon09"
diff --git a/nextflow.config b/nextflow.config
index 7be36a6..d57743a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -65,6 +65,13 @@ params {
     shortread_clipmerge_minlength           = 15
     longread_clip                           = false
 
+    // Complexity filtering
+    shortread_complexityfilter                  = false
+    shortread_complexityfilter_tool             = 'bbduk'
+    shortread_complexityfilter_bbduk_entropy    = 0.3
+    shortread_complexityfilter_bbduk_windowsize = 50
+    shortread_complexityfilter_bbduk_mask       = false
+
     // MALT
     run_malt                   = false
     malt_mode                  = 'BlastN'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index fb2ca31..bd8b438 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -266,8 +266,7 @@
             "type": "boolean"
         },
         "shortread_clipmerge_excludeunmerged": {
-            "type": "boolean",
-            "default": false
+            "type": "boolean"
         },
         "longread_clip": {
             "type": "boolean"
@@ -304,6 +303,24 @@
         "shortread_clipmerge_minlength": {
             "type": "integer",
             "default": 15
+        },
+        "shortread_complexityfilter_tool": {
+            "type": "string",
+            "default": "bbduk"
+        },
+        "shortread_complexityfilter_bbduk_entropy": {
+            "type": "number",
+            "default": 0.3
+        },
+        "shortread_complexityfilter_bbduk_windowsize": {
+            "type": "integer",
+            "default": 50
+        },
+        "shortread_complexityfilter_bbduk_mask": {
+            "type": "boolean"
+        },
+        "shortread_complexityfilter": {
+            "type": "boolean"
         }
     }
 }
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index a1515c7..2fa5f3b 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -1,6 +1,6 @@
-/*
-Process long raw reads with porechop
-*/
+//
+// Process long raw reads with porechop
+//
 
 include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
 include { PORECHOP                   } from '../../modules/nf-core/modules/porechop/main'
@@ -25,7 +25,7 @@ workflow LONGREAD_PREPROCESSING {
 
     FASTQC_PROCESSED ( PORECHOP.out.reads )
     ch_versions = ch_versions.mix(PORECHOP.out.versions.first())
-    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
 
 
     emit:
diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
index 5e005db..b173e4c 100644
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -1,6 +1,6 @@
-/*
-Process short raw reads with AdapterRemoval
-*/
+//
+// Process short raw reads with AdapterRemoval
+//
 
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE       } from '../../modules/nf-core/modules/adapterremoval/main'
 include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED       } from '../../modules/nf-core/modules/adapterremoval/main'
diff --git a/subworkflows/local/shortread_complexityfiltering.nf b/subworkflows/local/shortread_complexityfiltering.nf
new file mode 100644
index 0000000..3d69ed6
--- /dev/null
+++ b/subworkflows/local/shortread_complexityfiltering.nf
@@ -0,0 +1,28 @@
+//
+// Check input samplesheet and get read channels
+//
+
+include { BBMAP_BBDUK } from '../../modules/nf-core/modules/bbmap/bbduk/main'
+
+workflow SHORTREAD_COMPLEXITYFILTERING {
+    take:
+    reads // [ [ meta ], [ reads ] ]
+
+    main:
+    ch_versions       = Channel.empty()
+    ch_multiqc_files  = Channel.empty()
+
+    if ( params.shortread_complexityfilter_tool == 'bbduk' ) {
+        ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
+        ch_versions        =  ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
+        ch_multiqc_files   =  ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
+    } else {
+        ch_filtered_reads = reads
+    }
+
+    emit:
+    reads    = ch_filtered_reads    // channel: [ val(meta), [ reads ] ]
+    versions = ch_versions          // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files
+}
+
diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf
index 48817db..18baf17 100644
--- a/subworkflows/local/shortread_fastp.nf
+++ b/subworkflows/local/shortread_fastp.nf
@@ -1,6 +1,6 @@
-/*
-Process short raw reads with FastP
-*/
+//
+// Process short raw reads with FastP
+//
 
 include { FASTP as FASTP_SINGLE       } from '../../modules/nf-core/modules/fastp/main'
 include { FASTP as FASTP_PAIRED       } from '../../modules/nf-core/modules/fastp/main'
@@ -44,8 +44,8 @@ workflow SHORTREAD_FASTP {
 
     ch_processed_reads = ch_fastp_reads_prepped
 
-    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json.collect{it[1]} )
-    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_SINGLE.out.json )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTP_PAIRED.out.json )
 
     emit:
     reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf
index 1d0caac..b0ac25e 100644
--- a/subworkflows/local/shortread_preprocessing.nf
+++ b/subworkflows/local/shortread_preprocessing.nf
@@ -1,5 +1,5 @@
 //
-// Check input samplesheet and get read channels
+// Perform read trimming and merging
 //
 
 
@@ -9,7 +9,7 @@ include { FASTQC as FASTQC_PROCESSED       } from '../../modules/nf-core/modules
 
 workflow SHORTREAD_PREPROCESSING {
     take:
-    reads // file: /path/to/samplesheet.csv
+    reads //  [ [ meta ], [ reads ] ]
 
     main:
     ch_versions       = Channel.empty()
@@ -29,7 +29,7 @@ workflow SHORTREAD_PREPROCESSING {
 
     FASTQC_PROCESSED ( ch_processed_reads )
     ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions )
-    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
 
     emit:
     reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index ce89a91..a403589 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -40,9 +40,10 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
 //
 include { INPUT_CHECK             } from '../subworkflows/local/input_check'
 
-include { DB_CHECK                } from '../subworkflows/local/db_check'
-include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing'
-include { LONGREAD_PREPROCESSING  } from '../subworkflows/local/longread_preprocessing'
+include { DB_CHECK                      } from '../subworkflows/local/db_check'
+include { SHORTREAD_PREPROCESSING       } from '../subworkflows/local/shortread_preprocessing'
+include { LONGREAD_PREPROCESSING        } from '../subworkflows/local/longread_preprocessing'
+include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -61,7 +62,6 @@ include { CAT_FASTQ                   } from '../modules/nf-core/modules/cat/fas
 include { MALT_RUN                    } from '../modules/nf-core/modules/malt/run/main'
 include { KRAKEN2_KRAKEN2             } from '../modules/nf-core/modules/kraken2/kraken2/main'
 
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     RUN MAIN WORKFLOW
@@ -98,10 +98,6 @@ workflow TAXPROFILER {
 
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
 
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
     /*
         SUBWORKFLOW: PERFORM PREPROCESSING
     */
@@ -114,17 +110,26 @@ workflow TAXPROFILER {
     if ( params.longread_clip ) {
         ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
                                         .map { it -> [ it[0], [it[1]] ] }
-    ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first())
     } else {
         ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
     }
 
+    /*
+        SUBWORKFLOW: COMPLEXITY FILTERING
+    */
+
+    if ( params.shortread_complexityfilter ) {
+        ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
+    } else {
+        ch_shortreads_filtered = ch_shortreads_preprocessed
+    }
+
     /*
         COMBINE READS WITH POSSIBLE DATABASES
     */
 
     // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
-    ch_input_for_profiling = ch_shortreads_preprocessed
+    ch_input_for_profiling = ch_shortreads_filtered
             .mix( ch_longreads_preprocessed )
             .combine(DB_CHECK.out.dbs)
             .branch {
@@ -177,6 +182,12 @@ workflow TAXPROFILER {
     /*
         MODULE: MultiQC
     */
+
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
+
+
     workflow_summary    = WorkflowTaxprofiler.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)
 
@@ -188,21 +199,30 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
     if (params.shortread_clipmerge) {
-        ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc)
-    }
-    if (params.longread_clip) {
-        ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc)
-    }
-    if (params.run_kraken2) {
-        ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]))
-        ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first())
-    }
-    if (params.run_malt) {
-        ch_multiqc_files = ch_multiqc_files.mix(MALT_RUN.out.log.collect{it[1]}.ifEmpty([]))
-        ch_versions = ch_versions.mix(MALT_RUN.out.versions.first())
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_shortclipmerge")
+        ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
+    }
+
+    if (params.longread_clip) {
+        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_longclipmerge")
+        ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
+    }
+
+    if (params.shortread_complexityfilter){
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_compelxity")
+        ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
+    }
+
+    if (params.run_kraken2) {
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_kraken")
+        ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+    }
+
+    if (params.run_malt) {
+        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_malt")
+        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
     }
 
-    // TODO MALT results overwriting per database?
     // TODO Versions for Karken/MALT not report?
     MULTIQC (
         ch_multiqc_files.collect()

From 7f1a7fb4e7be661e3a01ab0ffb0a61fe5de314ae Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Sat, 2 Apr 2022 17:05:54 +0200
Subject: [PATCH 02/12] Add complexity filtering to test profile

---
 conf/test.config | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 92a10e4..bce5918 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,8 +24,9 @@ params {
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
     input               = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
     databases           = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    run_kraken2         = true
-    run_malt            = true
-    shortread_clipmerge = true
+    run_kraken2                   = true
+    run_malt                      = true
+    shortread_clipmerge           = true
+    shortread_complexityfiltering = true
 
 }

From 469e4f36825d86bf83a341d8c95bb4768f682bfd Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Sat, 2 Apr 2022 17:07:30 +0200
Subject: [PATCH 03/12] Remove remaining debugging .dump() calls

---
 workflows/taxprofiler.nf | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index a403589..1c86edb 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -90,7 +90,7 @@ workflow TAXPROFILER {
     /*
         MODULE: Run FastQC
     */
-    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore ).dump(tag: "input_to_fastq")
+    ch_input_for_fastqc = INPUT_CHECK.out.fastq.mix( INPUT_CHECK.out.nanopore )
 
     FASTQC (
         ch_input_for_fastqc
@@ -199,27 +199,27 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
     if (params.shortread_clipmerge) {
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_shortclipmerge")
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }
 
     if (params.longread_clip) {
-        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_longclipmerge")
+        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }
 
     if (params.shortread_complexityfilter){
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_compelxity")
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }
 
     if (params.run_kraken2) {
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_kraken")
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
     }
 
     if (params.run_malt) {
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ).dump(tag: "mqc_malt")
+        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
     }
 

From 02d950c58c84f4e6c571cdb055154016fc78a22d Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Sat, 2 Apr 2022 17:18:20 +0200
Subject: [PATCH 04/12] Fix test so complexity filter actually executes

---
 conf/test.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/test.config b/conf/test.config
index bce5918..3e7530c 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -27,6 +27,6 @@ params {
     run_kraken2                   = true
     run_malt                      = true
     shortread_clipmerge           = true
-    shortread_complexityfiltering = true
+    shortread_complexityfilter    = true
 
 }

From d3572e18787eee40ed4e856bd6caf2f573246094 Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Sun, 3 Apr 2022 07:28:01 +0200
Subject: [PATCH 05/12] Final MQC fix for AR2 (remove too-early collect)

---
 subworkflows/local/shortread_adapterremoval.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
index b173e4c..bbb108e 100644
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -77,7 +77,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
 
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
-    ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]}, ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]} )
+    ch_multiqc_files = ch_multiqc_files.mix( ADAPTERREMOVAL_PAIRED.out.log, ADAPTERREMOVAL_SINGLE.out.log )
 
     emit:
     reads    = ch_processed_reads   // channel: [ val(meta), [ reads ] ]

From a384162810cb1fe1d800d7c3f2053f01c25083da Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Mon, 4 Apr 2022 21:16:51 +0200
Subject: [PATCH 06/12] Add PRINSEQ++ as alternative complexity filtering tool

---
 CITATIONS.md                                  | 12 ++--
 conf/modules.config                           | 15 ++++-
 modules.json                                  |  5 +-
 .../nf-core/modules/prinseqplusplus/main.nf   | 61 +++++++++++++++++++
 .../nf-core/modules/prinseqplusplus/meta.yml  | 60 ++++++++++++++++++
 nextflow.config                               | 13 ++--
 nextflow_schema.json                          | 37 ++++++++---
 .../local/shortread_complexityfiltering.nf    |  6 +-
 8 files changed, 189 insertions(+), 20 deletions(-)
 create mode 100644 modules/nf-core/modules/prinseqplusplus/main.nf
 create mode 100644 modules/nf-core/modules/prinseqplusplus/meta.yml

diff --git a/CITATIONS.md b/CITATIONS.md
index 0eb8141..ec424b1 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -18,23 +18,27 @@
 
 - [fastp](https://doi.org/10.1093/bioinformatics/bty560)
 
-  > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. “Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor.” Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
+  > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
 
 - [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
 
-  > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. “AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging.” BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
+  > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
 
 - [Porechop](https://github.com/rrwick/Porechop)
 
 - [BBTools](http://sourceforge.net/projects/bbmap/)
 
+- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
+
+  > Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
+
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
 
-  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. “Improved Metagenomic Analysis with Kraken 2.” Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
+  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
 
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
 
-  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. “Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico.” Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
+  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
 
 ## Software packaging/containerisation tools
 
diff --git a/conf/modules.config b/conf/modules.config
index 601f915..9d2edc3 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -143,7 +143,7 @@ process {
 
     withName: BBMAP_BBDUK {
         ext.args =  [
-                "entropy=${params.shortread_complexityfilter_bbduk_entropy}",
+                "entropy=${params.shortread_complexityfilter_entropy}",
                 "entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
                 params.shortread_complexityfilter_bbduk_mask ?  "entropymask=t" : "entropymask=f"
             ].join(' ').trim()
@@ -155,6 +155,19 @@ process {
         ]
     }
 
+    withName: PRINSEQPLUSPLUS {
+        ext.args =  [
+                params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
+                "-trim_qual_left=0 -trim_qual_right=0 -trim_qual_window=0 -trim_qual_step=0" // NOTE(review): zeros presumably switch off quality trimming at both read ends so only the complexity filter acts - confirm
+            ].join(' ').trim()
+        ext.prefix = { "${meta.id}-${meta.run_accession}" }
+        publishDir = [
+            path: { "${params.outdir}/prinseqplusplus/" },
+            mode: params.publish_dir_mode,
+            pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}'
+        ]
+    }
+
     withName: MALT_RUN {
         ext.args = { "${meta.db_params}" }
         ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
diff --git a/modules.json b/modules.json
index 64e6c3c..355dc9e 100644
--- a/modules.json
+++ b/modules.json
@@ -33,9 +33,12 @@
             "porechop": {
                 "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
             },
+            "prinseqplusplus": {
+                "git_sha": "f1c5384c31e985591716afdd732cf8c2ae29d05b"
+            },
             "untar": {
                 "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918"
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/modules/prinseqplusplus/main.nf b/modules/nf-core/modules/prinseqplusplus/main.nf
new file mode 100644
index 0000000..ebd8c58
--- /dev/null
+++ b/modules/nf-core/modules/prinseqplusplus/main.nf
@@ -0,0 +1,61 @@
+process PRINSEQPLUSPLUS {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
+        'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*_good_out*.fastq.gz")                  , emit: good_reads
+    tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
+    tuple val(meta), path("*_bad_out*.fastq.gz")   , optional: true, emit: bad_reads
+    tuple val(meta), path("*.log")                                 , emit: log
+    path "versions.yml"                                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    if (meta.single_end) {
+        """
+        prinseq++ \\
+            -threads $task.cpus \\
+            -fastq ${reads} \\
+            -out_name ${prefix} \\
+            -out_gz \\
+            -VERBOSE 1 \\
+            $args \\
+            | tee ${prefix}.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
+        END_VERSIONS
+        """
+    } else {
+        """
+        prinseq++ \\
+            -threads $task.cpus \\
+            -fastq ${reads[0]} \\
+            -fastq2 ${reads[1]} \\
+            -out_name ${prefix} \\
+            -out_gz \\
+            -VERBOSE 1 \\
+            $args \\
+            | tee ${prefix}.log
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/nf-core/modules/prinseqplusplus/meta.yml b/modules/nf-core/modules/prinseqplusplus/meta.yml
new file mode 100644
index 0000000..8155df9
--- /dev/null
+++ b/modules/nf-core/modules/prinseqplusplus/meta.yml
@@ -0,0 +1,60 @@
+name: "prinseqplusplus"
+description: PRINSEQ++ is a C++ implementation of the prinseq-lite.pl program. It can be used to filter, reformat or trim genomic and metagenomic sequence data
+keywords:
+  - fastq
+  - fasta
+  - filter
+  - trim
+tools:
+  - "prinseqplusplus":
+      description: "PRINSEQ++ - Multi-threaded C++ sequence cleaning"
+      homepage: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
+      documentation: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
+      tool_dev_url: "https://github.com/Adrian-Cantu/PRINSEQ-plus-plus"
+      doi: "10.7287/peerj.preprints.27553v1"
+      licence: "['GPL v2']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end
+        data, respectively.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - good_reads:
+      type: file
+      description: Reads passing filter(s) in gzipped FASTQ format
+      pattern: "*_good_out_{R1,R2}.fastq.gz"
+  - single_reads:
+      type: file
+      description: |
+        Single reads without the pair passing filter(s) in gzipped FASTQ format
+      pattern: "*_single_out_{R1,R2}.fastq.gz"
+  - bad_reads:
+      type: file
+      description: |
+        Reads not passing filter(s) in gzipped FASTQ format
+      pattern: "*_bad_out_{R1,R2}.fastq.gz"
+  - log:
+      type: file
+      description: |
+        Verbose level 1 STDOUT information in a log file
+      pattern: "*.log"
+
+authors:
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index d57743a..03dfbb9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -66,11 +66,14 @@ params {
     longread_clip                           = false
 
     // Complexity filtering
-    shortread_complexityfilter                  = false
-    shortread_complexityfilter_tool             = 'bbduk'
-    shortread_complexityfilter_bbduk_entropy    = 0.3
-    shortread_complexityfilter_bbduk_windowsize = 50
-    shortread_complexityfilter_bbduk_mask       = false
+    shortread_complexityfilter                           = false
+    shortread_complexityfilter_tool                      = 'bbduk'
+    shortread_complexityfilter_entropy                   = 0.3
+    shortread_complexityfilter_bbduk_windowsize          = 50
+    shortread_complexityfilter_bbduk_mask                = false
+    shortread_complexityfilter_prinseqplusplus_mode      = 'entropy'
+    shortread_complexityfilter_prinseqplusplus_dustscore = 0.5
+
 
     // MALT
     run_malt                   = false
diff --git a/nextflow_schema.json b/nextflow_schema.json
index bd8b438..6dee045 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,10 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": [
+                "input",
+                "outdir"
+            ],
             "properties": {
                 "input": {
                     "type": "string",
@@ -173,7 +176,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -284,7 +294,10 @@
         "shortread_clipmerge_tool": {
             "type": "string",
             "default": "fastp",
-            "enum": ["fastp", "adapterremoval"]
+            "enum": [
+                "fastp",
+                "adapterremoval"
+            ]
         },
         "shortread_clipmerge_skipadaptertrim": {
             "type": "boolean"
@@ -308,10 +321,6 @@
             "type": "string",
             "default": "bbduk"
         },
-        "shortread_complexityfilter_bbduk_entropy": {
-            "type": "number",
-            "default": 0.3
-        },
         "shortread_complexityfilter_bbduk_windowsize": {
             "type": "integer",
             "default": 50
@@ -321,6 +330,18 @@
         },
         "shortread_complexityfilter": {
             "type": "boolean"
+        },
+        "shortread_complexityfilter_entropy": {
+            "type": "number",
+            "default": 0.3
+        },
+        "shortread_complexityfilter_prinseqplusplus_mode": {
+            "type": "string",
+            "default": "entropy"
+        },
+        "shortread_complexityfilter_prinseqplusplus_dustscore": {
+            "type": "number",
+            "default": 0.5
         }
     }
-}
+}
\ No newline at end of file
diff --git a/subworkflows/local/shortread_complexityfiltering.nf b/subworkflows/local/shortread_complexityfiltering.nf
index 3d69ed6..12686d7 100644
--- a/subworkflows/local/shortread_complexityfiltering.nf
+++ b/subworkflows/local/shortread_complexityfiltering.nf
@@ -2,7 +2,8 @@
 // Check input samplesheet and get read channels
 //
 
-include { BBMAP_BBDUK } from '../../modules/nf-core/modules/bbmap/bbduk/main'
+include { BBMAP_BBDUK     } from '../../modules/nf-core/modules/bbmap/bbduk/main'
+include { PRINSEQPLUSPLUS } from '../../modules/nf-core/modules/prinseqplusplus/main'
 
 workflow SHORTREAD_COMPLEXITYFILTERING {
     take:
@@ -16,6 +17,9 @@ workflow SHORTREAD_COMPLEXITYFILTERING {
         ch_filtered_reads = BBMAP_BBDUK ( reads, [] ).reads
         ch_versions        =  ch_versions.mix( BBMAP_BBDUK.out.versions.first() )
         ch_multiqc_files   =  ch_multiqc_files.mix( BBMAP_BBDUK.out.log )
+    } else if ( params.shortread_complexityfilter_tool == 'prinseqplusplus' ) {
+        ch_filtered_reads = PRINSEQPLUSPLUS ( reads ).good_reads
+        ch_versions        =  ch_versions.mix( PRINSEQPLUSPLUS.out.versions.first() )
     } else {
         ch_filtered_reads = reads
     }

From 613f6a5565dbaaf6bef06ba980a6b017506490f7 Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Mon, 4 Apr 2022 21:21:29 +0200
Subject: [PATCH 07/12] Prettier

---
 modules.json         |  2 +-
 nextflow_schema.json | 21 ++++-----------------
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/modules.json b/modules.json
index 355dc9e..d8ac3db 100644
--- a/modules.json
+++ b/modules.json
@@ -41,4 +41,4 @@
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 6dee045..75e1abe 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,10 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": [
-                "input",
-                "outdir"
-            ],
+            "required": ["input", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -176,14 +173,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -294,10 +284,7 @@
         "shortread_clipmerge_tool": {
             "type": "string",
             "default": "fastp",
-            "enum": [
-                "fastp",
-                "adapterremoval"
-            ]
+            "enum": ["fastp", "adapterremoval"]
         },
         "shortread_clipmerge_skipadaptertrim": {
             "type": "boolean"
@@ -344,4 +331,4 @@
             "default": 0.5
         }
     }
-}
\ No newline at end of file
+}

From d46ddd972c2fc2c3ce0e7fe03b3ad42a4821332a Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Tue, 5 Apr 2022 11:04:30 +0200
Subject: [PATCH 08/12] Expand tests and fix schema after review

---
 .github/workflows/ci.yml |  8 ++++++++
 conf/test.config         | 13 +++++++------
 nextflow.config          |  2 +-
 nextflow_schema.json     |  3 ++-
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b8975b5..79148f0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,8 +29,16 @@ jobs:
           - NXF_VER: ""
             NXF_EDGE: "1"
         parameters:
+          - "--longread_clip false"
+          - "--shortread_clipmerge false"
           - "--shortread_clipmerge_tool fastp"
+          - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+          - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
           - "--shortread_clipmerge_tool adapterremoval"
+          - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
+          - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
+          - "--shortread_complexityfilter_tool bbduk"
+          - "--shortread_complexityfilter_tool prinseqplusplus"
 
     steps:
       - name: Check out pipeline code
diff --git a/conf/test.config b/conf/test.config
index ff92fe7..bdad2c1 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -22,11 +22,12 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input               = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
-    databases           = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    run_kraken2         = true
-    run_malt            = true
-    run_metaphlan3      = true
-    shortread_clipmerge = true
+    input                         = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
+    databases                     = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
+    run_kraken2                   = true
+    run_malt                      = true
+    run_metaphlan3                = true
+    shortread_clipmerge           = true
+    longread_clip                 = false
     shortread_complexityfilter    = true
 }
diff --git a/nextflow.config b/nextflow.config
index cd81f08..e6d6307 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -51,7 +51,7 @@ params {
     max_cpus                   = 16
     max_time                   = '240.h'
 
-    // Databaess
+    // Databases
     databases = null
 
     // FASTQ preprocessing
diff --git a/nextflow_schema.json b/nextflow_schema.json
index bec9ea4..6f6125e 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -328,7 +328,8 @@
         },
         "shortread_complexityfilter_prinseqplusplus_mode": {
             "type": "string",
-            "default": "entropy"
+            "default": "entropy",
+            "enum": ["entropy", "dust"]
         },
         "shortread_complexityfilter_prinseqplusplus_dustscore": {
             "type": "number",

From 245a4d1f5dae7e29ec76aa1265154edbbe3eb3db Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Tue, 5 Apr 2022 13:08:44 +0200
Subject: [PATCH 09/12] Fix MQC staging and remove debugging dump

---
 subworkflows/local/shortread_adapterremoval.nf |  4 ++--
 workflows/taxprofiler.nf                       | 13 ++++++-------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
index 9d49b10..473a05f 100644
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -117,8 +117,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
     ch_multiqc_files = ch_multiqc_files.mix(
-        ADAPTERREMOVAL_PAIRED.out.log.collect{it[1]},
-        ADAPTERREMOVAL_SINGLE.out.log.collect{it[1]}
+        ADAPTERREMOVAL_PAIRED.out.log,
+        ADAPTERREMOVAL_SINGLE.out.log
     )
 
     emit:
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index ddd5914..eb2b461 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -171,7 +171,6 @@ workflow TAXPROFILER {
                             }
 
     ch_input_for_metaphlan3 = ch_input_for_profiling.metaphlan3
-                            .dump(tag: "input_metaphlan3")
                             .multiMap {
                                 it ->
                                     reads: [it[0] + it[2], it[1]]
@@ -213,34 +212,34 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
     if (params.shortread_clipmerge) {
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "clipmerge") )
         ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }
 
     if (params.longread_clip) {
-        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "clip") )
         ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }
 
     if (params.shortread_complexityfilter){
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "complex") )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }
 
     if (params.run_kraken2) {
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]).dump(tag: "kraken") )
         ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
     }
 
     if (params.run_malt) {
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
+        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]).dump(tag: "malt") )
         ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
     }
 
     // TODO Versions for Karken/MALT not report?
     // TODO create multiQC module for metaphlan
     MULTIQC (
-        ch_multiqc_files.collect()
+        ch_multiqc_files.collect().dump(tag: "input_to_mqc")
     )
     multiqc_report = MULTIQC.out.report.toList()
     ch_versions    = ch_versions.mix(MULTIQC.out.versions)

From 5b9355725dea7af56745a28e6807ae5768798e8b Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Tue, 5 Apr 2022 13:14:06 +0200
Subject: [PATCH 10/12] Whoops

---
 workflows/taxprofiler.nf | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index eb2b461..8dfe996 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -17,7 +17,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input    ) { ch_input     = file(params.input)     } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
-if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
 
 /*
@@ -212,34 +212,34 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
     if (params.shortread_clipmerge) {
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "clipmerge") )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }
 
     if (params.longread_clip) {
-        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "clip") )
+        ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }
 
     if (params.shortread_complexityfilter){
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]).dump(tag: "complex") )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([])  )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }
 
     if (params.run_kraken2) {
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]).dump(tag: "kraken") )
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])  )
         ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
     }
 
     if (params.run_malt) {
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]).dump(tag: "malt") )
+        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([])  )
         ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
     }
 
     // TODO Versions for Karken/MALT not report?
     // TODO create multiQC module for metaphlan
     MULTIQC (
-        ch_multiqc_files.collect().dump(tag: "input_to_mqc")
+        ch_multiqc_files.collect()
     )
     multiqc_report = MULTIQC.out.report.toList()
     ch_versions    = ch_versions.mix(MULTIQC.out.versions)

From 98f082d7b6df9e7ef8da41b98bafab63770d3582 Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Tue, 5 Apr 2022 13:17:54 +0200
Subject: [PATCH 11/12] Fix mistake in previous upstream merge with AR2 output
 channel for settings file

---
 subworkflows/local/shortread_adapterremoval.nf | 5 +++--
 workflows/taxprofiler.nf                       | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
index 6512596..bfed76a 100644
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -75,9 +75,10 @@ workflow SHORTREAD_ADAPTERREMOVAL {
 
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_SINGLE.out.versions.first() )
     ch_versions = ch_versions.mix( ADAPTERREMOVAL_PAIRED.out.versions.first() )
+
     ch_multiqc_files = ch_multiqc_files.mix(
-        ADAPTERREMOVAL_PAIRED.out.log,
-        ADAPTERREMOVAL_SINGLE.out.log
+        ADAPTERREMOVAL_PAIRED.out.settings,
+        ADAPTERREMOVAL_SINGLE.out.settings
     )
 
     emit:
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 8dfe996..3b08402 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -222,7 +222,7 @@ workflow TAXPROFILER {
     }
 
     if (params.shortread_complexityfilter){
-        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([])  )
+        ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }
 

From 82aa89ad63d8769989b533da4695dc9387d81355 Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Tue, 5 Apr 2022 13:55:11 +0200
Subject: [PATCH 12/12] re add missing switch meta of merged reads to true

---
 subworkflows/local/shortread_adapterremoval.nf | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf
index bfed76a..b573be9 100644
--- a/subworkflows/local/shortread_adapterremoval.nf
+++ b/subworkflows/local/shortread_adapterremoval.nf
@@ -38,11 +38,17 @@ workflow SHORTREAD_ADAPTERREMOVAL {
                 ADAPTERREMOVAL_PAIRED.out.singles_truncated,
                 ADAPTERREMOVAL_PAIRED.out.paired_truncated
             )
+            .map { meta, reads ->
+                def meta_new = meta.clone()
+                meta_new.single_end = true
+                [meta_new, reads]
+            }
             .groupTuple()
             // Paired-end reads cause a nested tuple during grouping.
             // We want to present a flat list of files to `CAT_FASTQ`.
             .map { meta, fastq -> [meta, fastq.flatten()] }
 
+
         CAT_FASTQ(ch_concat_fastq)
 
         ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads
@@ -56,10 +62,13 @@ workflow SHORTREAD_ADAPTERREMOVAL {
                 ADAPTERREMOVAL_PAIRED.out.collapsed_truncated
             )
             .map { meta, reads ->
-                meta.single_end = true
-                [meta, reads]
+                def meta_new = meta.clone()
+                meta_new.single_end = true
+                [meta_new, reads]
             }
             .groupTuple()
+            .map { meta, fastq -> [meta, fastq.flatten()] }
+
 
         CAT_FASTQ(ch_concat_fastq)