From 482112bb42ddd314de8e92b6a3c1a94e530828f7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 07:58:40 +0200 Subject: [PATCH 01/25] Start work on host removal --- modules.json | 11 ++- modules/nf-core/modules/bowtie2/align/main.nf | 77 +++++++++++++++++++ .../nf-core/modules/bowtie2/align/meta.yml | 51 ++++++++++++ modules/nf-core/modules/bowtie2/build/main.nf | 30 ++++++++ .../nf-core/modules/bowtie2/build/meta.yml | 33 ++++++++ .../nf-core/modules/samtools/flagstat/main.nf | 34 ++++++++ .../modules/samtools/flagstat/meta.yml | 49 ++++++++++++ nextflow.config | 4 + subworkflows/local/shortread_hostremoval.nf | 39 ++++++++++ workflows/taxprofiler.nf | 12 ++- 10 files changed, 336 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/modules/bowtie2/align/main.nf create mode 100644 modules/nf-core/modules/bowtie2/align/meta.yml create mode 100644 modules/nf-core/modules/bowtie2/build/main.nf create mode 100644 modules/nf-core/modules/bowtie2/build/meta.yml create mode 100644 modules/nf-core/modules/samtools/flagstat/main.nf create mode 100644 modules/nf-core/modules/samtools/flagstat/meta.yml create mode 100644 subworkflows/local/shortread_hostremoval.nf diff --git a/modules.json b/modules.json index dcfbd3f..7c3facc 100644 --- a/modules.json +++ b/modules.json @@ -6,6 +6,12 @@ "adapterremoval": { "git_sha": "f0800157544a82ae222931764483331a81812012" }, + "bowtie2/align": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bowtie2/build": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "cat/fastq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -30,9 +36,12 @@ "porechop": { "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" }, + "samtools/flagstat": { + "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" + }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 0000000..7e8a965 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,77 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + + output: + tuple val(meta), path('*.bam') , emit: bam + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def unaligned = save_unaligned ? 
"--un-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -U $reads \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } else { + def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` + bowtie2 \\ + -x \$INDEX \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 0000000..f80421e --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,51 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 0000000..a4da62d --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 0000000..2da9a21 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf new file mode 100644 index 0000000..9e3440a --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -0,0 +1,34 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : + 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + flagstat \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${bam}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml new file mode 100644 index 0000000..9526906 --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 7be36a6..8aa8722 100644 --- a/nextflow.config +++ b/nextflow.config @@ -65,6 +65,10 @@ params { shortread_clipmerge_minlength = 15 longread_clip = false + // Host Removal + shortread_hostremoval_reference = null + shortread_hostremoval_index = null + // MALT run_malt = false malt_mode = 'BlastN' diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf new file mode 100644 index 0000000..4b0861c --- /dev/null +++ b/subworkflows/local/shortread_hostremoval.nf @@ -0,0 +1,39 @@ +// +// Remove host reads via alignment and export off-target reads +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main' +include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main' +include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main' +include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main' + +workflow SHORTREAD_PREPROCESSING { + take: + reads // [ [ meta ], [ reads ] ] + reference // /path/to/fasta + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( !params.shortread_hostremoval_index ) { + file( , checkIfExists: true ) + BOWTIE2_BUILD ( reference ) + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } + + BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index ) + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log ) + + SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam ) + ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat ) + + emit: + reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index ce89a91..c5678f1 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -11,15 +11,21 @@ WorkflowTaxprofiler.initialise(params, log) // TODO nf-core: Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.databases, params.multiqc_config ] +def checkPathParamList = [ params.input, params.databases, params.shortread_hostremoval_reference, + params.shortread_hostremoval_index, params.multiqc_config + ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters -if 
(params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } +if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +if (params.databases ) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" +// TODO Add check if index but no reference exit 1 +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { } +if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES From 066ceb2bcaeb21c0a2a07fd1a5359e30277869ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 17:23:14 +0200 Subject: [PATCH 02/25] Remove flagstat as bowtie2 reports this itself --- conf/modules.config | 19 ++++++- conf/test.config | 12 +++-- modules.json | 3 -- .../nf-core/modules/samtools/flagstat/main.nf | 34 ------------- .../modules/samtools/flagstat/meta.yml | 49 ------------------- nextflow.config | 1 + nextflow_schema.json | 14 +++++- subworkflows/local/shortread_hostremoval.nf | 29 +++++------ workflows/taxprofiler.nf | 27 ++++++++-- 9 files changed, 72 insertions(+), 116 deletions(-) delete mode 100644 modules/nf-core/modules/samtools/flagstat/main.nf delete mode 100644 modules/nf-core/modules/samtools/flagstat/meta.yml diff --git a/conf/modules.config b/conf/modules.config index dc8b138..41faa62 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -132,7 +132,6 @@ process { ] } - withName: PORECHOP { ext.prefix = { "${meta.id}_${meta.run_accession}" } publishDir = [ @@ -142,6 +141,24 @@ process { ] } + withName: BOWTIE2_BUILD { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/build" }, + mode: 'copy', + pattern: '*.bt2' + ] + } + + withName: BOWTIE2_ALIGN { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/align" }, + mode: 'copy', + pattern: '*.{fastq.gz,bam}' + ] + } + withName: CAT_FASTQ { publishDir = [ path: { "${params.outdir}/prepared_sequences" }, diff --git a/conf/test.config b/conf/test.config index 92a10e4..90ea241 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,10 +22,12 @@ params { // Input data // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' - databases = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' - run_kraken2 = true - run_malt = true - shortread_clipmerge = true + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv' + databases = 
'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv' + run_kraken2 = true + run_malt = true + shortread_clipmerge = true + shortread_hostremoval = true + shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta' } diff --git a/modules.json b/modules.json index 7c3facc..7395d68 100644 --- a/modules.json +++ b/modules.json @@ -36,9 +36,6 @@ "porechop": { "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" }, - "samtools/flagstat": { - "git_sha": "1ad73f1b2abdea9398680d6d20014838135c9a35" - }, "untar": { "git_sha": "e080f4c8acf5760039ed12ec1f206170f3f9a918" } diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf deleted file mode 100644 index 9e3440a..0000000 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process SAMTOOLS_FLAGSTAT { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15--h1170115_1' : - 'quay.io/biocontainers/samtools:1.15--h1170115_1' }" - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools \\ - flagstat \\ - --threads ${task.cpus-1} \\ - $bam \\ - > ${bam}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml deleted file mode 100644 index 9526906..0000000 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type -keywords: - - stats - - mapping - - counts - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 8aa8722..e559a85 100644 --- a/nextflow.config +++ b/nextflow.config @@ -66,6 +66,7 @@ params { longread_clip = false // Host Removal + shortread_hostremoval = false shortread_hostremoval_reference = null shortread_hostremoval_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index fb2ca31..0b5162b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -266,8 +266,7 @@ "type": "boolean" }, "shortread_clipmerge_excludeunmerged": { - "type": "boolean", - "default": false + "type": "boolean" }, "longread_clip": { "type": "boolean" @@ -304,6 +303,17 @@ "shortread_clipmerge_minlength": { "type": "integer", "default": 15 + }, + "shortread_hostremoval": { + "type": "boolean" + }, + "shortread_hostremoval_reference": { + "type": "string", + "default": null + }, + "shortread_hostremoval_index": { + "type": "string", + "default": null } } } diff --git a/subworkflows/local/shortread_hostremoval.nf b/subworkflows/local/shortread_hostremoval.nf index 4b0861c..505f989 100644 --- a/subworkflows/local/shortread_hostremoval.nf +++ b/subworkflows/local/shortread_hostremoval.nf @@ -2,38 +2,33 @@ // Remove host reads via alignment and export off-target reads // -include { BOWTIE2_ALIGN } from '../../../modules/nf-core/modules/bowtie2/align/main' -include { BOWTIE2_BUILD } from '../../../modules/nf-core/modules/bowtie2/build/main' -include { SAMTOOLS_VIEW } from '../../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_FASTQ } from '../../../modules/nf-core/modules/samtools/fastq/main' -include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/modules/samtools/flagstat/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -workflow SHORTREAD_PREPROCESSING { +workflow SHORTREAD_HOSTREMOVAL { take: reads // [ [ meta ], [ reads ] ] reference // /path/to/fasta + index // /path/to/index main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() if ( !params.shortread_hostremoval_index ) { - file( , checkIfExists: true ) - BOWTIE2_BUILD ( reference ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + ch_bowtie2_index = BOWTIE2_BUILD ( reference ).index + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } else { + ch_bowtie2_index = index.first() } - BOWTIE2_ALIGN ( reads, BOWTIE2_BUILD.out.index ) - ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.log ) - - SAMTOOLS_FLAGSTAT ( BOWTIE2_ALIGN.out.bam ) - ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT.out.flagstat ) + BOWTIE2_ALIGN ( reads, ch_bowtie2_index, true ) + ch_versions = ch_versions.mix( BOWTIE2_ALIGN.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( BOWTIE2_ALIGN.out.log ) emit: reads = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index c5678f1..631aee6 100644 --- 
a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -23,8 +23,11 @@ if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-cor if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs" // TODO Add check if index but no reference exit 1 -if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { } -if (params.shortread_hostremoval_index) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } +if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." } +if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." } + +if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] } +if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -49,6 +52,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { DB_CHECK } from '../subworkflows/local/db_check' include { SHORTREAD_PREPROCESSING } from '../subworkflows/local/shortread_preprocessing' include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing' +include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_hostremoval' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -120,17 +124,24 @@ workflow TAXPROFILER { if ( params.longread_clip ) { ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads .map { it -> [ it[0], [it[1]] ] } - ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) - } else { + ch_versions = ch_versions.mix(LONGREAD_PREPROCESSING.out.versions.first()) + } else {SHORTREAD_HOSTREMOVAL ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } + if ( params.shortread_hostremoval ) { + ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_preprocessed, ch_reference, ch_reference_index ).reads + ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first()) + } else { + ch_shortreads_hostremoved = ch_shortreads_preprocessed + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_preprocessed + ch_input_for_profiling = ch_shortreads_hostremoved .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) .branch { @@ -196,9 +207,15 @@ workflow TAXPROFILER { if (params.shortread_clipmerge) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_PREPROCESSING.out.mqc) } + + if (params.shortread_hostremoval) { + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) + } + if (params.longread_clip) { ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_PREPROCESSING.out.mqc) } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first()) From a76576c16b44be09548f8f8c6bede81aef2a4d99 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 3 Apr 2022 17:24:50 +0200 Subject: [PATCH 03/25] Prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 398c351..cd1a0a9 100644 --- a/modules.json +++ b/modules.json @@ -44,4 +44,4 @@ } } } -} \ No newline at end of file +} From 7b08c49cd6cb6471384a26c1202733cad0fe58ae Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:54:54 +0200 Subject: [PATCH 04/25] Re-add run merging and gonna let GHA see if it works >.> --- .github/workflows/ci.yml | 1 + nextflow.config | 2 ++ workflows/taxprofiler.nf | 25 +++++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..7678645 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--run_merging" steps: - name: Check out pipeline code diff --git a/nextflow.config b/nextflow.config index 19cc823..1c69d36 100644 --- a/nextflow.config +++ b/nextflow.config @@ -76,6 +76,8 @@ params { shortread_complexityfilter_prinseqplusplus_dustscore = 0.5 save_complexityfiltered_reads = false + // run merging + run_merging = false // MALT run_malt = false diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 3b08402..61eda6e 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -125,13 +125,34 @@ workflow TAXPROFILER { ch_shortreads_filtered = ch_shortreads_preprocessed } + /* + STEP: Run merging + */ + + if ( params.run_merging ) { + ch_reads_for_cat = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['run_accession'].remove() + [ meta_new, reads ] + } + .groupTuple() + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) + + } else { + ch_reads_runmerged = ch_shortreads_filtered + .mix( ch_longreads_preprocessed ) + } + /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. 
output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_shortreads_filtered - .mix( ch_longreads_preprocessed ) + ch_input_for_profiling = ch_reads_runmerged .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 74c496f6af04e15e1625d7311791004a645b3a21 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 11:58:16 +0200 Subject: [PATCH 05/25] Fix CAT_FASTQ output --- workflows/taxprofiler.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 61eda6e..2d0c17d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,8 +140,7 @@ workflow TAXPROFILER { } .groupTuple() - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ) - + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -247,6 +246,10 @@ workflow TAXPROFILER { ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions ) } + if (params.run_merging){ + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + } + if (params.run_kraken2) { ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) From 4d726a87e98f56bf1a4e0d52259d9e83f89539ce Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 12:01:07 +0200 Subject: [PATCH 06/25] Fix metadata removal --- workflows/taxprofiler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2d0c17d..33d9725 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -135,7 +135,7 @@ workflow TAXPROFILER { .map { meta, reads -> def meta_new = meta.clone() - meta_new['run_accession'].remove() + meta_new.remove('run_accession') [ meta_new, reads ] } .groupTuple() From d130a72d74899c3fd85db3c1c751b7b6848fd031 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:09:23 +0200 Subject: [PATCH 07/25] Get this working --- .github/workflows/ci.yml | 1 + conf/modules.config | 6 +++--- subworkflows/local/shortread_fastp.nf | 4 ++-- workflows/taxprofiler.nf | 18 ++++++++++++++++-- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7678645..53423cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,6 +40,7 @@ jobs: - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" - "--run_merging" + - "--run_merging --shortread_clipmerge_mergepairs" steps: - name: Check out pipeline code diff --git a/conf/modules.config b/conf/modules.config index b59850f..7602e3f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -163,7 +163,7 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +173,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = { 
"${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -183,7 +183,7 @@ process { withName: METAPHLAN3 { publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, + ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 18baf17..04057b1 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,8 +28,8 @@ workflow SHORTREAD_FASTP { .map { meta, reads -> def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + meta_new['single_end'] = true + [ meta_new, reads.flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 33d9725..e04d4d6 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -130,7 +130,8 @@ workflow TAXPROFILER { */ if ( params.run_merging ) { - ch_reads_for_cat = ch_shortreads_filtered + + ch_reads_for_cat_branch = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) .map { meta, reads -> @@ -139,8 +140,21 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + // we can't concate files if there is not a second run, we branch + // here to separate them out, and mix after + cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + skip: true + } + + ch_reads_for_cat_branch.cat.dump(tag: "for_catting") + + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat ).reads } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) From ca011ccc5b363ea9d6c1eaf44713a9c09e471f39 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:28:37 +0200 Subject: [PATCH 08/25] Fix cat_fastq naming logic? --- conf/modules.config | 19 +++++++++++++++---- nextflow.config | 1 + nextflow_schema.json | 38 +++++++++++++++++++++++++++++--------- workflows/taxprofiler.nf | 4 ---- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7602e3f..97e9510 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -161,9 +161,19 @@ process { ] } + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}-${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_runmerged_reads + ] + } + withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -173,7 +183,7 @@ process { withName: KRAKEN2_KRAKEN2 { ext.args = { "${meta.db_params}" } - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ path: { "${params.outdir}/kraken2/${meta.db_name}" }, mode: params.publish_dir_mode, @@ -182,12 +192,13 @@ process { } withName: METAPHLAN3 { + ext.args = { "${meta.db_params}" } + ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - ext.prefix = if params.run_merging : { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/nextflow.config b/nextflow.config index 1c69d36..d969ed9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -78,6 +78,7 @@ params { // run merging run_merging = false + save_runmerged_reads = false // MALT run_malt = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 6858409..0b4b4fb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -173,7 +176,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -288,7 +298,10 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": ["fastp", "adapterremoval"] + "enum": [ + "fastp", + "adapterremoval" + ] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -309,8 +322,7 @@ "default": 15 }, "save_preprocessed_reads": { - "type": "boolean", - "default": false + "type": "boolean" }, "shortread_complexityfilter_tool": { "type": "string", @@ -333,15 +345,23 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": ["entropy", "dust"] + "enum": [ + "entropy", + "dust" + ] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", "default": 0.5 }, "save_complexityfiltered_reads": { - "type": "boolean", - "default": false + "type": "boolean" + }, + "run_merging": { + "type": "boolean" + }, + "save_runmerged_reads": { + "type": "boolean" } } -} +} \ No newline at end of file diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index e04d4d6..bdb93ab 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -140,10 +140,6 @@ workflow TAXPROFILER { [ meta_new, reads ] } .groupTuple() - .map { - meta, reads -> - [ meta, reads.flatten() ] - } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after From a634814d848a68252d81b231e34942f5fa616c83 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:34:00 +0200 Subject: [PATCH 09/25] Formatting and fix fastp output --- nextflow_schema.json | 26 ++++--------------- .../local/shortread_adapterremoval.nf | 2 +- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 0b4b4fb..64836df 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -176,14 +173,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -298,10 +288,7 @@ "shortread_clipmerge_tool": { "type": "string", "default": "fastp", - "enum": [ - "fastp", - "adapterremoval" - ] + "enum": ["fastp", "adapterremoval"] }, "shortread_clipmerge_skipadaptertrim": { "type": "boolean" @@ -345,10 +332,7 @@ "shortread_complexityfilter_prinseqplusplus_mode": { "type": "string", "default": "entropy", - "enum": [ - "entropy", - "dust" - ] + "enum": ["entropy", "dust"] }, "shortread_complexityfilter_prinseqplusplus_dustscore": { "type": "number", @@ -364,4 +348,4 @@ "type": "boolean" } } -} \ No newline at end of file +} diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index b573be9..a7948e7 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq.flatten()] } + .map { meta, fastq -> [meta, [ fastq ].flatten()] } CAT_FASTQ(ch_concat_fastq) From 6c14f2b230a6c6df99e89c44ed8904cb0a5a7b59 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:44:52 +0200 Subject: [PATCH 10/25] Remove the flattening? --- subworkflows/local/shortread_adapterremoval.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index a7948e7..735d3b8 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, [ fastq ].flatten()] } + .map { meta, fastq -> [meta, fastq] } CAT_FASTQ(ch_concat_fastq) From 35cb6e042acf3fbe34c9664bebb9c27ef0d96179 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 13:46:12 +0200 Subject: [PATCH 11/25] Flatten the right thing --- subworkflows/local/shortread_adapterremoval.nf | 2 +- subworkflows/local/shortread_fastp.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/shortread_adapterremoval.nf b/subworkflows/local/shortread_adapterremoval.nf index 735d3b8..b573be9 100644 --- a/subworkflows/local/shortread_adapterremoval.nf +++ b/subworkflows/local/shortread_adapterremoval.nf @@ -67,7 +67,7 @@ workflow SHORTREAD_ADAPTERREMOVAL { [meta_new, reads] } .groupTuple() - .map { meta, fastq -> [meta, fastq] } + .map { meta, fastq -> [meta, fastq.flatten()] } CAT_FASTQ(ch_concat_fastq) diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 04057b1..4626691 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -29,7 +29,7 @@ workflow SHORTREAD_FASTP { meta, reads -> def meta_new = meta.clone() meta_new['single_end'] = true - [ meta_new, reads.flatten() ] + [ meta_new, [ reads ].flatten() ] } ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) From 8839fe22b8856fb1d69e11e2e1e0a4367cb97dc3 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 20:02:22 +0200 Subject: [PATCH 12/25] Fix output tuple for reads --- 
workflows/taxprofiler.nf | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index bdb93ab..7d5f60f 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -149,7 +149,15 @@ workflow TAXPROFILER { ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads.mix( ch_reads_for_cat_branch.skip ) + ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + + [ meta, [ reads ].flatten() ] + } + + ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) } else { ch_reads_runmerged = ch_shortreads_filtered From afb66e445fcb2f9c60f4152e1184f5f8600a843c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 8 Apr 2022 21:36:59 +0200 Subject: [PATCH 13/25] Append pairment to ID at profiling to prevent multiqc-level filename crash --- workflows/taxprofiler.nf | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7d5f60f..1da812d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -147,8 +147,6 @@ workflow TAXPROFILER { skip: true } - ch_reads_for_cat_branch.cat.dump(tag: "for_catting") - ch_reads_runmerged = CAT_FASTQ ( ch_reads_for_cat_branch.cat ).reads .mix( ch_reads_for_cat_branch.skip ) .map { @@ -157,8 +155,6 @@ workflow TAXPROFILER { [ meta, [ reads ].flatten() ] } - ch_reads_runmerged.dump(tag: "ch_reads_runmerged" ) - } else { ch_reads_runmerged = ch_shortreads_filtered .mix( ch_longreads_preprocessed ) @@ -168,8 +164,15 @@ workflow TAXPROFILER { COMBINE READS WITH POSSIBLE DATABASES */ - // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] + // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], [ /2612.merged.fastq.gz ], ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = ch_reads_runmerged + .map { + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? 
'_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .combine(DB_CHECK.out.dbs) .branch { malt: it[2]['tool'] == 'malt' From 29073e3c93e1ea5a4221ca8ec2080547716b503c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Apr 2022 13:38:42 +0200 Subject: [PATCH 14/25] Update CI tests --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79148f0..5e57889 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,6 +39,7 @@ jobs: - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs" - "--shortread_complexityfilter_tool bbduk" - "--shortread_complexityfilter_tool prinseq" + - "--shortread_complexityfilter false --shortread_hostremoval" steps: - name: Check out pipeline code From 78182c2a8ca72d17b3a1004f100180d44439db42 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Apr 2022 13:40:24 +0200 Subject: [PATCH 15/25] Add comma --- workflows/taxprofiler.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index abb330d..95e3588 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -125,7 +125,7 @@ workflow TAXPROFILER { } else { ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } - + /* SUBWORKFLOW: COMPLEXITY FILTERING */ @@ -146,12 +146,12 @@ workflow TAXPROFILER { } else { ch_shortreads_hostremoved = ch_shortreads_filtered } - + /* SUBWORKFLOW: PROFILING */ - PROFILING ( ch_shortreads_hostremoved ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_shortreads_hostremoved, ch_longreads_preprocessed, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* @@ -191,7 +191,7 @@ workflow TAXPROFILER { if (params.shortread_hostremoval) { ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([])) } - + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc ) // TODO create multiQC module for metaphlan From a5f4fc42d53fce99863a3d135b017b9e468722be Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 09:25:27 +0200 Subject: [PATCH 16/25] Fix run merging for unmerged PE data --- conf/modules.config | 2 +- workflows/taxprofiler.nf | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 97e9510..eb448bb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -162,7 +162,7 @@ process { } withName: CAT_FASTQ { - ext.prefix = { "${meta.id}-${meta.run_accession}" } + ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/run_merging/" }, mode: params.publish_dir_mode, diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 1da812d..2eb7e8c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,7 +139,11 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple() + .groupTuple(by: 0) + .map { + meta, reads -> + [ meta, reads.flatten() ] + } .branch { // we can't concate files if there is not a second run, we branch // here to separate them out, and mix after @@ -151,7 +155,6 @@ workflow TAXPROFILER { .mix( ch_reads_for_cat_branch.skip ) .map { meta, reads -> - [ meta, [ reads ].flatten() ] } From 9f221f84cc66ca121a46d8f68db8f727dec523c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:12:17 +0200 Subject: [PATCH 17/25] Only supply single input channel to 
profiling, as these are merged into single input channel at run_merging --- subworkflows/local/profiling.nf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c74c583..07b6b72 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,8 +9,7 @@ include { METAPHLAN3 } from '../../modules/nf-core/modules/meta workflow PROFILING { take: - shortreads // [ [ meta ], [ reads ] ] - longreads // [ [ meta ], [ reads ] ] + reads // [ [ meta ], [ reads ] ] databases // [ [ meta ], path ] main: @@ -22,9 +21,9 @@ workflow PROFILING { */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = shortreads - .mix( longreads ) + ch_input_for_profiling = reads .combine(databases) + .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 2ef21c6ef3e46ae824ad5c99970b8a5250ef8e38 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:14:05 +0200 Subject: [PATCH 18/25] Fix input to profiling --- subworkflows/local/profiling.nf | 1 - workflows/taxprofiler.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 07b6b72..ac744aa 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,7 +23,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) - .dump(tag: "combined_withdbs") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 0916cac..7c02f4c 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -167,7 +167,7 @@ workflow TAXPROFILER { SUBWORKFLOW: PROFILING */ - PROFILING ( ch_reads_runmerged, ch_longreads_preprocessed, DB_CHECK.out.dbs ) + PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) /* From 26399718b2440499f294ebc59d669dd78dd7cdb6 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:46:03 +0200 Subject: [PATCH 19/25] Re-add pairment attachment --- subworkflows/local/profiling.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index ac744aa..b03b83e 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,12 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + meta, reads -> + def meta_new = meta.clone() + pairtype = meta_new['single_end'] ? 
'_se' : '_pe' + meta_new['id'] = meta_new['id'] + pairtype + [meta_new, reads] + } .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 8d689141924a52cf72e666e76db1b575d541a341 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:47:44 +0200 Subject: [PATCH 20/25] Re-add operator name --- subworkflows/local/profiling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index b03b83e..8a156c2 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -23,6 +23,7 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads .combine(databases) + .map { meta, reads -> def meta_new = meta.clone() pairtype = meta_new['single_end'] ? '_se' : '_pe' From a15c45b00cd18e2148baf7112f15c340261acdef Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 12 Apr 2022 10:51:27 +0200 Subject: [PATCH 21/25] Put map in the rigt place --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 8a156c2..18de739 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -22,7 +22,6 @@ workflow PROFILING { // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] ch_input_for_profiling = reads - .combine(databases) .map { meta, reads -> def meta_new = meta.clone() @@ -30,6 +29,7 @@ workflow PROFILING { meta_new['id'] = meta_new['id'] + pairtype [meta_new, reads] } + .combine(databases) .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' From 16a3556bfcfddf8195bbcd486653325aaa1de4ee Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 08:26:08 +0200 Subject: [PATCH 22/25] Changes after code review --- workflows/taxprofiler.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 7c02f4c..58671b3 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -139,15 +139,16 @@ workflow TAXPROFILER { meta_new.remove('run_accession') [ meta_new, reads ] } - .groupTuple(by: 0) + .groupTuple() .map { meta, reads -> [ meta, reads.flatten() ] } .branch { + meta, reads -> // we can't concatenate files if there is not a second run, we branch // here to separate them out, and mix back in after for efficiency - cat: ( it[0]['single_end'] && it[1].size() > 1 ) || ( !it[0]['single_end'] && it[1].size() > 2 ) + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) skip: true } From dfeaa0d1fe79a6f59c27bdb481904b3bbb620234 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 12:00:28 +0200 Subject: [PATCH 23/25] Rename subworkflow parameters for consistency --- .github/workflows/ci.yml | 8 ++++---- conf/modules.config | 6 +++--- conf/test.config | 6 +++--- nextflow.config | 11 ++++++----- nextflow_schema.json | 22 +++++++++++----------- workflows/taxprofiler.nf | 16 ++++++++-------- 6 files changed, 35 insertions(+), 34 deletions(-) diff --git a/.github/workflows/ci.yml 
From dfeaa0d1fe79a6f59c27bdb481904b3bbb620234 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Wed, 13 Apr 2022 12:00:28 +0200
Subject: [PATCH 23/25] Rename subworkflow parameters for consistency

---
 .github/workflows/ci.yml |  8 ++++----
 conf/modules.config      |  6 +++---
 conf/test.config         |  6 +++---
 nextflow.config          | 11 ++++++-----
 nextflow_schema.json     | 22 +++++++++++-----------
 workflows/taxprofiler.nf | 16 ++++++++--------
 6 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 53423cb..c373bc8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,8 +29,8 @@ jobs:
           - NXF_VER: ""
             NXF_EDGE: "1"
         parameters:
-          - "--longread_clip false"
-          - "--shortread_clip false"
+          - "--perform_longread_clip false"
+          - "--perform_shortread_clipmerge false"
           - "--shortread_clipmerge_tool fastp"
           - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs --shortread_clipmerge_excludeunmerged"
           - "--shortread_clipmerge_tool fastp --shortread_clipmerge_mergepairs"
@@ -39,8 +39,8 @@ jobs:
           - "--shortread_clipmerge_tool adapterremoval --shortread_clipmerge_mergepairs"
          - "--shortread_complexityfilter_tool bbduk"
          - "--shortread_complexityfilter_tool prinseq"
-          - "--run_merging"
-          - "--run_merging --shortread_clipmerge_mergepairs"
+          - "--perform_runmerging"
+          - "--perform_runmerging --shortread_clipmerge_mergepairs"

     steps:
       - name: Check out pipeline code
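The CI matrix above exercises the renamed switches from the command line; in a user-supplied configuration the same toggles would look roughly like the snippet below (a sketch with illustrative values, passed for example via -c custom.config; the tool-specific options keep their existing names):

    // custom.config: illustrative values only
    params {
        perform_shortread_clipmerge        = true
        shortread_clipmerge_tool           = 'fastp'
        perform_longread_clip              = true
        perform_shortread_complexityfilter = true
        shortread_complexityfilter_tool    = 'bbduk'
        perform_runmerging                 = true
        save_runmerged_reads               = false
    }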
diff --git a/conf/modules.config b/conf/modules.config
index d93486f..42528de 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -173,7 +173,7 @@ process {
     withName: MALT_RUN {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/malt/${meta.db_name}" },
             mode: params.publish_dir_mode,
@@ -183,7 +183,7 @@ process {
     withName: KRAKEN2_KRAKEN2 {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: params.publish_dir_mode,
@@ -193,7 +193,7 @@ process {
     withName: METAPHLAN3 {
         ext.args = { "${meta.db_params}" }
-        ext.prefix = params.run_merging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
         publishDir = [
             path: { "${params.outdir}/metaphlan3/${meta.db_name}" },
             mode: params.publish_dir_mode,
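All three ext.prefix changes in this file follow one pattern: both arms of the ternary stay closures, so the prefix is still resolved per task against that task's meta map, and params.perform_runmerging only decides whether run_accession appears in output names. In isolation the pattern looks like this (the EXAMPLE_PROFILER selector is hypothetical, and the fragment assumes an nf-core-style module in which meta is defined as a process input):

    // illustrative fragment, not the pipeline's conf/modules.config
    process {
        withName: EXAMPLE_PROFILER {
            // both arms are closures, so they are evaluated per task with that task's meta map
            ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        }
    }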
diff --git a/conf/test.config b/conf/test.config
index 6e82300..923dda7 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -28,7 +28,7 @@ params {
     run_malt = true
     run_metaphlan3 = true
     run_centrifuge = true
-    shortread_clipmerge = true
-    longread_clip = false
-    shortread_complexityfilter = true
+    perform_shortread_clipmerge = true
+    perform_longread_clip = false
+    perform_shortread_complexityfilter = true
 }
diff --git a/nextflow.config b/nextflow.config
index da8bbdb..b72b4f9 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -55,7 +55,7 @@ params {
     databases = null

     // FASTQ preprocessing
-    shortread_clipmerge = false
+    perform_shortread_clipmerge = false
     shortread_clipmerge_tool = 'fastp'
     shortread_clipmerge_skipadaptertrim = false
     shortread_clipmerge_mergepairs = false
@@ -63,11 +63,11 @@ params {
     shortread_clipmerge_adapter1 = null
     shortread_clipmerge_adapter2 = null
     shortread_clipmerge_minlength = 15
-    longread_clip = false
+    perform_longread_clip = false
     save_preprocessed_reads = false

     // Complexity filtering
-    shortread_complexityfilter = false
+    perform_shortread_complexityfilter = false
     shortread_complexityfilter_tool = 'bbduk'
     shortread_complexityfilter_entropy = 0.3
     shortread_complexityfilter_bbduk_windowsize = 50
@@ -77,8 +77,8 @@ params {
     save_complexityfiltered_reads = false

     // run merging
-    run_merging = false
-    save_runmerged_reads = false
+    perform_runmerging = false
+    save_runmerged_reads = false

     // MALT
     run_malt = false
@@ -92,6 +92,7 @@ params {
     centrifuge_save_unaligned = false
     centrifuge_save_aligned = false
     centrifuge_sam_format = false
+
     // metaphlan3
     run_metaphlan3 = false
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4db7fa0..06bd94b 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -262,15 +262,9 @@
             "type": "string",
             "default": "None"
         },
-        "shortread_clipmerge": {
-            "type": "boolean"
-        },
         "shortread_clipmerge_excludeunmerged": {
             "type": "boolean"
         },
-        "longread_clip": {
-            "type": "boolean"
-        },
         "run_malt": {
             "type": "boolean"
         },
@@ -334,9 +328,6 @@
         "shortread_complexityfilter_bbduk_mask": {
             "type": "boolean"
         },
-        "shortread_complexityfilter": {
-            "type": "boolean"
-        },
         "shortread_complexityfilter_entropy": {
             "type": "number",
             "default": 0.3
@@ -353,10 +344,19 @@
         "save_complexityfiltered_reads": {
             "type": "boolean"
         },
-        "run_merging": {
             "type": "boolean"
         },
-        "save_runmerged_reads": {
+        "save_runmerged_reads": {
             "type": "boolean"
         },
+        "perform_shortread_clipmerge": {
+            "type": "boolean"
+        },
+        "perform_longread_clip": {
+            "type": "boolean"
+        },
+        "perform_shortread_complexityfilter": {
+            "type": "boolean"
+        },
+        "perform_runmerging": {
             "type": "boolean"
         }
     }
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 58671b3..f086557 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -101,14 +101,14 @@ workflow TAXPROFILER {
     /*
         SUBWORKFLOW: PERFORM PREPROCESSING
     */
-    if ( params.shortread_clipmerge ) {
+    if ( params.perform_shortread_clipmerge ) {
         ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING ( INPUT_CHECK.out.fastq ).reads
     } else {
         ch_shortreads_preprocessed = INPUT_CHECK.out.fastq
     }

-    if ( params.longread_clip ) {
+    if ( params.perform_longread_clip ) {
         ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( INPUT_CHECK.out.nanopore ).reads
                                         .map { it -> [ it[0], [it[1]] ] }
     } else {
         ch_longreads_preprocessed = INPUT_CHECK.out.nanopore
     }

     /*
@@ -119,7 +119,7 @@ workflow TAXPROFILER {
         SUBWORKFLOW: COMPLEXITY FILTERING
     */

-    if ( params.shortread_complexityfilter ) {
+    if ( params.perform_shortread_complexityfilter ) {
         ch_shortreads_filtered = SHORTREAD_COMPLEXITYFILTERING ( ch_shortreads_preprocessed ).reads
     } else {
         ch_shortreads_filtered = ch_shortreads_preprocessed
@@ -129,7 +129,7 @@ workflow TAXPROFILER {
         STEP: Run merging
     */

-    if ( params.run_merging ) {
+    if ( params.perform_runmerging ) {
         ch_reads_for_cat_branch = ch_shortreads_filtered
             .mix( ch_longreads_preprocessed )
@@ -190,22 +190,22 @@ workflow TAXPROFILER {
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))

-    if (params.shortread_clipmerge) {
+    if (params.perform_shortread_clipmerge) {
         ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_PREPROCESSING.out.versions )
     }

-    if (params.longread_clip) {
+    if (params.perform_longread_clip) {
         ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions )
     }

-    if (params.shortread_complexityfilter){
+    if (params.perform_shortread_complexityfilter){
         ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_COMPLEXITYFILTERING.out.mqc.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }

-    if (params.run_merging){
+    if (params.perform_runmerging){
         ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
     }

From aa2d07c42ae4e451711a4147264782d567576e88 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Wed, 13 Apr 2022 14:19:19 +0200
Subject: [PATCH 24/25] Fix merge cockup

---
 .github/workflows/ci.yml |  2 +-
 conf/test.config         |  2 +-
 nextflow.config          |  2 +-
 nextflow_schema.json     |  2 +-
 workflows/taxprofiler.nf | 13 +++++--------
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b18e601..a1ece72 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,7 +41,7 @@ jobs:
           - "--shortread_complexityfilter_tool prinseq"
           - "--perform_runmerging"
           - "--perform_runmerging --shortread_clipmerge_mergepairs"
-          - "--shortread_complexityfilter false --shortread_hostremoval"
+          - "--shortread_complexityfilter false --perform_shortread_hostremoval"

     steps:
       - name: Check out pipeline code
diff --git a/conf/test.config b/conf/test.config
index 1d08d91..8f12312 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -31,6 +31,6 @@ params {
     perform_shortread_clipmerge = true
     perform_longread_clip = false
     perform_shortread_complexityfilter = true
-    shortread_hostremoval = true
+    perform_shortread_hostremoval = true
     shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
 }
diff --git a/nextflow.config b/nextflow.config
index 6b0a79d..94c5837 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -81,7 +81,7 @@ params {
     save_runmerged_reads = false

     // Host Removal
-    shortread_hostremoval = false
+    perform_shortread_hostremoval = false
     shortread_hostremoval_reference = null
     shortread_hostremoval_index = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 07295e3..cf0edab 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -359,7 +359,7 @@
         "perform_runmerging": {
             "type": "boolean"
         },
-        "shortread_hostremoval": {
+        "perform_shortread_hostremoval": {
             "type": "boolean"
         },
         "shortread_hostremoval_reference": {
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 894a1e1..9fe8cc8 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -22,11 +22,10 @@ if (params.databases) { ch_databases = file(params.databases) } else { exit 1, '
 if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not accept uncollapsed paired-reads. Pairs will be profiled as separate files."
 if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"

-// TODO Add check if index but no reference exit 1
-if (params.shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
+if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied. Check input." }
 if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_reference_index) { exit 1, "[nf-core/taxprofiler] error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied. Check input." }

-if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) } else { ch_reference = [] }
+if (params.shortread_hostremoval_reference ) { ch_reference = file(params.shortread_hostremoval_reference) }
 if (params.shortread_hostremoval_index ) { ch_reference_index = file(params.shortread_hostremoval_index ) } else { ch_reference_index = [] }

 /*
@@ -140,15 +139,13 @@ workflow TAXPROFILER {
         SUBWORKFLOW: HOST REMOVAL
     */

-    if ( params.shortread_hostremoval ) {
+    if ( params.perform_shortread_hostremoval ) {
         ch_shortreads_hostremoved = SHORTREAD_HOSTREMOVAL ( ch_shortreads_filtered, ch_reference, ch_reference_index ).reads
         ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions.first())
     } else {
         ch_shortreads_hostremoved = ch_shortreads_filtered
     }

-    */
-
     if ( params.perform_runmerging ) {

         ch_reads_for_cat_branch = ch_shortreads_hostremoved
@@ -225,11 +222,11 @@ workflow TAXPROFILER {
         ch_versions = ch_versions.mix( SHORTREAD_COMPLEXITYFILTERING.out.versions )
     }

-    if (params.shortread_hostremoval) {
+    if (params.perform_shortread_hostremoval) {
         ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_HOSTREMOVAL.out.mqc.collect{it[1]}.ifEmpty([]))
         ch_versions = ch_versions.mix(SHORTREAD_HOSTREMOVAL.out.versions)
     }
-
+
     if (params.perform_runmerging){
         ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
     }

From 5e80df0f949557883ea6e961b0ea7ba3214065c5 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 13 Apr 2022 15:27:53 +0200
Subject: [PATCH 25/25] Apply suggestions from code review

---
 conf/test.config | 4 ++--
 nextflow.config  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 8f12312..9fa5de8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -31,6 +31,6 @@ params {
     perform_shortread_clipmerge = true
     perform_longread_clip = false
     perform_shortread_complexityfilter = true
-    perform_shortread_hostremoval = true
-    shortread_hostremoval_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
+    perform_shortread_hostremoval      = true
+    shortread_hostremoval_reference    = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
 }
diff --git a/nextflow.config b/nextflow.config
index 94c5837..bf3ca92 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -81,7 +81,7 @@ params {
     save_runmerged_reads = false

     // Host Removal
-    perform_shortread_hostremoval = false
+    perform_shortread_hostremoval          = false
     shortread_hostremoval_reference = null
     shortread_hostremoval_index = null
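The last two patches leave short-read host removal controlled by three parameters: a perform_shortread_hostremoval switch, a reference FASTA that is mandatory whenever the switch is on, and an optional prebuilt index. A condensed, standalone sketch of the guard logic follows; it is not the pipeline's exact code (the paths are placeholders, the error wording here uses the renamed flag, and the empty-list fallbacks are written as ternaries):

    nextflow.enable.dsl = 2

    // placeholder defaults; on a real run these come from the command line or a config
    params.perform_shortread_hostremoval   = true
    params.shortread_hostremoval_reference = 'host_genome.fasta'
    params.shortread_hostremoval_index     = null

    if (params.perform_shortread_hostremoval && !params.shortread_hostremoval_reference) {
        exit 1, "error: --perform_shortread_hostremoval requested but no --shortread_hostremoval_reference FASTA supplied"
    }
    if (!params.shortread_hostremoval_reference && params.shortread_hostremoval_index) {
        exit 1, "error: --shortread_hostremoval_index provided but no --shortread_hostremoval_reference FASTA supplied"
    }

    ch_reference       = params.shortread_hostremoval_reference ? file(params.shortread_hostremoval_reference) : []
    ch_reference_index = params.shortread_hostremoval_index     ? file(params.shortread_hostremoval_index)     : []

    workflow {
        // a real pipeline would pass these on, e.g. SHORTREAD_HOSTREMOVAL ( reads, ch_reference, ch_reference_index )
        println "host-removal reference: ${ch_reference ?: 'none'}; index: ${ch_reference_index ?: 'none'}"
    }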