If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline.
### Images and figures
For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
## GitHub Codespaces
This repo includes a devcontainer configuration that will create a GitHub Codespace for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
To get started:
- Open the repo in [Codespaces](https://github.com/nf-core/taxprofiler/codespaces)
Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/taxprofiler/tree/master/.github/CONTRIBUTING.md)
- [ ] This comment contains a description of changes (with reason).
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/taxprofiler/tree/master/.github/CONTRIBUTING.md)
- [ ] If necessary, also make a PR on the nf-core/taxprofiler _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
- [ ] Make sure your code lints (`nf-core lint`).
- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
- [ ] Usage Documentation in `docs/usage.md` is updated.
stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove the stale label or add a comment, otherwise this issue will be closed in 20 days."
stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove the stale label or add a comment if it is still useful."
close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity."
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## v1.1.0dev - [date]
### `Added`
- [#276](https://github.com/nf-core/taxprofiler/pull/276) - Implemented batching in the KrakenUniq samples processing. (added by @Midnighter)
- [#272](https://github.com/nf-core/taxprofiler/pull/272) - Add saving of final 'analysis-ready-reads' to dedicated directory. (❤️ to @alexhbnr for reporting, added by @jfy133)
### `Fixed`
- [#271](https://github.com/nf-core/taxprofiler/pull/271/files) - Improved standardised table generation documentation and mOTUs manual database download tutorial (♥ to @prototaxites for reporting, fix by @jfy133)
- [#269](https://github.com/nf-core/taxprofiler/pull/269/files) - Reduced output files in AWS full test output due to very large files
- [#270](https://github.com/nf-core/taxprofiler/pull/270/files) - Fixed warning for host removal index parameter, and improved index checks (♥ to @prototaxites for reporting, fix by @jfy133)
- [#274](https://github.com/nf-core/taxprofiler/pull/274/files) - Substituted the samtools/bam2fq module with the samtools/fastq module (fix by @sofstam)
- [#275](https://github.com/nf-core/taxprofiler/pull/275/files) - Replaced the function used for error reporting with a more Nextflow-friendly method (fix by @jfy133)
- [#285](https://github.com/nf-core/taxprofiler/pull/285/files) - Fixed overly large log files in Kraken2 output (♥ to @prototaxites for reporting, fix by @Midnighter & @jfy133)
- [#286](https://github.com/nf-core/taxprofiler/pull/286/files) - Runtime optimisation of MultiQC step via improved log file processing (fix by @Midnighter & @jfy133)
- [#289](https://github.com/nf-core/taxprofiler/pull/289/files) - Pipeline updated to nf-core template 2.8 (fix by @Midnighter & @jfy133)
- [#290](https://github.com/nf-core/taxprofiler/pull/290/files) - Minor database input documentation improvements (♥ to @alneberg for reporting, fix by @jfy133)
### `Dependencies`
| Tool | Previous version | New version |
| ------- | ---------------- | ----------- |
| MultiQC | 1.13 | 1.14 |
### `Deprecated`
## v1.0.1 - Dodgy Dachshund Patch [2023-05-15]
### `Added`
### `Fixed`
- [#291](https://github.com/nf-core/taxprofiler/pull/291) - Fix Taxpasta not receiving taxonomy directory (❤️ to @SannaAb for reporting, fix by @jfy133)
## v1.0.0 - Dodgy Dachshund [2023-03-13]
Initial release of nf-core/taxprofiler, created with the [nf-core](https://nf-co.re/) template.
- Add read quality control (sequencing QC, adapter removal and merging)
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
- [Bowtie2](https://doi.org/10.1038/nmeth.1923)
> Langmead, B., & Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. Nature Methods, 9(4), 357–359. doi: 10.1038/nmeth.1923
> Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2). doi: 10.1093/gigascience/giab008
- [Bracken](https://doi.org/10.7717/peerj-cs.104)
> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)
> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
- [Kaiju](https://doi.org/10.1038/ncomms11257)
> Menzel, P., Ng, K. L., & Krogh, A. (2016). Fast and sensitive taxonomic classification for metagenomics with Kaiju. Nature Communications, 7, 11257. doi: 10.1038/ncomms11257
> Ruscheweyh, H.-J., Milanese, A., Paoli, L., Karcher, N., Clayssen, Q., Keller, M. I., Wirbel, J., Bork, P., Mende, D. R., Zeller, G., & Sunagawa, S. (2022). Cultivation-independent genomes greatly expand taxonomic-profiling capabilities of mOTUs across various environments. Microbiome, 10(1), 212. doi: 10.1186/s40168-022-01410-z
> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
## Data
- [Maixner (2021)](https://doi.org/10.1016/j.cub.2021.09.031) (CI Test Data)
> Maixner, Frank, Mohamed S. Sarhan, Kun D. Huang, Adrian Tett, Alexander Schoenafinger, Stefania Zingale, Aitor Blanco-Míguez, et al. 2021. “Hallstatt Miners Consumed Blue Cheese and Beer during the Iron Age and Retained a Non-Westernized Gut Microbiome until the Baroque Period.” Current Biology: CB 31 (23): 5149–62.e6. doi: 10.1016/j.cub.2021.09.031.
- [Meslier (2022)](https://doi.org/10.1038/s41597-022-01762-z) (AWS Full Test data)
> Meslier, Victoria, Benoit Quinquis, Kévin Da Silva, Florian Plaza Oñate, Nicolas Pons, Hugo Roume, Mircea Podar, and Mathieu Almeida. 2022. “Benchmarking Second and Third-Generation Sequencing Platforms for Microbial Metagenomics.” Scientific Data 9 (1): 694. doi: 10.1038/s41597-022-01762-z.
[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7728364-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7728364)
[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/taxprofiler)
[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23taxprofiler-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/taxprofiler)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
## Introduction
> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic classification and profiling of shotgun and long-read metagenomic data. It allows for in-parallel taxonomic identification of reads or taxonomic abundance estimation with multiple classification and profiling tools against multiple databases, and produces standardised output tables.
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/taxprofiler/results).
## Pipeline summary
1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (shortread: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); longread: [porechop](https://github.com/rrwick/Porechop))
Each row represents a fastq file (single-end), a pair of fastq files (paired-end), or a fasta file (with long reads), for example:
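As a minimal sketch, a samplesheet might look like the following (the column layout follows the pipeline's usage documentation; sample names and paths here are placeholders):

```bash
cat > samplesheet.csv << 'EOF'
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
sample1,run1,ILLUMINA,/path/to/sample1_R1.fastq.gz,/path/to/sample1_R2.fastq.gz,
sample2,run2,OXFORD_NANOPORE,/path/to/sample2.fastq.gz,,
EOF
```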
2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
3. Download the pipeline and test it on a minimal dataset with a single command:

   ```bash
   nextflow run nf-core/taxprofiler -profile test,YOURPROFILE --outdir <OUTDIR>
   ```

   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.

Additionally, you will need a database sheet (`databases.csv`) that includes directories or `.tar.gz` archives containing databases for the tools you wish to run the pipeline against.
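A sketch of such a sheet, assuming the four-column layout described in the usage documentation (database names and paths here are placeholders):

```bash
cat > databases.csv << 'EOF'
tool,db_name,db_params,db_path
kraken2,k2_db1,,/path/to/kraken2_db.tar.gz
centrifuge,cf_db1,,/path/to/centrifuge_db/
EOF
```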
> - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
> - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
> - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
> - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
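For example, you might set these cache locations once in your shell profile (paths here are placeholders):

```bash
# Store container images and conda environments centrally so they are
# re-used across pipeline runs instead of being re-downloaded each time:
export NXF_SINGULARITY_CACHEDIR=/shared/cache/singularity-images
export NXF_CONDA_CACHEDIR=/shared/cache/conda-envs
```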
> Please provide pipeline parameters via the CLI (as above) or Nextflow `-params-file` option. Custom config files including those
> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
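As a sketch, a `-params-file` run might look like this (assuming `input`, `databases` and `outdir` as the core pipeline parameters; parameter names in the file mirror the CLI flags without the leading `--`):

```bash
# Write pipeline parameters to a YAML file instead of passing them on the CLI:
cat > params.yaml << 'EOF'
input: samplesheet.csv
databases: databases.csv
outdir: results
EOF

nextflow run nf-core/taxprofiler -profile docker -params-file params.yaml
```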
## Documentation
The nf-core/taxprofiler pipeline comes with documentation about the pipeline [usage](https://nf-co.re/taxprofiler/usage), [parameters](https://nf-co.re/taxprofiler/parameters) and [output](https://nf-co.re/taxprofiler/output).
## Pipeline output
To see the results of a test run with a full size dataset refer to the [results](https://nf-co.re/taxprofiler/results) tab on the nf-core website pipeline page.
For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/taxprofiler/output).
❤️ also goes to [Zandra Fagernäs](https://github.com/ZandraFagernas) for the logo.
## Contributions and Support
For further information or help, don't hesitate to get in touch on the [Slack `#taxprofiler` channel](https://nfcore.slack.com/channels/taxprofiler) (you can join with [this invite](https://nf-co.re/join/slack)).
## Citations
If you use nf-core/taxprofiler for your analysis, please cite it using the following doi: [10.5281/zenodo.7728364](https://doi.org/10.5281/zenodo.7728364)
An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
## You inject any metadata in the Nextflow '${workflow}' object
data: |
<h4>Methods</h4>
<p>Data was processed using nf-core/taxprofiler v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p>
<p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
<pre><code>${workflow.commandLine}</code></pre>
<h4>References</h4>
<ul>
<li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. <a href="https://doi.org/10.1038/nbt.3820">https://doi.org/10.1038/nbt.3820</a></li>
<li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. <a href="https://doi.org/10.1038/s41587-020-0439-x">https://doi.org/10.1038/s41587-020-0439-x</a></li>
</ul>
<div class="alert alert-info">
<h5>Notes:</h5>
<ul>
${nodoi_text}
<li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
<li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
</ul>
</div>
extra:"If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
- "fastqc":
name:"FastQC (post-Trimming)"
name:"FastQC / Falco (post-Trimming)"
path_filters:
- "*raw_*processed.zip"
- "*processed*"
path_filters_exclude:
- "*raw*"
extra:"If used in this run, Falco is a drop-in replacement for FastQC producing the same output, written by Guilherme de Sena Brandine and Andrew D. Smith."
- "fastp"
- "adapterRemoval"
- "porechop":
extra:"ℹ️: if you get the error message 'Error - was not able to plot data.' this means that porechop did not detect any adapters and therefore no statistics generated."
- "bbduk"
- "prinseqplusplus"
- "filtlong"
- "bowtie2":
name:"bowtie2"
- "samtools":
name:"Samtools Stats"
- "kraken":
name:"Kraken"
path_filters:
- "*.kraken2.report.txt"
- "*.kraken2.kraken2.report.txt"
- "kraken":
name:"Bracken"
anchor:"bracken"
target:"Bracken"
doi:"10.7717/peerj-cs.104"
info:"Estimates species abundances in metagenomics samples by probabilistically re-distributing reads in the taxonomic tree."
extra:"ℹ️: plot title will say Kraken2 due to the first step of bracken producing the same output format as Kraken. Abundance information is currently not supported in MultiQC."
path_filters:
- "*.bracken.kraken2.report.txt"
- "kraken":
name:"Centrifuge"
anchor:"centrifuge"
target:"Centrifuge"
doi:"10.1101/gr.210641.116"
info:"is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. The system uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index. Note: Figure title"
extra:"Note: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
extra:"ℹ️: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
path_filters:
- "*.centrifuge.txt"
- "malt":
name:"MALT"
- "diamond"
- "kaiju":
name:"Kaiju"
- "motus"
# It is not possible to set placement for custom kraken and centrifuge columns.
table_columns_placement:
  FastQC / Falco (pre-Trimming):
    total_sequences: 100
    avg_sequence_length: 110
    median_sequence_length: 120
    percent_duplicates: 130
    percent_gc: 140
    percent_fails: 150
  FastQC / Falco (post-Trimming):
    total_sequences: 200
    avg_sequence_length: 210
    median_sequence_length: 220
    percent_duplicates: 230
    percent_gc: 240
    percent_fails: 250
  fastp:
    pct_adapter: 300
    pct_surviving: 310
    pct_duplication: 320
    after_filtering_gc_content: 330
    after_filtering_q30_rate: 340
    after_filtering_q30_bases: 350
    filtering_result_passed_filter_reads: 360
  Adapter Removal:
    aligned_total: 360
    percent_aligned: 370
    percent_collapsed: 380
    percent_discarded: 390
  Porechop:
    Input Reads: 400
    Start Trimmed: 410
    Start Trimmed Percent: 420
    End Trimmed: 430
    End Trimmed Percent: 440
    Middle Split: 450
    Middle Split Percent: 460
  Filtlong:
    Target bases: 500
  BBDuk:
    Input reads: 800
    Total Removed bases percent: 810
    Total Removed bases: 820
    Total Removed reads percent: 830
    Total Removed reads: 840
  PRINSEQ++:
    prinseqplusplus_total: 900
  bowtie2:
    overall_alignment_rate: 1000
  Samtools Stats:
    raw_total_sequences: 1100
    reads_mapped: 1110
    reads_mapped_percent: 1120
    reads_properly_paired_percent: 1130
    non-primary_alignments: 1140
    reads_MQ0_percent: 1150
    error_rate: 1160
  Bracken:
    "% Unclassified": 1200
    "% Top 5": 1210
  Centrifuge:
    "% Unclassified": 1300
    "% Top 5": 1310
  DIAMOND:
    queries_aligned: 1400
  Kaiju:
    assigned: 1500
    "% Assigned": 1510
    "% Unclassified": 1520
  Kraken:
    "% Unclassified": 1600
    "% Top 5": 1610
  MALT:
    "Num. of queries": 1700
    Total reads: 1710
    Mappability: 1720
    Assig. Taxonomy: 1730
    Taxonomic assignment success: 1740
  motus:
    Total number of reads: 1800
    Number of reads after filtering: 1810
    Total number of inserts: 1820
    Unique mappers: 1830
    Multiple mappers: 1840
    Ignored multiple mapper without unique hit: 1850
    "Number of ref-mOTUs": 1860
    "Number of meta-mOTUs": 1870
    "Number of ext-mOTUs": 1880
table_columns_visible:
  FastQC / Falco (pre-Trimming):
    total_sequences: True
    avg_sequence_length: True
    percent_duplicates: True
    percent_gc: True
    percent_fails: False
  FastQC / Falco (post-Trimming):
    total_sequences: True
    avg_sequence_length: True
    percent_duplicates: False
    percent_gc: False
    percent_fails: False
  porechop:
    Input reads: False
    Start Trimmed:
    Start Trimmed Percent: True
    End Trimmed: False
    End Trimmed Percent: True
    Middle Split: False
    Middle Split Percent: True
  fastp:
    pct_adapter: True
    pct_surviving: True
    pct_duplication: False
    after_filtering_gc_content: False
    after_filtering_q30_rate: False
    after_filtering_q30_bases: False
  Filtlong:
    Target bases: True
  Adapter Removal:
    aligned_total: True
    percent_aligned: True
    percent_collapsed: True
    percent_discarded: False
  BBDuk:
    Input reads: False
    Total Removed bases Percent: False
    Total Removed bases: False
    Total Removed reads percent: True
    Total Removed reads: False
  "PRINSEQ++":
    prinseqplusplus_total: True
  bowtie2:
    overall_alignment_rate: True
  Samtools Stats:
    raw_total_sequences: True
    reads_mapped: True
    reads_mapped_percent: True
    reads_properly_paired_percent: False
    non-primary_alignments: False
    reads_MQ0_percent: False
    error_rate: False
  Kraken: False
  Bracken: False
  Centrifuge: False
  DIAMOND: False
  Kaiju: False
  MALT: False
  motus: False
table_columns_name:
  FastQC / Falco (pre-Trimming):
    total_sequences: "Nr. Input Reads"
    avg_sequence_length: "Length Input Reads"
    percent_gc: "% GC Input Reads"
    percent_duplicates: "% Dups Input Reads"
    percent_fails: "% Failed Input Reads"
  FastQC / Falco (post-Trimming):
    total_sequences: "Nr. Processed Reads"
    avg_sequence_length: "Length Processed Reads"
    percent_gc: "% GC Processed Reads"
    percent_duplicates: "% Dups Processed Reads"
    percent_fails: "% Failed Processed Reads"
  Samtools Stats:
    raw_total_sequences: "Nr. Reads Into Mapping"
    reads_mapped: "Nr. Mapped Reads"
    reads_mapped_percent: "% Mapped Reads"
extra_fn_clean_exts:
  - "kraken2.report.txt"
  - ".txt"
  - ".settings"
  - ".bbduk"
  - ".unmapped"
  - "_filtered"
  - type: remove
    pattern: "_falco"
section_comments:
  general_stats: "By default, all read count columns are displayed as millions (M) of reads."
"footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})"
This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
## Pipeline overview
The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
- [FastQC](#fastqc) - Raw read QC
- [falco](#fastqc) - Alternative to FastQC for raw read QC
- [fastp](#fastp) - Adapter trimming for Illumina data
- [AdapterRemoval](#adapterremoval) - Adapter trimming for Illumina data
- [Porechop](#porechop) - Adapter removal for Oxford Nanopore data
- [BBDuk](#bbduk) - Quality trimming and filtering for Illumina data
- [PRINSEQ++](#prinseq) - Quality trimming and filtering for Illumina data
- [Filtlong](#filtlong) - Quality trimming and filtering for Nanopore data
- [Bowtie2](#bowtie2) - Host removal for Illumina reads
- [minimap2](#minimap2) - Host removal for Nanopore reads
- [SAMtools stats](#samtools-stats) - Statistics from host removal
- [SAMtools fastq](#samtools-fastq) - Converts unmapped BAM file to fastq format (minimap2 only)
- [Analysis Ready Reads](#analysis-ready-reads) - Optional results directory containing the final processed reads used as input for classification/profiling.
- [Bracken](#bracken) - Taxonomic classifier using k-mers and abundance estimations
- [Kraken2](#kraken2) - Taxonomic classifier using exact k-mer matches
- [KrakenUniq](#krakenuniq) - Taxonomic classifier that combines the k-mer-based classification and the number of unique k-mers found in each species
- [Centrifuge](#centrifuge) - Taxonomic classifier that uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index.
- [Kaiju](#kaiju) - Taxonomic classifier that finds maximum (in-)exact matches on the protein-level.
- [Diamond](#diamond) - Sequence aligner for protein and translated DNA searches.
- [MALT](#malt) - Sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics
- [MetaPhlAn3](#metaphlan3) - Genome-level marker gene based taxonomic classifier
- [mOTUs](#motus) - Tool for marker gene-based OTU (mOTU) profiling.
- [TAXPASTA](#taxpasta) - Tool to standardise taxonomic profiles as well as merge profiles across samples from the same database and classifier/profiler.
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
### FastQC

<details markdown="1">
<summary>Output files</summary>

- `{fastqc,falco}/`
  - `{raw,preprocessed}/`
    - `*html`: FastQC or Falco report containing quality metrics in HTML format.
    - `*.txt`: FastQC or Falco report containing quality metrics in TXT format.
    - `*.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images (FastQC only).
</details>
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).
If preprocessing is turned on, nf-core/taxprofiler runs FastQC/Falco twice, once before and once after adapter removal/read merging, to allow evaluation of the performance of these preprocessing steps. Note that in the General Stats table, the columns of these two instances of FastQC/Falco are placed next to each other to make it easier to evaluate. However, the columns of the actual preprocessing steps (i.e. fastp, AdapterRemoval, and Porechop) will be displayed _after_ the two FastQC/Falco columns, even if they were run 'between' the two FastQC/Falco jobs in the pipeline itself.
> ℹ️ Falco produces identical output to FastQC but in the `falco/` directory.
> **NB:** The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
### fastp
[fastp](https://github.com/OpenGene/fastp) is a FASTQ pre-processing tool for quality control, trimming of adapters, quality filtering and other features.
It is used in nf-core/taxprofiler for adapter trimming of short-reads.
<details markdown="1">
<summary>Output files</summary>
- `fastp`
- `<sample_id>.fastp.fastq.gz`: File with the trimmed unmerged fastq reads.
- `<sample_id>.merged.fastq.gz`: File with the reads that were successfully merged.
- `<sample_id>.*{log,html,json}`: Log files in different formats.
</details>
By default nf-core/taxprofiler will only provide the `<sample_id>.fastp.fastq.gz` file if fastp is selected. The file `<sample_id>.merged.fastq.gz` will be available in the output folder if you provide the argument `--shortread_qc_mergepairs` (optionally retaining un-merged pairs when in combination with `--shortread_qc_includeunmerged`).
You can change the default value for low complexity filtering by using the argument `--shortread_complexityfilter_fastp_threshold`.
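As a sketch, a run combining these fastp-related options might look like the following (the flag names are taken from this section; depending on your pipeline version, additional flags may be required to enable each preprocessing step, and the threshold value is illustrative):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --shortread_qc_mergepairs --shortread_qc_includeunmerged \
  --shortread_complexityfilter_fastp_threshold 40
```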
### AdapterRemoval
[AdapterRemoval](https://adapterremoval.readthedocs.io/en/stable/) searches for and removes remnant adapter sequences from High-Throughput Sequencing (HTS) data and (optionally) trims low quality bases from the 3' end of reads following adapter removal. It is popular in the field of palaeogenomics. The output logs are stored in the results folder, and as a part of the MultiQC report.
<details markdown="1">
<summary>Output files</summary>
- `adapterremoval/`
- `<sample_id>.settings`: AdapterRemoval log file containing general adapter removal, read trimming and merging statistics
- `<sample_id>.collapsed.fastq.gz` - read-pairs that merged and did not undergo trimming (only when `--shortread_qc_mergepairs` supplied)
  - `<sample_id>.collapsed.truncated.fastq.gz` - read-pairs that merged and underwent quality trimming (only when `--shortread_qc_mergepairs` supplied)
- `<sample_id>.pair1.truncated.fastq.gz` - read 1 of pairs that underwent quality trimming
- `<sample_id>.pair2.truncated.fastq.gz` - read 2 of pairs that underwent quality trimming (and could not merge if `--shortread_qc_mergepairs` supplied)
- `<sample_id>.singleton.truncated.fastq.gz` - orphaned read pairs where one of the pair was discarded
- `<sample_id>.discard.fastq.gz` - reads that were discarded due to length or quality filtering
</details>
By default nf-core/taxprofiler will only provide the `.settings` file if AdapterRemoval is selected.
You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as complexity filtering, host removal, run merging etc..
### Porechop
[Porechop](https://github.com/rrwick/Porechop) is a tool for finding and removing adapters from Oxford Nanopore reads. Adapters on the ends of reads are trimmed, and if a read has an adapter in its middle, it is considered chimeric and chopped into separate reads.
The output logs are saved in the output folder and are part of the MultiQC report. You do not normally need to check these manually.
You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ⚠️ We do **not** recommend using Porechop if you are already trimming the adapters with ONT's basecaller Guppy.
### BBDuk
[BBDuk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/) stands for Decontamination Using Kmers. BBDuk was developed to combine most common data-quality-related trimming, filtering, and masking operations into a single high-performance tool.
It is used in nf-core/taxprofiler for complexity filtering using different algorithms. This means that it will remove reads with low sequence diversity (e.g. mono- or dinucleotide repeats).
<details markdown="1">
<summary>Output files</summary>

- `bbduk/`
  - `<sample_id>.fastq.gz`: resulting FASTQ file without low-complexity reads
</details>
By default nf-core/taxprofiler will only provide the `.log` file if BBDuk is selected as the complexity filtering tool. You will only find the complexity filtered reads in your results directory if you provide `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc..
### PRINSEQ++
[PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus) is a C++ implementation of the [prinseq-lite.pl](https://prinseq.sourceforge.net/) program. It can be used to filter, reformat or trim genomic and metagenomic sequence data.
It is used in nf-core/taxprofiler for complexity filtering using different algorithms. This means that it will remove reads with low sequence diversity (e.g. mono- or dinucleotide repeats).
<details markdown="1">
<summary>Output files</summary>
- `prinseqplusplus/`
- `<sample_id>.log`: log file containing number of reads. Row IDs correspond to: `min_len, max_len, min_gc, max_gc, min_qual_score, min_qual_mean, ns_max_n, noiupac, derep, lc_entropy, lc_dust, trim_tail_left, trim_tail_right, trim_qual_left, trim_qual_right, trim_left, trim_right`
- `<sample_id>_good_out.fastq.gz`: resulting FASTQ file without low-complexity reads
</details>
By default nf-core/taxprofiler will only provide the `.log` file if PRINSEQ++ is selected as the complexity filtering tool. You will only find the complexity filtered `.fastq` files in your results directory if you supply `--save_complexityfiltered_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as host removal, run merging etc..
### Filtlong
[Filtlong](https://github.com/rrwick/Filtlong) is a quality filtering tool for long reads. It can take a set of long reads and produce a smaller, better subset.
<details markdown="1">
<summary>Output files</summary>
- `filtlong/`
  - `<sample_id>_filtered.fastq.gz`: FASTQ file containing the reads retained after quality/length filtering

</details>

You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ⚠️ We do **not** recommend using Filtlong if you are performing filtering of low quality reads with ONT's basecaller Guppy.
### Bowtie2
[Bowtie 2](https://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively long (e.g. mammalian) genomes.
It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) and/or other possible contaminant reads (e.g. Phi X) from short-read `.fastq` files prior to profiling.
<details markdown="1">
<summary>Output files</summary>
- `bowtie2/`
- `build/`
    - `*.bt2`: Bowtie2 indices of reference genome, only if `--save_hostremoval_index` supplied.
- `align/`
- `<sample_id>.bam`: BAM file containing reads that aligned against the user-supplied reference genome as well as unmapped reads
- `<sample_id>.bowtie2.log`: log file about the mapped reads
  - `<sample_id>.unmapped.fastq.gz`: the off-target reads from the mapping that are used in downstream steps.
</details>
By default nf-core/taxprofiler will only provide the `.log` file if host removal is turned on. You will only have a `.bam` file if you specify `--save_hostremoval_bam`. This will contain _both_ mapped and unmapped reads. You will only get FASTQ files if you specify `--save_hostremoval_unmapped` - these contain only unmapped reads. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ℹ️ Unmapped reads in FASTQ are only found in this directory for short-reads, for long-reads see [`samtools/fastq/`](#samtools-fastq)
> ⚠️ The resulting `.fastq` files may _not_ always be the 'final' reads that go into taxprofiling, if you also run other steps such as run merging etc..
> ℹ️ While there is a dedicated section in the MultiQC HTML for Bowtie2, these values are not displayed by default in the General Stats table. Rather, alignment statistics against the host genome are reported via the samtools stats module in the MultiQC report for direct comparison with minimap2 (see below).
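As a sketch, a run saving the host-removal outputs described above might look like this (the three saving flags are named in this section; any flags required to enable host removal itself, and the input files, are assumptions):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --save_hostremoval_index --save_hostremoval_bam --save_hostremoval_unmapped
```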
### minimap2
[minimap2](https://github.com/lh3/minimap2) is an alignment tool suited to mapping long reads to reference sequences.
It is used with nf-core/taxprofiler to allow removal of 'host' (e.g. human) or other possible contaminant reads from long-read `.fastq` files prior to taxonomic classification/profiling.
<details markdown="1">
<summary>Output files</summary>
- `minimap2`
- `build/`
- `*.mmi2`: minimap2 indices of reference genome, only if `--save_hostremoval_index` supplied.
- `align/`
- `<sample_id>.bam`: Alignment file in BAM format containing both mapped and unmapped reads.
</details>
By default, nf-core/taxprofiler will only provide the `.bam` file containing mapped and unmapped reads if saving of host removal for long reads is turned on via `--save_hostremoval_bam`.
> ℹ️ minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics against the host genome are reported via the samtools stats module in the MultiQC report.
> ℹ️ Unlike Bowtie2, minimap2 does not produce an unmapped FASTQ file by itself. See [`samtools/fastq`](#samtools-fastq)
### SAMtools fastq
[SAMtools fastq](http://www.htslib.org/doc/1.1/samtools.html) converts a `.sam`, `.bam`, or `.cram` alignment file to FASTQ format.
<details markdown="1">
<summary>Output files</summary>
- `samtools/fastq/`
- `<sample_id>_interleaved.fq.gz`: Unmapped reads only in FASTQ gzip format
</details>
This directory will be present and contain the unmapped reads in `.fastq` format from long-read minimap2 host removal, if `--save_hostremoval_unmapped` is supplied. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
> ℹ️ For short-read unmapped reads, see [bowtie2](#bowtie2).
### Analysis Ready Reads
> ℹ️ This optional results directory will only be present in the pipeline results when supplying `--save_analysis_ready_reads`.
<details markdown="1">
<summary>Output files</summary>
- `analysis_ready_reads/`
- `<sample_id>_{fq,fastq}.gz`: Final reads that underwent preprocessing and were sent for classification/profiling.
</details>
The results directory will contain the 'final' processed reads used as input for classification/profiling. It will _only_ include the output of the _last_ step of any combinations of preprocessing steps that may have been specified in the run configuration. For example, if you perform the read QC and host-removal preprocessing steps, the final reads that are sent to classification/profiling are the host-removed FASTQ files - those will be the ones present in this directory.
> ⚠️ If you turn off all preprocessing steps, then no results will be present in this directory. This happens independently for short- and long-reads. I.e. you will only have FASTQ files for short reads in this directory if you skip all long-read preprocessing.
### SAMtools stats
[SAMtools stats](http://www.htslib.org/doc/samtools-stats.html) collects statistics from a `.sam`, `.bam`, or `.cram` alignment file and outputs in a text format.
In most cases you do not need to check this file, as it is rendered in the MultiQC run report.
### Run Merging
nf-core/taxprofiler offers the option to merge FASTQ files of multiple sequencing runs or libraries that derive from the same sample, as specified in the input samplesheet.
This is the last possible preprocessing step, so if you have multiple runs or libraries (and run merging turned on), this will represent the final reads that will go into classification/profiling steps.
<details markdown="1">
<summary>Output files</summary>
- `run_merging/`
- `*.fastq.gz`: Concatenated FASTQ files on a per-sample basis
</details>
Note that you will only find samples that went through the run merging step in this directory. Samples with a single run or library will not go through this step of the pipeline and thus will not be present in this directory.
This directory and its FASTQ files will only be present if you supply `--save_runmerged_reads`. Alternatively, if you wish only to have the 'final' reads that go into classification/profiling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_reads`, in which case the reads will be in the folder `analysis_ready_reads`.
### Bracken
[Bracken](https://ccb.jhu.edu/software/bracken/) (Bayesian Reestimation of Abundance with Kraken) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample. Bracken uses the taxonomy labels assigned by Kraken, a highly accurate metagenomics classification algorithm, to estimate the number of reads originating from each species present in a sample.
> 🛈 The first step of using Bracken requires running Kraken2, therefore the initial results before abundance estimation will be found in `<your_results>/kraken2/<your_bracken_db_name>`.
<details markdown="1">
<summary>Output files</summary>
- `bracken/`
- `bracken_<db_name>_combined_reports.txt`: combined bracken results as output from Bracken's `combine_bracken_outputs.py` script
- `<db_name>/`
- `<sample>_<db_name>.tsv`: TSV file containing per-sample summary of Bracken results with abundance information
</details>
The main taxonomic profiling file from Bracken is the `*.tsv` file. This provides the basic results from Kraken2 but with the corrected abundance information.
### Kraken2
[Kraken](https://ccb.jhu.edu/software/kraken2/) is a taxonomic sequence classifier that assigns taxonomic labels to DNA sequences. Kraken examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer.
<details markdown="1">
<summary>Output files</summary>
- `kraken2/`
- `<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `krakentools`)
- `<db_name>/`
- `<sample_id>_<db_name>.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample
- `<sample_id>_<db_name>.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample
- `<sample_id>_<db_name>.report.txt`: A Kraken2 report that summarises the fraction abundance, taxonomic ID, number of Kmers, taxonomic path of all the hits in the Kraken2 run for a given sample
- `<sample_id>_<db_name>.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample
</details>
The main taxonomic classification file from Kraken2 is the `_combined_reports.txt` or `*report.txt` file. The former provides you the broadest overview of the taxonomic classification results across all samples against a single database, where you get two columns for each sample e.g. `2_all` and `2_lvl`, as well as a summarised column summing up across all samples `tot_all` and `tot_lvl`. The latter gives you the most information for a single sample. The report file is also used for the taxpasta step.
You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--kraken2_save_reads` and/or `--kraken2_save_readclassification` parameters to the pipeline.
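As a sketch, retaining these optional Kraken2 outputs might look like the following (the saving flags are named above; `--run_kraken2` is assumed to be the flag that enables the classifier):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --run_kraken2 --kraken2_save_reads --kraken2_save_readclassification
```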
### KrakenUniq
[KrakenUniq](https://github.com/fbreitwieser/krakenuniq) (formerly KrakenHLL) is an extension to the fast k-mer-based classification [Kraken](https://github.com/DerrickWood/kraken) with an efficient algorithm for additionally assessing the coverage of unique k-mers found in each species in a dataset.
<details markdown="1">
<summary>Output files</summary>
- `krakenuniq/`
- `<db_name>/`
- `<sample_id>_<db_name>.classified.fastq.gz`: FASTQ file containing all reads that had a hit against a reference in the database for a given sample
- `<sample_id>_<db_name>.unclassified.fastq.gz`: FASTQ file containing all reads that did not have a hit in the database for a given sample
  - `<sample_id>_<db_name>.report.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of k-mers, taxonomic path of all the hits, with an additional column for k-mer coverage, that allows for more accurate distinguishing between false-positive/true-positive hits
- `<sample_id>_<db_name>.classifiedreads.txt`: A list of read IDs and the hits each read had against each database for a given sample
</details>
The main taxonomic classification file from KrakenUniq is the `*report.txt` file. This is an extension of the Kraken2 report with the additional k-mer coverage information that provides more information about the accuracy of hits.
You will only receive the `.fastq` and `*classifiedreads.txt` file if you supply `--krakenuniq_save_reads` and/or `--krakenuniq_save_readclassification` parameters to the pipeline.
> ⚠️ The output system of KrakenUniq can result in other `stdout` or `stderr` logging information being saved in the report file, therefore you must check your report files before downstream use!
### Centrifuge
[Centrifuge](https://github.com/DaehwanKimLab/centrifuge) is a taxonomic sequence classifier that uses a Burrows-Wheeler transform and Ferragina-Manzini index for storing and mapping sequences.
<details markdown="1">
<summary>Output files</summary>
- `centrifuge`
- `<sample_id>.centrifuge.mapped.fastq.gz`: `FASTQ` files containing all mapped reads
- `<sample_id>.centrifuge.report.txt`: A classification report that summarises the taxonomic ID, the taxonomic rank, length of genome sequence, number of classified and uniquely classified reads
- `<sample_id>.centrifuge.results.txt`: A file that summarises the classification assignment for a read, i.e read ID, sequence ID, score for the classification, score for the next best classification, number of classifications for this read
- `<sample_id>.centrifuge.txt`: A Kraken2-style report that summarises the fraction abundance, taxonomic ID, number of k-mers, taxonomic path of all the hits in the centrifuge run for a given sample
- `<sample_id>.centrifuge.unmapped.fastq.gz`: FASTQ file containing all unmapped reads
</details>
The main taxonomic classification files from Centrifuge are the `_combined_reports.txt`, `*report.txt`, `*results.txt` and the `*centrifuge.txt`. The latter is used by the taxpasta step. You will receive the `.fastq` files if you supply `--centrifuge_save_reads`.
### Kaiju
[Kaiju](https://github.com/bioinformatics-centre/kaiju) is a taxonomic classifier that finds maximum exact matches on the protein-level using the Burrows-Wheeler transform.
<details markdown="1">
<summary>Output files</summary>
- `kaiju`
- `kaiju_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by kaiju2table)
- `<db_name>/`
  - `<sample_id>_<db_name>.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonomic ID
- `<sample_id>_<db_name>.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`)
</details>
The most useful summary file is the `_combined_reports.txt` file, which summarises hits across all reads and samples. Separate per-sample summaries can be seen in `<db>/*.txt`. However, if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.
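As a sketch, switching the summarised rank might look like this (`--kaiju_taxon_rank` is named above; `--run_kaiju` is assumed to be the flag that enables Kaiju):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --run_kaiju --kaiju_taxon_rank genus
```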
### DIAMOND
[DIAMOND](https://github.com/bbuchfink/diamond) is a sequence aligner for translated DNA searches or protein sequences against a protein reference database such as NR. It is a replacement for the NCBI BLAST software tools. It has many key features and is used as a taxonomic classifier in nf-core/taxprofiler.
<details markdown="1">
<summary>Output files</summary>
- `diamond`
- `<sample_id>.log`: A log file containing stdout information
- `<sample_id>*.{blast,xml,txt,daa,sam,tsv,paf}`: A file containing alignment information in various formats, or taxonomic information in a text-based format. Exact output depends on user choice.
</details>
By default you will receive a TSV output. Alternatively, you will receive a `*.sam` file if you provide the parameter `--diamond_save_reads`, but in this case no taxonomic classification will be available, only the aligned reads in SAM format.
> ℹ️ DIAMOND has many output formats, so depending on your [choice](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options) with `--diamond_output_format` you will receive the taxonomic information in a different format.
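As a sketch, requesting a specific output format might look like this (`--diamond_output_format` is named above and accepts the formats linked in the DIAMOND documentation; `--run_diamond` is assumed to be the flag that enables DIAMOND):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --run_diamond --diamond_output_format tsv
```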
### MALT
[MALT](https://software-ab.cs.uni-tuebingen.de/download/malt) is a fast replacement for BLASTX, BLASTP and BLASTN, and provides both local and semi-global alignment capabilities.
<details markdown="1">
<summary>Output files</summary>
- `malt/`
- `<db_name>/`
- `<sample_id>.blastn.sam`: sparse SAM file containing alignments of each hit
- `<sample_id>.megan`: summary file that can be loaded into the [MEGAN6](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/) interactive viewer. Generated by MEGAN6 companion tool `rma2info`
- `<sample_id>.rma6`: binary file containing all alignments and taxonomic information of hits that can be loaded into the [MEGAN6](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/) interactive viewer
- `<sample_id>.txt.gz`: text file containing taxonomic IDs and read counts against each taxon. Generated by MEGAN6 companion tool `rma2info`
</details>
The main output of MALT is the `.rma6` file format, which can only be loaded into MEGAN and its related tools. We provide the `rma2info` text files for improved compatibility with spreadsheet programs and other programmatic data manipulation tools; however, these contain only limited information compared to the 'binary' RMA6 file format (the `.txt` file only contains taxonomic ID and count, whereas RMA6 has taxonomic lineage information).
You will only receive the `.sam` and `.megan` files if you supply `--malt_save_reads` and/or `--malt_generate_megansummary` parameters to the pipeline.
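As a sketch, keeping the MALT alignments and MEGAN summaries might look like this (the two saving flags are named above; `--run_malt` is assumed to be the flag that enables MALT):

```bash
nextflow run nf-core/taxprofiler -profile docker \
  --input samplesheet.csv --databases databases.csv --outdir results \
  --run_malt --malt_save_reads --malt_generate_megansummary
```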
### MetaPhlAn3
[MetaPhlAn3](https://github.com/biobakery/metaphlan) is a computational tool for profiling the composition of microbial communities (Bacteria, Archaea and Eukaryotes) from metagenomic shotgun sequencing data (i.e. not 16S) with species-level resolution via marker genes.
<details markdown="1">
<summary>Output files</summary>
- `metaphlan3/`
- `metaphlan3_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `metaphlan_merge_tables`)
- `<db_name>/`
- `<sample_id>.biom`: taxonomic profile in BIOM format
- `<sample_id>.bowtie2out.txt`: BowTie2 alignment information (can be re-used for skipping alignment when re-running MetaPhlAn3 with different parameters)
- `<sample_id>_profile.txt`: MetaPhlAn3 taxonomic profile including abundance estimates
</details>
The main taxonomic profiling file from MetaPhlAn3 is the `*_profile.txt` file. This provides the abundance estimates from MetaPhlAn3, however it does not include raw counts by default.
### mOTUs
[mOTUs](https://github.com/motu-tool/mOTUs) is a taxonomic profiler that maps reads to a unique marker-specific database and estimates the relative abundance of known and unknown species.
<details markdown="1">
<summary>Output files</summary>
- `motus`
- `<sample_id>.log`: A log file that contains summary statistics
- `<sample_id>.out`: A classification file that summarises taxonomic identifiers, by default at the rank of mOTUs (i.e., species level), and their relative abundances in the profiled sample.
- `motus_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `motus_merge`)
</details>
Normally `*_combined_reports.txt` is the most useful file for downstream analyses, but the per-sample `.out` files can provide additional, more specific information. By default, nf-core/taxprofiler provides a column with the NCBI taxonomic ID, as this is used in the taxpasta step. You can disable this column with the argument `--motus_remove_ncbi_ids`.
You will receive relative abundances instead of read counts if you provide the argument `--motus_use_relative_abundance`.
### Krona
[Krona](https://github.com/marbl/Krona) allows the exploration of (metagenomic) hierarchical data with interactive zooming through multi-layered pie charts.
Krona charts will be generated by the pipeline for supported tools (Kraken2, Centrifuge, Kaiju, and MALT).
<details markdown="1">
<summary>Output files</summary>
- `krona/`
- `<tool_name>_<db_name>.html`: per-tool/per-database interactive HTML file containing hierarchical piecharts
</details>
The resulting HTML files can be loaded into your web browser for exploration. Each file will have a dropdown to allow you to switch between each sample aligned against the given database of the tool.
### TAXPASTA
[TAXPASTA](https://github.com/taxprofiler/taxpasta) standardises and merges two or more taxonomic profiles across samples into one single table. It supports multiple different classifiers, simplifying comparison of taxonomic classification results between tools and databases.
<details markdown="1">
<summary>Output files</summary>
- `taxpasta`
- `<tool>_<database>*.{tsv,csv,arrow,parquet,biom}`: Standardised taxon table containing multiple samples. The standard format is the `tsv`. The first column describes the taxonomy ID and the rest of the columns describe the read counts for each sample.
</details>
By providing the path to a directory containing taxdump files to `--taxpasta_taxonomy_dir`, the taxon name, the taxon rank, the taxon's entire lineage including taxon names, and/or the taxon's entire lineage including taxon identifiers can also be added to the output in addition to just the taxon ID. Addition of this extra information can be turned on by using the parameters `--taxpasta_add_name`, `--taxpasta_add_rank`, `--taxpasta_add_lineage` and `--taxpasta_add_idlineage` respectively.
These files will likely be the most useful files for the comparison of differences in classification between different tools or building consensuses, with the caveat they have slightly less information than the actual output from each tool (which may have non-standard information e.g. taxonomic rank, percentage of hits, abundance estimations).
The following report files are used for the taxpasta step:
- Bracken: `<sample>_<db_name>.tsv` Taxpasta uses the `new_est_reads` column for the standardised profile.
- Centrifuge: `<sample_id>.centrifuge.txt` Taxpasta uses the `direct_assigned_reads` column for the standardised profile.
- Diamond: `<sample_id>` Taxpasta summarises the number of reads per NCBI taxonomy ID for the standardised profile.
- Kaiju: `<sample_id>_<db_name>.kaijutable.txt` Taxpasta uses the `reads` column from kaiju2table for the standardised profile.
- KrakenUniq: `<sample_id>_<db_name>.report.txt` Taxpasta uses the `reads` column for the standardised profile.
- Kraken2: `<sample_id>_<db_name>.report.txt` Taxpasta uses the `direct_assigned_reads` column for the standardised profile.
- MALT: `<sample_id>.txt.gz` Taxpasta uses the `count` (second) column from the output of MEGAN6's rma2info for the standardised profile.
- MetaPhlAn3: `<sample_id>_profile.txt` Taxpasta uses the `relative_abundance` column multiplied with a fixed number to yield an integer for the standardised profile.
- mOTUs: `<sample_id>.out` Taxpasta uses the `read_count` column for the standardised profile.
> ⚠️ Please be aware that the outputs of each tool's standardised profile _may not_ be directly comparable between tools. Some may report raw read counts, whereas others may report abundance information. Please always refer to the list above for which information is used for each tool.
### MultiQC
Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see <http://multiqc.info>.
All tools in taxprofiler supported by MultiQC will have a dedicated section showing summary statistics of each tool based on information stored in log files.
You can expect either sections and/or general stats columns in the MultiQC reports for the following tools:
- fastqc
- adapterRemoval
- fastp
- bbduk
- prinseqplusplus
- porechop
- filtlong
- bowtie2
- minimap2
- samtools (stats)
- kraken
- bracken
- centrifuge
- kaiju
- metaphlan
- diamond
- malt
- motus
> ℹ️ The 'General Stats' table by default will only show statistics referring to pre-processing steps, and will not display possible values from each classifier/profiler, unless turned on by the user within the 'Configure Columns' menu or via a custom MultiQC config file (`--multiqc_config`)
nf-core/taxprofiler is a pipeline for highly-parallelised taxonomic classification and profiling of shotgun metagenomic data across multiple tools simultaneously. In addition to running multiple classification and profiling tools, it allows you to perform taxonomic classification and profiling across multiple databases and settings per tool at the same time, and produces standardised output tables to allow immediate cross-comparison of results between tools.
To run nf-core/taxprofiler, at a minimum you require two inputs:
- a sequencing read samplesheet
- a database samplesheet
Both contain metadata and paths to the data of your input samples and databases.
When running nf-core/taxprofiler, every step and tool is 'opt in'. To run a given classifier or profiler you must make sure to supply both a database in your `<database>.csv` and the `--run_<profiler>` flag to your command. Omitting either will result in the profiling tool not executing.
nf-core/taxprofiler also includes optional pre-processing (adapter clipping, run merging etc.) or post-processing (visualisation) steps. These are also opt in with a `--perform_<step>` flag. In some cases, the pre- and post-processing steps may also require additional files. Please check the parameters tab of this documentation for more information.
Please see the rest of this page for information about how to prepare input samplesheets and databases and how to run Nextflow pipelines. See the [parameters](https://nf-co.re/taxprofiler/parameters) documentation for more information about specific options the pipeline also offers.
## Samplesheet inputs
nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use the `--input` parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 4 columns with a header row as in the examples below.
This samplesheet is then specified on the command line as follows:
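(A sketch; replace the bracketed paths with your own files:)
```bash
--input '[path to samplesheet file]' --databases '[path to database sheet file]'
```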
The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet.
A final samplesheet file consisting of both single- and paired-end data, as well as long-read FASTA files may look something like the one below. This is for 6 samples, where `2612` has been sequenced twice.
> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
> ⚠️ While one can include both short-read and long-read data in one run, we recommend that you split these across _two_ pipeline runs and database sheets (see below). This will allow classification optimisation for each data type, and make MultiQC run-reports more readable (due to run statistics having very large differences in magnitude).
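For illustration, an abbreviated sketch of what such a samplesheet might look like (run accessions and paths are placeholders; `2612` appears twice to show multiple runs of the same sample):
```csv
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,
```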
An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
### Full database sheet
nf-core/taxprofiler supports multiple databases being classified/profiled against in parallel for each tool.
Databases can be supplied either in the form of a compressed `.tar.gz` archive of a directory containing all relevant database files or the path to a directory on the filesystem.
> ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files.
The pipeline takes the paths to these databases, and the specific classification/profiling parameters to use with each, as input via a four-column comma-separated sheet.
> ⚠️ To allow user freedom, nf-core/taxprofiler does not check for the presence of mandatory parameters, nor the validity of non-file database parameters for correct execution of the tool - excluding options offered via pipeline-level parameters! Please validate your database parameters (cross-referencing the [parameters](https://nf-co.re/taxprofiler/parameters) documentation and the given tool's documentation) before submitting the database sheet! For example, if you don't use the default read length - Bracken will require `-r <read_length>` in the `db_params` column.
An example database sheet can look as follows, where 7 tools are being used, and `malt` and `kraken2` will be used against two databases each.
`kraken2` will be run twice even though only having a single 'dedicated' database because specifying `bracken` implies first running `kraken2` on the `bracken` database, as required by `bracken`.
For Bracken, if you wish to supply any parameters to either the Kraken or Bracken step you **must** have a _semi-colon_ `;` separated list in `db_params`. This allows you to specify the Kraken2 parameters before, and Bracken parameters after, the `;`, as Bracken is a two-step process. This is particularly important if you supply a Bracken database with a non-default read length parameter. If you do not have any parameters to specify, you can leave this empty.
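A sketch of such a database sheet (database names, parameters, and paths are placeholders to adapt to your own databases; note the `;` in the `bracken` row separating Kraken2 from Bracken parameters):
```csv
tool,db_name,db_params,db_path
malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
bracken,db1,;-r 150,/<path>/<to>/bracken/testdb-bracken.tar.gz
kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
krakenuniq,db3,,/<path>/<to>/krakenuniq/testdb-krakenuniq.tar.gz
centrifuge,db4,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
metaphlan3,db5,,/<path>/<to>/metaphlan3/metaphlan_database/
motus,db_mOTU,,/<path>/<to>/motus/motus_database/
```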
| Column      | Description |
| ----------- | ----------- |
| `tool`      | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database. |
| `db_name`   | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database. |
| `db_params` | Any parameters that you wish the given taxonomic classifier/profiler to use when classifying/profiling against this specific database. Can be empty to use the taxonomic classifier/profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specify particular file extensions - this should already be addressed via pipeline parameters. For Bracken databases, this must at a minimum contain a `;` separating Kraken2 from Bracken parameters. |
| `db_path`   | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required]. |
> 💡 You can also specify the same database directory/file twice (ensuring unique `db_name`s) and specify different parameters for each database to compare the effect of different parameters during classification/profiling.
nf-core/taxprofiler will automatically decompress and extract any compressed archives for you.
The (uncompressed) database paths (`db_path`) for each tool are expected to contain:
- [**Bracken**:](#bracken-custom-database) output of the combined `kraken2-build` and `bracken-build` process.
- [**Centrifuge**:](#centrifuge-custom-database) output of `centrifuge-build`.
- [**DIAMOND**:](#diamond-custom-database) output of `diamond makedb`.
- [**Kaiju**:](#kaiju-custom-database) output of `kaiju-makedb`.
- [**Kraken2**:](#kraken2-custom-database) output of `kraken2-build` command(s).
- [**KrakenUniq**:](#krakenuniq-custom-database) output of `krakenuniq-build` command(s).
- [**MALT**:](#malt-custom-database) output of `malt-build`.
- [**MetaPhlAn3**:](#metaphlan3-custom-database) output of `metaphlan --install`, or downloaded from links on the [MetaPhlAn3 wiki](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#customizing-the-database).
- [**mOTUs**:](#motus-custom-database) the directory `db_mOTU/` that is downloaded via `motus downloadDB`.
> ℹ️ Click the links in the list above for short quick-reference tutorials on how to generate custom databases for each tool.
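## Running the pipeline

A sketch of a typical launch command (the classifier flag, file names, and output directory are placeholders for your own setup):
```bash
nextflow run nf-core/taxprofiler \
    --input samplesheet.csv \
    --databases databases.csv \
    --outdir <OUTDIR> \
    --run_kraken2 \
    -profile docker
```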
This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
When running nf-core/taxprofiler, every step and tool is 'opt in'. To run a given classifier/profiler you must make sure to supply both a database in your `<database>.csv` and the `--run_<profiler>` flag to your command. Omitting either will result in the classification/profiling tool not executing. If you wish to perform pre-processing (adapter clipping, run merging etc.) or post-processing (visualisation) steps, these are also opt in with a `--perform_<step>` flag. In some cases, the pre- and post-processing steps may also require additional files. Please check the parameters tab of this documentation for more information.
Note that the pipeline will create the following files in your working directory:
```bash
work                # Directory containing the nextflow working files
<OUTDIR>            # Finished results in specified location (defined with --outdir)
.nextflow_log       # Log file from Nextflow
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
> ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
The above pipeline run specified with a params file in yaml format:
```bash
nextflow run nf-core/taxprofiler -profile docker -params-file params.yaml
```
with `params.yaml` containing:
```yaml
input: './samplesheet.csv'
outdir: './results/'
genome: 'GRCh37'
<...>
```
You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
### Sequencing quality control
[`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, per-base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. nf-core/taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as a drop-in replacement, with reportedly improved performance, particularly for long reads.
### Preprocessing Steps
nf-core/taxprofiler offers four main steps for preprocessing raw sequencing reads:
- [**Read processing**](#read-processing): adapter clipping and pair-merging.
- [**Complexity filtering**](#complexity-filtering): removal of low-sequence complexity reads.
- [**Host read-removal**](#host-read-removal): removal of reads aligning to reference genome(s) of a host.
- [**Run merging**](#run-merging): concatenation of multiple FASTQ chunks/sequencing runs/libraries of a sample.
> ℹ️ You can save the 'final' reads used for classification/profiling from any combination of these steps with `--save_analysis_ready_reads`.
#### Read Processing
Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
It is highly recommended to run this on raw reads to remove artefacts from sequencing that can cause false-positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles. If you are using public data, these steps have normally already been performed, but you should still check that this is indeed the case.
There are currently two options for short-read preprocessing: [`fastp`](https://github.com/OpenGene/fastp) or [`adapterremoval`](https://github.com/MikkelSchubert/adapterremoval).
For adapter clipping, you can either rely on the tool's default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`).
By default, paired-end merging is not activated. In this case, paired-end 'alignment' against the reference databases is performed where supported; where not supported, pairs will be independently classified/profiled. If paired-end merging is activated you can also specify whether to include unmerged reads in the reads sent for classification/profiling (`--shortread_qc_mergepairs` and `--shortread_qc_includeunmerged`).
You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during classification/profiling, with minimal gain.
There is currently one option for long-read Oxford Nanopore processing: [`porechop`](https://github.com/rrwick/Porechop).
For both short-read and long-read preprocessing, you can optionally save the resulting processed reads with `--save_preprocessed_reads`.
#### Complexity Filtering
Complexity filtering can be activated via the `--perform_shortread_complexityfilter` flag.
Complexity filtering is primarily a run-time optimisation step. It is not necessary for accurate taxonomic classification/profiling, however it can speed up the run-time of each tool by removing reads with low diversity of nucleotides (e.g. with mono-nucleotide repeats - `AAAAAAAA` - or di-nucleotide repeats - `GAGAGAGAGAGAGAG`) that have a low chance of giving an informative taxonomic ID, as they can be associated with many different taxa. Removing these reads therefore saves computational time and resources.
There are currently three options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/), [`prinseq++`](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus), and [`fastp`](https://github.com/OpenGene/fastp#low-complexity-filter).
There is one option for long-read quality filtering: [`Filtlong`](https://github.com/rrwick/Filtlong).
The tools offer different algorithms and parameters for removing low complexity reads and quality filtering. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of the tools (see links above) to decide on optimal methods and parameters for your dataset.
You can optionally save the FASTQ output of the complexity filtering step with `--save_complexityfiltered_reads`. If running with `fastp`, complexity filtering happens inclusively within the earlier short-read preprocessing step. Therefore there will not be an independent pipeline step for complexity filtering, and no independent FASTQ file (i.e. `--save_complexityfiltered_reads` will be ignored) - your complexity-filtered reads will also be in the `fastp/` folder in the same file(s) as the preprocessed reads.
> ⚠️ For nanopore data: we do not recommend performing any read preprocessing or complexity filtering if you are using ONT's Guppy toolkit for basecalling and post-processing.
#### Host-Read Removal
Removal of possible-host reads from FASTQ files prior classification/profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
Similarly to complexity filtering, host removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report the classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these reads prior to the typically resource-heavy classification/profiling step with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during classification/profiling that occur due to host-sequence contamination in reference genomes in public databases.
nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2 for short reads and minimap2 for long reads, and the use of the unaligned reads for downstream classification/profiling.
You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or a minimap2 `.mmi` file for `--longread_hostremoval_index`, however nf-core/taxprofiler will generate these for you if necessary. Pre-supplying the index directory or files can greatly speed up the process, and these can be re-used.
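For example, a sketch of the host-removal flags for short reads (paths are placeholders):
```bash
--perform_shortread_hostremoval \
--hostremoval_reference '/<path>/<to>/host_genome.fasta' \
--shortread_hostremoval_index '/<path>/<to>/bowtie2_index_directory/'
```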
#### Run Merging
For samples that may have been sequenced over multiple runs, or for FASTQ files split into multiple chunks, you can activate merging across all runs or chunks with `--perform_runmerging`.
For more information how to set up your input samplesheet, see [Multiple runs of the same sample](#multiple-runs-of-the-same-sample).
Activating this functionality will concatenate the FASTQ files with the same sample name _after_ the optional preprocessing steps and _before_ classification/profiling. Note that libraries with runs of different pairing types will **not** be merged and this will be indicated on output files with a `_se` or `_pe` suffix to the sample name accordingly.
You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
#### Classification and Profiling
The following sections provide tips and suggestions for running the different taxonomic classification and profiling tools _within the pipeline_. For advice and/or guidance whether you should run a particular tool on your specific data, please see the documentation of each tool!
An important distinction between the different tools included in the pipeline is classification versus profiling. Taxonomic _classification_ is concerned with simply detecting the presence of species in a given sample. Taxonomic _profiling_ involves additionally estimating the _abundance_ of each species.
Note that not all taxonomic classification tools (e.g. Kraken, MALT, Kaiju) perform _profiling_, but all taxonomic profilers (e.g. MetaPhlAn, mOTUs, Bracken) must perform some form of _classification_ prior to profiling.
Not all tools currently have dedicated tips, suggestions and/or recommendations, however we welcome further contributions for existing and additional tools via pull requests to the [nf-core/taxprofiler repository](https://github.com/nf-core/taxprofiler)!
##### Bracken
You must make sure to also activate Kraken2 to run Bracken in the pipeline.
It is unclear whether Bracken is suitable for running long reads, as it makes certain assumptions about read lengths. Furthermore, during testing we found issues where Bracken would fail on the long-read test data.
Therefore currently nf-core/taxprofiler does not run Bracken on data specified as being sequenced with `OXFORD_NANOPORE` in the input samplesheet.
> 🖊️ If you would like to change this behaviour, please contact us on the [nf-core slack](https://nf-co.re/join) and we can discuss this.
##### Centrifuge
Centrifuge currently does not accept FASTA files as input, therefore no output will be produced for these input files.
##### DIAMOND
DIAMOND only allows output of a single file format at a time, therefore supplying parameters such as `--diamond_save_reads` will result in only aligned reads in SAM format being produced; no taxonomic profiles will be available. Be aware of this when setting up your pipeline runs, depending on your particular use case.
##### Kaiju
Currently, no specific tips or suggestions.
##### Kraken2
Currently, no specific tips or suggestions.
##### KrakenUniq
Currently, no specific tips or suggestions.
##### MALT
nf-core/taxprofiler uses MALT 0.4.1, which is a comparatively old version. However, it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should still be compatible with `malt-run` of 0.4.1.
MALT does not support paired-end read alignment (unlike other tools), therefore nf-core/taxprofiler aligns these as independent files if read-merging is skipped. If you skip merging, you can sum or average the results of the counts of the pairs.
Krona can only be run on MALT output if a path to a Krona taxonomy database is supplied to `--krona_taxonomy_directory`. Therefore if you do not supply a Krona directory, Krona plots will not be produced for MALT.
##### MetaPhlAn3
MetaPhlAn3 currently does not accept FASTA files as input, therefore no output will be produced for these input files.
##### mOTUs
mOTUs currently does not accept FASTA files as input, therefore no output will be produced for these input files.
#### Post Processing
##### Visualisation
nf-core/taxprofiler supports generation of Krona interactive pie chart plots for the following compatible tools.
- Kraken2
- Centrifuge
- Kaiju
- MALT
> ⚠️ MALT KRONA plots cannot be generated automatically, you must also specify a Krona taxonomy directory with `--krona_taxonomy_directory` if you wish to generate these.
##### Multi-Table Generation
The main multiple-sample table from nf-core/taxprofiler comes from a dedicated standalone tool originally developed for the pipeline - [Taxpasta](https://taxpasta.readthedocs.io/en/latest/). When providing `--run_profile_standardisation`, every classifier/profiler and database combination will get a standardised and multi-sample taxon table in the [`taxpasta/`](https://nf-co.re/taxprofiler/output) directory. These tables are all structured in the same way, to facilitate comparison between the results of the classifiers/profilers.
In addition to per-sample profiles and standardised Taxpasta output, the pipeline also supports generation of 'native' multi-sample taxonomic profiles (i.e., those generated by the taxonomic profiling tools themselves or additional utility scripts provided by the tool authors), when providing `--run_profile_standardisation` to your pipeline.
These are executed on a per-database level, i.e., you will get a multi-sample taxon table for each database you provide for each tool, placed in the same directory as the directories containing the per-sample profiles.
The following tools will produce multi-sample taxon tables:

- Bracken
- Centrifuge
- Kaiju
- Kraken2
- MetaPhlAn3
- mOTUs
Note that the multi-sample tables from the 'native' tools in each folder are [not inter-operable](https://taxpasta.readthedocs.io/en/latest/tutorials/getting-started/) with each other, as they can have different formats and can contain additional and different data. In this case we refer you to use the standardised and merged output from Taxpasta, as described above.
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
First, go to the [nf-core/taxprofiler releases page](https://github.com/nf-core/taxprofiler/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
> 💡 If you wish to share such parameter files (such as uploading as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles.
## Core Nextflow arguments
### `-profile`
Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time.
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles.
If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.
- `test`
  - A profile with a complete configuration for automated testing
  - Includes links to test data so needs no other parameters
- `docker`
  - A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
- `podman`
  - A generic configuration profile to be used with [Podman](https://podman.io/)
- `shifter`
  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
- `charliecloud`
  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `apptainer`
  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
- `conda`
  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
### `-resume`
Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously.

You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names.

### `-c`

Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information.

### Resource requests
Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website.
### Custom Containers
In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline's specified version may be out of date.
To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.
### Custom Tool Arguments
A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom to users to insert additional parameters that the pipeline does not include by default.
To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
### nf-core/configs
In most cases, you will only need to create a custom config as a one-off, but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings, it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this, please test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile.
See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files.
If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
## Azure Resource Requests
To be used with the `azurebatch` profile by specifying the `-profile azurebatch`.
We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required.
Note that the choice of VM size depends on your quota and the overall workload during the analysis.
For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
## Running in the background
Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file.
Alternatively, you can use `screen` / `tmux` or similar tool to create a detached session which you can log back into at a later time.
Some HPC setups also allow you to run nextflow within a cluster job submitted to your job scheduler (from where it submits more jobs).
## Nextflow memory requirements
In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):
```bash
NXF_OPTS='-Xms1g -Xmx4g'
```
## Tutorials
### Retrieving databases or building custom databases
Not all taxonomic profilers provide ready-made or default databases. Here we will give brief guidance on how to build custom databases for each supported taxonomic profiler.
You should always consult the documentation of each tool for more information, as here we only provide short minimal tutorials as quick reference guides (with no guarantee they are up to date).
The following tutorials assume you already have the tool available (e.g. installed locally, or via conda, docker etc.), and that you have already downloaded the FASTA files you wish to build into a database.
#### Bracken custom database
Bracken does not require an independent database nor provide any default databases for classification/profiling, but rather builds upon Kraken2 databases. See [Kraken2](#kraken2-custom-database) for more information on how to build these.
In addition to a Kraken2 database, you also need to have the (average) read lengths (in bp) of your sequencing experiment, the K-mer size used to build the Kraken2 database, and Kraken2 available on your machine.
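A sketch of the build command (the angle-bracket values are placeholders for your Kraken2 database directory, the K-mer size used to build it, and your read length):
```bash
bracken-build -d <KRAKEN_DB_DIR> -k <KRAKEN_DB_KMER_LENGTH> -l <READ_LENGTH>
```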
> 🛈 You can speed up database construction by supplying the threads parameter (`-t`).
> 🛈 If you do not have Kraken2 in your `$PATH` you can point to the binary with `-x /<path>/<to>/kraken2`.
<details markdown="1">
<summary>Expected files in database directory</summary>
- `bracken`
- `hash.k2d`
- `opts.k2d`
- `taxo.k2d`
- `database.kraken`
- `database100mers.kmer_distrib`
- `database100mers.kraken`
- `database150mers.kmer_distrib`
- `database150mers.kraken`
</details>
You can follow the Bracken [tutorial](https://ccb.jhu.edu/software/bracken/index.shtml?t=manual) for more information.
#### Centrifuge custom database
To build a custom Centrifuge database, you need to download the taxonomy files, make a custom `seqid2taxid.map`, and combine the FASTA files together.
In total, you need four components: a tab-separated file mapping sequence IDs to taxonomy IDs (`--conversion-table`), a tab-separated file mapping taxonomy IDs to their parents and rank, up to the root of the tree (`--taxonomy-tree`), a pipe-separated file mapping taxonomy IDs to a name (`--name-table`), and the reference sequences.
An example of custom `seqid2taxid.map`:
```
NC_001133.9 4392
NC_012920.1 9606
NC_001134.8 4392
NC_001135.5 4392
```
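With the four components in place, a sketch of the build command (the thread count and database name are placeholders):
```bash
centrifuge-build -p 4 --conversion-table seqid2taxid.map \
    --taxonomy-tree nodes.dmp --name-table names.dmp \
    reference.fa <YOUR_DB_NAME>
```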
<details markdown="1">
<summary>Expected files in database directory</summary>
- `centrifuge`
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
- `<database_name>.<number>.cf`
</details>
For the Centrifuge custom database documentation, see [here](https://ccb.jhu.edu/software/centrifuge/manual.shtml#custom-database).
#### DIAMOND custom database

<details markdown="1">
<summary>Expected files in database directory</summary>
- `diamond`
- `<database_name>.dmnd`
</details>
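As a quick reference, a sketch of a taxonomy-aware build command, assuming you have already downloaded the NCBI taxonomy dump and protein accession mapping files (file names are placeholders):
```bash
diamond makedb --in proteins.faa -d <YOUR_DB_NAME> \
    --taxonmap prot.accession2taxid.FULL.gz \
    --taxonnodes nodes.dmp \
    --taxonnames names.dmp
```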
A detailed description can be found [here](https://github.com/bbuchfink/diamond/wiki/1.-Tutorial).
#### Kaiju custom database
To build a Kaiju database, you need three components: a FASTA file with the protein sequences, the NCBI taxonomy dump files, and the uppercase characters of the standard 20 amino acids you wish to include.
> ⚠️ The headers of the protein fasta file must be numeric NCBI taxon identifiers of the protein sequences.
To download the NCBI taxonomy files, please run the following commands:
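(A sketch, assuming the standard NCBI location for the taxonomy dump at the time of writing - check NCBI for the current URL:)
```bash
wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.zip
unzip taxdump.zip
```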
To build the database, run the following command (the contents of taxdump must be in the same location where you run the command):
```bash
To build the database, run the following command (the contents of taxdump must be in the same location where you run the command):

```bash
kaiju-mkbwt -a ACDEFGHIKLMNPQRSTVWY -o proteins proteins.faa
kaiju-mkfmi proteins
```
> 🛈 You can speed up database construction by supplying the threads parameter (`-t`).
<detailsmarkdown="1">
<summary>Expected files in database directory</summary>
- `kaiju`
- `kaiju_db_*.fmi`
- `nodes.dmp`
- `names.dmp`
</details>
For the Kaiju database construction documentation, see [here](https://github.com/bioinformatics-centre/kaiju#custom-database).
#### Kraken2 custom database
To build a Kraken2 database you need two components: a taxonomy (consisting of `names.dmp`, `nodes.dmp`, and `*accession2taxid` files), and the FASTA files you wish to include.
To pull the NCBI taxonomy, you can run the following:
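The taxonomy download is a standard `kraken2-build` invocation (`<YOUR_DB_NAME>` is a placeholder):

```bash
# Download the NCBI taxonomy into the database directory
kraken2-build --download-taxonomy --db <YOUR_DB_NAME>
```

You can then add your FASTA files to the database's library, for example:

```bash
# Add a genome to the database's library (repeatable for multiple genomes)
kraken2-build --add-to-library genome.fna --db <YOUR_DB_NAME>
```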
You can repeat this step multiple times to iteratively add more genomes prior to building.
Once all genomes are added to the library, you can build the database (and optionally clean it up):
```bash
kraken2-build --build --db <YOUR_DB_NAME>
kraken2-build --clean --db <YOUR_DB_NAME>
```
You can then add the `<YOUR_DB_NAME>/` path to your nf-core/taxprofiler database input sheet.
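For illustration only (the column layout shown here is an assumption; check the pipeline's usage documentation for the authoritative database sheet format), an entry might look like:

```csv
tool,db_name,db_params,db_path
kraken2,my_kraken2_db,,/<path>/<to>/<YOUR_DB_NAME>/
```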
<details markdown="1">
<summary>Expected files in database directory</summary>

- `kraken2`
  - `opts.k2d`
  - `hash.k2d`
  - `taxo.k2d`

</details>
You can follow the Kraken2 [tutorial](https://github.com/DerrickWood/kraken2/blob/master/docs/MANUAL.markdown#custom-databases) for a more detailed description.
#### KrakenUniq custom database
For any KrakenUniq database, you require: the taxonomy files, the FASTA files you wish to include, a `seqid2taxid.map` file, and a k-mer length.

First, you must make a `seqid2taxid.map` file, which is a two-column text file containing the FASTA sequence header and the NCBI taxonomy ID for each sequence:
```
MT192765.1 2697049
```
Then make a directory (`<DB_DIR_NAME>/`), containing the `seqid2taxid.map` file, and your FASTA files in a subdirectory called `library/` (these FASTA files can be symlinked). You must then run the `taxonomy` command on the `<DB_DIR_NAME>/` directory, and then build it.
```bash
mkdir -p <DB_DIR_NAME>/library
mv seqid2taxid.map <DB_DIR_NAME>/
mv *.fna <DB_DIR_NAME>/library
krakenuniq-download --db <DB_DIR_NAME> taxonomy
krakenuniq-build --db <DB_DIR_NAME> --kmer-len 31
```
> 🛈 You can speed up database construction by supplying the threads parameter (`--threads`) to `krakenuniq-build`.
<details markdown="1">
<summary>Expected files in database directory</summary>

- `krakenuniq`
  - `opts.k2d`
  - `hash.k2d`
  - `taxo.k2d`
  - `database.idx`
  - `taxDB`

</details>
Please see the [KrakenUniq documentation](https://github.com/fbreitwieser/krakenuniq#database-building) for more information.
#### MALT custom database
To build a MALT database, you need the FASTA files to include, and an (unzipped) [MEGAN mapping 'db' file](https://software-ab.informatik.uni-tuebingen.de/download/megan6/) for your FASTA type. In addition to the input directory, output directory, and the mapping file database, you also need to specify the sequence type (DNA or Protein) with the `-s` flag.
```bash
malt-build -i <path>/<to>/<fasta>/*.{fna,fa,fasta} -a2t <path>/<to>/<map>.db -d <YOUR_DB_NAME>/ -s DNA
```
You can then add the `<YOUR_DB_NAME>/` path to your nf-core/taxprofiler database input sheet.
> ⚠️ MALT generates very large database files and requires large amounts of RAM. You can reduce both by increasing the step size `-st` (with a reduction in sensitivity).
> 🛈 MALT-build can be multi-threaded with `-t` to speed up building.
<details markdown="1">
<summary>Expected files in database directory</summary>

- `malt`
  - `ref.idx`
  - `taxonomy.idx`
  - `taxonomy.map`
  - `index0.idx`
  - `table0.idx`
  - `table0.db`
  - `ref.inf`
  - `ref.db`
  - `taxonomy.tre`

</details>
See the [MALT manual](https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf) for more information.
#### MetaPhlAn3 custom database
MetaPhlAn3 does not allow (easy) construction of custom databases. We therefore recommend using the prebuilt database of marker genes that is provided by the developers.
To do this you need to have `MetaPhlAn3` installed on your machine.
```bash
metaphlan --install --bowtie2db <YOUR_DB_NAME>/
```
You can then add the `<YOUR_DB_NAME>/` path to your nf-core/taxprofiler database input sheet.
> 🛈 It is generally not recommended to modify this database yourself; thus, this is currently not supported in the pipeline. However, it is possible to customise the existing database by adding your own marker genomes following the instructions [here](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.1#customizing-the-database).
> 🖊️ If using your own database is relevant for you, please contact the nf-core/taxprofiler developers on the [nf-core slack](https://nf-co.re/join) and we will investigate supporting this.
<details markdown="1">
<summary>Expected files in database directory</summary>

- `metaphlan3`
  - `mpa_v30_CHOCOPhlAn_201901.pkl`
  - `mpa_v30_CHOCOPhlAn_201901.fasta`
  - `mpa_v30_CHOCOPhlAn_201901.3.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.4.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.1.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.2.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
  - `mpa_latest`

</details>
More information on the MetaPhlAn3 database can be found [here](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.1#installation).
#### mOTUs custom database
mOTUs does not provide the ability to construct custom databases. We therefore recommend using the prebuilt database of marker genes provided by the developers.

> ⚠️ **Do not change the directory name of the resulting database if moving it to a central location.** The database name `db_mOTU/` is hardcoded in the mOTUs tool.
To do this you need to have `mOTUs` installed on your machine.
```bash
motus downloadDB
```
Then supply the `db_mOTU/` path to your nf-core/taxprofiler database input sheet.
> ⚠️ The `db_mOTU/` directory may be downloaded to somewhere in your Python's `site-packages` directory. You will have to find this yourself, as the exact location varies depending on the installation method.
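A hedged sketch for locating it, using only Python's standard `site` module (paths vary by environment):

```bash
# Search the active Python installation's site-packages for db_mOTU/
find "$(python -c 'import site; print(site.getsitepackages()[0])')" -maxdepth 6 -type d -name 'db_mOTU'
```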
More information on the mOTUs database can be found [here](https://motu-tool.org/installation.html).
## Troubleshooting and FAQs
### I get a warning during centrifuge_kreport process with exit status 255
When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.
meta["nodoi_text"]=meta.manifest_map.doi?"":"<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
description: Re-estimate taxonomic abundance of metagenomic samples analyzed by kraken.
keywords:
  - bracken
  - metagenomics
  - abundance
  - kraken2
tools:
  - bracken:
      description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
description: Combine output of metagenomic samples analyzed by bracken.
keywords:
  - sort
tools:
  - "bracken":
      description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.