Merge branch 'dev' into nf-core-template-merge-2.7.2

2024-11-22 08:49:55 +00:00 · 2022-12-20 15:14:41 +01:00 · 2022-12-20 15:14:41 +01:00 · d691b89727
commit d691b89727
parent ab591e01ff 25bb4304e8
126 changed files with 21408 additions and 376 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -22,10 +22,27 @@ jobs:
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        NXF_VER:
          - "22.10.1"
          - "latest-everything"
        parameters:
          - "--preprocessing_qc_tool falco"
          - "--perform_longread_qc false"
          - "--perform_shortread_qc false"
          - "--shortread_qc_tool fastp"
          - "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_includeunmerged"
          - "--shortread_qc_tool fastp --shortread_qc_mergepairs"
          - "--shortread_qc_tool adapterremoval"
          - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_includeunmerged"
          - "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
          - "--shortread_complexityfilter_tool bbduk"
          - "--shortread_complexityfilter_tool prinseqplusplus"
          - "--perform_runmerging"
          - "--perform_runmerging --shortread_qc_mergepairs"
          - "--shortread_complexityfilter false --perform_shortread_hostremoval"
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v3
@ -35,9 +52,122 @@ jobs:
        with:
          version: "${{ matrix.NXF_VER }}"
-      - name: Run pipeline with test data
+      - name: Show current locale
-        # TODO nf-core: You can customise CI pipeline run tests as required
+        run: locale
-        # For example: adding multiple test runs with different parameters
+
-        # Remember that you can parallelise this by using strategy.matrix
+      - name: Set UTF-8 enabled locale
        run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          sudo locale-gen en_US.UTF-8
          sudo update-locale LANG=en_US.UTF-8
      - name: Run pipeline with test data
        uses: Wandalen/wretry.action@v1.0.11
        with:
          command: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
          attempt_limit: 3
  # CI job: runs the `test_motus` profile against a mOTUs database that is
  # downloaded during the job itself.
  motus:
    name: Test mOTUs with workflow parameters
    # Push events only run in the nf-core/taxprofiler repository; every other
    # event type always runs. Quoted to match the first job in this workflow.
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One run per listed Nextflow version.
        NXF_VER:
          - "22.10.1"
          - "latest-everything"
    steps:
      - name: Check out pipeline code
        # Same major version as the checkout step used earlier in this workflow.
        uses: actions/checkout@v3
      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v1
        with:
          version: "${{ matrix.NXF_VER }}"
      - name: Show current locale
        run: locale
      - name: Set UTF-8 enabled locale
        run: |
          sudo locale-gen en_US.UTF-8
          sudo update-locale LANG=en_US.UTF-8
      - name: Prepare the database
        # Fetches the mOTUs download script from the upstream `master` branch,
        # runs it (output kept in download_db_log.txt), then writes a one-row
        # database sheet that maps the tool `motus` to the path `db_mOTU`.
        run: |
          wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
          python downloadDB.py > download_db_log.txt
          echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
          echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
      - name: Run pipeline with test data
        # Wrapped in a retry action with an attempt limit of 3.
        uses: Wandalen/wretry.action@v1.0.11
        with:
          command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
          attempt_limit: 3
  # CI job: runs the `test_krakenuniq` profile with Docker.
  krakenuniq:
    name: Test KrakenUniq with workflow parameters
    # Push events only run in the nf-core/taxprofiler repository; every other
    # event type always runs. Quoted to match the first job in this workflow.
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One run per listed Nextflow version.
        NXF_VER:
          - "22.10.1"
          - "latest-everything"
    steps:
      - name: Check out pipeline code
        # Same major version as the checkout step used earlier in this workflow.
        uses: actions/checkout@v3
      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v1
        with:
          version: "${{ matrix.NXF_VER }}"
      - name: Show current locale
        run: locale
      - name: Set UTF-8 enabled locale
        run: |
          sudo locale-gen en_US.UTF-8
          sudo update-locale LANG=en_US.UTF-8
      - name: Run pipeline with test data
        # Wrapped in a retry action with an attempt limit of 3.
        uses: Wandalen/wretry.action@v1.0.11
        with:
          command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results
          attempt_limit: 3
  # CI job: runs the `test_nothing` profile with MALT switched on via `--run_malt`.
  malt:
    name: Test MALT with workflow parameters
    # Push events only run in the nf-core/taxprofiler repository; every other
    # event type always runs. Quoted to match the first job in this workflow.
    if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One run per listed Nextflow version.
        NXF_VER:
          - "22.10.1"
          - "latest-everything"
    steps:
      - name: Check out pipeline code
        # Same major version as the checkout step used earlier in this workflow.
        uses: actions/checkout@v3
      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v1
        with:
          version: "${{ matrix.NXF_VER }}"
      - name: Show current locale
        run: locale
      - name: Set UTF-8 enabled locale
        run: |
          sudo locale-gen en_US.UTF-8
          sudo update-locale LANG=en_US.UTF-8
      - name: Run pipeline with test data
        # Wrapped in a retry action with an attempt limit of 3.
        uses: Wandalen/wretry.action@v1.0.11
        with:
          command: nextflow run ${GITHUB_WORKSPACE} -profile test_nothing,docker --run_malt --outdir ./results
          attempt_limit: 3
--- a/.prettierignore
+++ b/.prettierignore
@ -10,3 +10,4 @@ testing/
 testing*
 *.pyc
 bin/
 tests/
--- a/CITATIONS.md
+++ b/CITATIONS.md
@ -13,8 +13,67 @@
 - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
 - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
 - [fastp](https://doi.org/10.1093/bioinformatics/bty560)
  > Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
 - [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)
  > Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
 - [Porechop](https://github.com/rrwick/Porechop)
 - [BBTools](http://sourceforge.net/projects/bbmap/)
 - [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)
  > Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
 - [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)
  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
 - [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0)
  > Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0.
 - [Bracken](https://doi.org/10.7717/peerj-cs.104)
  > Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104.
 - [Krona](https://doi.org/10.1186/1471-2105-12-385)
  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
 - [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
  > Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
 - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
  > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088.
 - [Centrifuge](https://doi.org/10.1101/gr.210641.116)
  > Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
 - [DIAMOND](https://doi.org/10.1038/nmeth.3176)
  > Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
 - [FILTLONG](https://github.com/rrwick/Filtlong)
 - [falco](https://doi.org/10.12688/f1000research.21142.2)
  > de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874. doi: 10.12688/f1000research.21142.2.
 ## Software packaging/containerisation tools
 - [Anaconda](https://anaconda.com)
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_dark.png#gh-dark-mode-only)
+# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_dark.png#gh-dark-mode-only)
 [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
@ -12,9 +12,11 @@
 ## Introduction
 > ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
 <!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->
-**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for Taxonomic profiling of shotgun metagenomic data.
+**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling with multiple profiling tools against multiple databases, and produces standardised output tables.
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
@ -26,12 +28,33 @@ On release, automated continuous integration tests run the pipeline on a full-si
 <!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
-1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
+![](docs/images/taxprofiler_tube.png)
-2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+
 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
 2. Performs optional read pre-processing
   - Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
   - Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
   - Host-read removal (short-read: [Bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
   - Run merging
 3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
 4. Performs taxonomic profiling using one or more of:
   - [Kraken2](https://ccb.jhu.edu/software/kraken2/)
   - [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/)
   - [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
   - [DIAMOND](https://github.com/bbuchfink/diamond)
   - [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
   - [Kaiju](https://kaiju.binf.ku.dk/)
   - [mOTUs](https://motu-tool.org/)
   - [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
 5. Performs optional post-processing with:
   - [bracken](https://ccb.jhu.edu/software/bracken/)
 6. Standardises output tables
 7. Presents QC for raw reads ([`MultiQC`](http://multiqc.info/))
 8. Plots Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))
 ## Quick Start
-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`)
+1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`).
 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
@ -50,10 +73,8 @@ On release, automated continuous integration tests run the pipeline on a full-si
 4. Start running your own analysis!
-   <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
+   ```console
-
+   nextflow run nf-core/taxprofiler --input samplesheet.csv --databases database.csv --outdir <OUTDIR> --run_<TOOL1> --run_<TOOL2> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
   ```bash
   nextflow run nf-core/taxprofiler --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
   ```
 ## Documentation
@ -66,7 +87,7 @@ nf-core/taxprofiler was originally written by nf-core community.
 We thank the following people for their extensive assistance in the development of this pipeline:
-<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
+[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofsam), [Maxime Borry](https://github.com/maxibor), [Thomas A. Christensen II](https://github.com/MillironX), [Jianhong Ou](https://github.com/jianhong), [Rafal Stepien](https://github.com/rafalstepien), [Mahwash Jamy](https://github.com/mjamy).
 ## Contributions and Support
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@ -11,3 +11,249 @@ report_section_order:
    order: -1002
 # Plot export switch, followed by report branding: logo file name, the URL
 # associated with the logo, and its title text.
 export_plots: true
 custom_logo: "nf-core-taxprofiler_logo_custom_light.png"
 custom_logo_url: https://nf-co.re/taxprofiler
 custom_logo_title: "nf-core/taxprofiler"
 # MultiQC modules listed for this report: read QC and preprocessing tools,
 # host-removal mappers, profilers, and `custom_content`.
 # NOTE(review): presumably MultiQC runs only the modules named here - confirm
 # against the MultiQC configuration docs.
 run_modules:
  - fastqc
  - adapterRemoval
  - bbduk
  - prinseqplusplus
  - fastp
  - filtlong
  - bowtie2
  - minimap2
  - samtools
  - kraken
  - kaiju
  - metaphlan
  - diamond
  - malt
  - motus
  - porechop
  - custom_content
 #extra_fn_clean_exts:
 #    - '_fastp'
 #    - '.pe.settings'
 #    - '.se.settings'
 # Report sections in display order. The `fastqc` module is listed four times
 # (FastQC/Falco, pre- and post-trimming) and the `kraken` module three times
 # (Kraken, Bracken, Centrifuge); `path_filters` selects the files for each
 # instance and `name` gives it a distinct section title.
 top_modules:
  - "fastqc":
      name: "FastQC (pre-Trimming)"
      path_filters:
        - "*raw_*fastqc.zip"
  - "fastqc":
      name: "Falco (pre-Trimming)"
      path_filters:
        - "*_raw_falco_*_report.html"
  - "fastp"
  - "adapterRemoval"
  - "porechop"
  - "fastqc":
      name: "FastQC (post-Trimming)"
      path_filters:
        - "*_processed_*fastqc.zip"
  - "fastqc":
      name: "Falco (post-Trimming)"
      path_filters:
        - "*_processed_falco_*_report.html"
  - "bbduk"
  - "prinseqplusplus"
  - "filtlong"
  - "bowtie2":
      name: "bowtie2"
  - "samtools":
      name: "Samtools Stats"
  - "kraken":
      name: "Kraken"
      path_filters:
        - "*.kraken2.kraken2.report.txt"
  # Bracken reports are rendered through the kraken module; see `extra` below.
  - "kraken":
      name: "Bracken"
      anchor: "bracken"
      target: "Bracken"
      doi: "10.7717/peerj-cs.104"
      info: "Estimates species abundances in metagenomics samples by probabilistically re-distributing reads in the taxonomic tree."
      extra: "Note: plot title will say Kraken2 due to the first step of bracken producing the same output format as Kraken. Abundance information is currently not supported in MultiQC."
      path_filters:
        - "*.bracken.kraken2.report.txt"
  # Centrifuge reports are rendered through the kraken module; the plot-title
  # caveat lives in `extra`, so `info` carries only the tool description.
  - "kraken":
      name: "Centrifuge"
      anchor: "centrifuge"
      target: "Centrifuge"
      doi: "10.1101/gr.210641.116"
      info: "is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. The system uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index."
      extra: "Note: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
      path_filters:
        - "*.centrifuge.txt"
  - "malt":
      name: "MALT"
  - "diamond"
  - "kaiju":
      name: "Kaiju"
  - "motus"
 # Numeric placement values for table columns, keyed by section name and then
 # by column id. Values rise in steps through the sections, from the 100s for
 # pre-trimming FastQC up to the 2000s for Kaiju.
 # It is not possible to set placement for custom kraken and centrifuge columns.
 table_columns_placement:
  FastQC (pre-Trimming):
    total_sequences: 100
    avg_sequence_length: 110
    percent_duplicates: 120
    percent_gc: 130
    percent_fails: 140
  Falco (pre-Trimming):
    total_sequences: 200
    avg_sequence_length: 210
    percent_duplicates: 220
    percent_gc: 230
    percent_fails: 240
  fastp:
    pct_adapter: 300
    pct_surviving: 310
    pct_duplication: 320
    after_filtering_gc_content: 330
    after_filtering_q30_rate: 340
    after_filtering_q30_bases: 350
  Adapter Removal:
    aligned_total: 360
    percent_aligned: 370
    percent_collapsed: 380
    percent_discarded: 390
  FastQC (post-Trimming):
    total_sequences: 400
    avg_sequence_length: 410
    percent_duplicates: 420
    percent_gc: 430
    percent_fails: 440
  Falco (post-Trimming):
    total_sequences: 500
    avg_sequence_length: 510
    percent_duplicates: 520
    percent_gc: 530
    percent_fails: 540
  bowtie2:
    overall_alignment_rate: 600
  Samtools Stats:
    raw_total_sequences: 700
    reads_mapped: 710
    reads_mapped_percent: 720
    reads_properly_paired_percent: 730
    non-primary_alignments: 740
    reads_MQ0_percent: 750
    error_rate: 760
  MALT:
    Num. of queries: 1000
    Total reads: 1100
    Mappability: 1200
    Assig. Taxonomy: 1300
    Taxonomic assignment success: 1400
  Kaiju:
    assigned: 2000
    "% Assigned": 2100
    "% Unclassified": 2200
 # Per-section, per-column visibility flags. Booleans use the canonical
 # lowercase spelling, matching `export_plots: true` in this file.
 table_columns_visible:
  FastQC (pre-Trimming):
    total_sequences: true
    avg_sequence_length: true
    percent_duplicates: true
    percent_gc: true
    percent_fails: false
  Falco (pre-Trimming):
    total_sequences: true
    avg_sequence_length: true
    percent_duplicates: true
    percent_gc: true
    percent_fails: false
  fastp:
    pct_adapter: true
    pct_surviving: true
    pct_duplication: false
    after_filtering_gc_content: false
    after_filtering_q30_rate: false
    after_filtering_q30_bases: false
  Adapter Removal:
    aligned_total: true
    percent_aligned: true
    percent_collapsed: true
    percent_discarded: false
  FastQC (post-Trimming):
    total_sequences: true
    avg_sequence_length: true
    percent_duplicates: false
    percent_gc: false
    percent_fails: false
  Falco (post-Trimming):
    total_sequences: true
    avg_sequence_length: true
    percent_duplicates: false
    percent_gc: false
    percent_fails: false
  bowtie2:
    overall_alignment_rate: true
  Samtools Stats:
    raw_total_sequences: true
    reads_mapped: true
    reads_mapped_percent: true
    reads_properly_paired_percent: false
    non-primary_alignments: false
    reads_MQ0_percent: false
    error_rate: false
  Kraken:
    "% Unclassified": true
    "% Top 5": false
  Bracken:
    "% Unclassified": true
    "% Top 5": false
  Centrifuge:
    "% Unclassified": true
    "% Top 5": false
  MALT:
    Num. of queries: true
    Total reads: true
    Mappability: true
    Assig. Taxonomy: false
    Taxonomic assignment success: true
  Kaiju:
    assigned: false
    "% Assigned": false
    "% Unclassified": true
 # Replacement column titles, keyed by section name and then by column id.
 # Pre-trimming sections are labelled "Input Reads" and post-trimming sections
 # "Processed Reads", so the two sets of FastQC/Falco columns stay distinct.
 table_columns_name:
  FastQC (pre-Trimming):
    total_sequences: "Nr. Input Reads"
    avg_sequence_length: "Length Input Reads"
    percent_gc: "% GC Input Reads"
    percent_duplicates: "% Dups Input Reads"
    percent_fails: "% Failed Input Reads"
  Falco (pre-Trimming):
    total_sequences: "Nr. Input Reads"
    avg_sequence_length: "Length Input Reads"
    percent_gc: "% GC Input Reads"
    percent_duplicates: "% Dups Input Reads"
    percent_fails: "% Failed Input Reads"
  FastQC (post-Trimming):
    total_sequences: "Nr. Processed Reads"
    avg_sequence_length: "Length Processed Reads"
    percent_gc: "% GC Processed Reads"
    percent_duplicates: "% Dups Processed Reads"
    percent_fails: "% Failed Processed Reads"
  Falco (post-Trimming):
    total_sequences: "Nr. Processed Reads"
    avg_sequence_length: "Length Processed Reads"
    percent_gc: "% GC Processed Reads"
    percent_duplicates: "% Dups Processed Reads"
    percent_fails: "% Failed Processed Reads"
  Samtools Stats:
    raw_total_sequences: "Nr. Reads Into Mapping"
    reads_mapped: "Nr. Mapped Reads"
    reads_mapped_percent: "% Mapped Reads"
 # Additional file-name suffixes for sample-name cleaning; the first three
 # mirror the report suffixes used in the `path_filters` of `top_modules`.
 # NOTE(review): presumably MultiQC strips these from sample names - confirm.
 extra_fn_clean_exts:
  - ".kraken2.kraken2.report.txt"
  - ".centrifuge.txt"
  - ".bracken.kraken2.report.txt"
  - ".settings"
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@ -1,3 +1,6 @@
-sample,fastq_1,fastq_2
+sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
+2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
 2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
 2613,ERR5766181,ILLUMINA,/<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz,/<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz,
 ERR3201952,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@ -1,261 +1,232 @@
 #!/usr/bin/env python
-
+from distutils import extension
-"""Provide a command line tool to validate and transform tabular samplesheets."""
+import os
 import argparse
 import csv
 import logging
 import sys
-from collections import Counter
+import errno
-from pathlib import Path
+import argparse
 logger = logging.getLogger()
-class RowChecker:
+def parse_args(args=None):
-    """
+    Description = "Reformat nf-core/taxprofiler samplesheet file and check its contents."
    Define a service that can validate and transform each given row.
-    Attributes:
+    Epilog = "Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>"
        modified (list): A list of dicts, where each dict corresponds to a previously
            validated and transformed row. The order of rows is maintained.
-    """
+    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
-
+    parser.add_argument("FILE_IN", help="Input samplesheet file.")
-    VALID_FORMATS = (
+    parser.add_argument("FILE_OUT", help="Output file.")
-        ".fq.gz",
+    return parser.parse_args(args)
        ".fastq.gz",
    )
    def __init__(
        self,
        sample_col="sample",
        first_col="fastq_1",
        second_col="fastq_2",
        single_col="single_end",
        **kwargs,
    ):
        """
        Initialize the row checker with the expected column names.
        Args:
            sample_col (str): The name of the column that contains the sample name
                (default "sample").
            first_col (str): The name of the column that contains the first (or only)
                FASTQ file path (default "fastq_1").
            second_col (str): The name of the column that contains the second (if any)
                FASTQ file path (default "fastq_2").
            single_col (str): The name of the new column that will be inserted and
                records whether the sample contains single- or paired-end sequencing
                reads (default "single_end").
        """
        super().__init__(**kwargs)
        self._sample_col = sample_col
        self._first_col = first_col
        self._second_col = second_col
        self._single_col = single_col
        self._seen = set()
        self.modified = []
    def validate_and_transform(self, row):
        """
        Perform all validations on the given row and insert the read pairing status.
        Args:
            row (dict): A mapping from column headers (keys) to elements of that row
                (values).
        """
        self._validate_sample(row)
        self._validate_first(row)
        self._validate_second(row)
        self._validate_pair(row)
        self._seen.add((row[self._sample_col], row[self._first_col]))
        self.modified.append(row)
    def _validate_sample(self, row):
        """Assert that the sample name exists and convert spaces to underscores."""
        if len(row[self._sample_col]) <= 0:
            raise AssertionError("Sample input is required.")
        # Sanitize samples slightly.
        row[self._sample_col] = row[self._sample_col].replace(" ", "_")
    def _validate_first(self, row):
        """Assert that the first FASTQ entry is non-empty and has the right format."""
        if len(row[self._first_col]) <= 0:
            raise AssertionError("At least the first FASTQ file is required.")
        self._validate_fastq_format(row[self._first_col])
    def _validate_second(self, row):
        """Assert that the second FASTQ entry has the right format if it exists."""
        if len(row[self._second_col]) > 0:
            self._validate_fastq_format(row[self._second_col])
    def _validate_pair(self, row):
        """Assert that read pairs have the same file extension. Report pair status."""
        if row[self._first_col] and row[self._second_col]:
            row[self._single_col] = False
            first_col_suffix = Path(row[self._first_col]).suffixes[-2:]
            second_col_suffix = Path(row[self._second_col]).suffixes[-2:]
            if first_col_suffix != second_col_suffix:
                raise AssertionError("FASTQ pairs must have the same file extensions.")
        else:
            row[self._single_col] = True
    def _validate_fastq_format(self, filename):
        """Assert that a given filename has one of the expected FASTQ extensions."""
        if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
            raise AssertionError(
                f"The FASTQ file has an unrecognized extension: {filename}\n"
                f"It should be one of: {', '.join(self.VALID_FORMATS)}"
            )
    def validate_unique_samples(self):
        """
        Assert that the combination of sample name and FASTQ filename is unique.
        In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
        number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
        """
        if len(self._seen) != len(self.modified):
            raise AssertionError("The pair of sample name and FASTQ must be unique.")
        seen = Counter()
        for row in self.modified:
            sample = row[self._sample_col]
            seen[sample] += 1
            row[self._sample_col] = f"{sample}_T{seen[sample]}"
-def read_head(handle, num_lines=10):
+def make_dir(path):
-    """Read the specified number of lines from the current position in the file."""
+    if len(path) > 0:
-    lines = []
+        try:
-    for idx, line in enumerate(handle):
+            os.makedirs(path)
-        if idx == num_lines:
+        except OSError as exception:
-            break
+            if exception.errno != errno.EEXIST:
-        lines.append(line)
+                raise exception
    return "".join(lines)
-def sniff_format(handle):
+def print_error(error, context="Line", context_str=""):
-    """
+    error_str = "ERROR: Please check samplesheet -> {}".format(error)
-    Detect the tabular format.
+    if context != "" and context_str != "":
-
+        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
-    Args:
+            error, context.strip(), context_str.strip()
-        handle (text file): A handle to a `text file`_ object. The read position is
+        )
-        expected to be at the beginning (index 0).
+    print(error_str)
-
+    sys.exit(1)
    Returns:
        csv.Dialect: The detected tabular format.
    .. _text file:
        https://docs.python.org/3/glossary.html#term-text-file
    """
    peek = read_head(handle)
    handle.seek(0)
    sniffer = csv.Sniffer()
    if not sniffer.has_header(peek):
        logger.critical("The given sample sheet does not appear to contain a header.")
        sys.exit(1)
    dialect = sniffer.sniff(peek)
    return dialect
 def check_samplesheet(file_in, file_out):
    """
-    Check that the tabular samplesheet has the structure expected by nf-core pipelines.
+    This function checks that the samplesheet follows the following structure:
    Validate the general shape of the table, expected columns, and each row. Also add
    an additional column which records whether one or two FASTQ reads were found.
    Args:
        file_in (pathlib.Path): The given tabular samplesheet. The format can be either
            CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
        file_out (pathlib.Path): Where the validated and transformed samplesheet should
            be created; always in CSV format.
    Example:
        This function checks that the samplesheet follows the following structure,
        see also the `viral recon samplesheet`_::
            sample,fastq_1,fastq_2
            SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
            SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
            SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
    .. _viral recon samplesheet:
        https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
    sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
    2611,ERR5766174,ILLUMINA,,,ERX5474930_ERR5766174_1.fa.gz
    2612,ERR5766176,ILLUMINA,ERX5474932_ERR5766176_1.fastq.gz,ERX5474932_ERR5766176_2.fastq.gz,
    2612,ERR5766174,ILLUMINA,ERX5474936_ERR5766180_1.fastq.gz,,
    2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
    """
-    required_columns = {"sample", "fastq_1", "fastq_2"}
+
-    # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
+    FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
-    with file_in.open(newline="") as in_handle:
+    FA_EXTENSIONS = (
-        reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
+        ".fa.gz",
-        # Validate the existence of the expected header columns.
+        ".fasta.gz",
-        if not required_columns.issubset(reader.fieldnames):
+        ".fna.gz",
-            req_cols = ", ".join(required_columns)
+        ".fas.gz",
-            logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.")
+    )
    INSTRUMENT_PLATFORMS = [
        "ABI_SOLID",
        "BGISEQ",
        "CAPILLARY",
        "COMPLETE_GENOMICS",
        "DNBSEQ",
        "HELICOS",
        "ILLUMINA",
        "ION_TORRENT",
        "LS454",
        "OXFORD_NANOPORE",
        "PACBIO_SMRT",
    ]
    sample_mapping_dict = {}
    with open(file_in, "r") as fin:
        ## Check header
        MIN_COLS = 4
        HEADER = [
            "sample",
            "run_accession",
            "instrument_platform",
            "fastq_1",
            "fastq_2",
            "fasta",
        ]
        header = [x.strip('"') for x in fin.readline().strip().split(",")]
        ## Check for missing mandatory columns
        missing_columns = list(set(HEADER) - set(header))
        if len(missing_columns) > 0:
            print(
                "ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
                    ",".join(missing_columns)
                )
            )
            sys.exit(1)
-        # Validate each row.
+
-        checker = RowChecker()
+        ## Find locations of mandatory columns
-        for i, row in enumerate(reader):
+        header_locs = {}
-            try:
+        for i in HEADER:
-                checker.validate_and_transform(row)
+            header_locs[i] = header.index(i)
-            except AssertionError as error:
+
-                logger.critical(f"{str(error)} On line {i + 2}.")
+        ## Check sample entries
-                sys.exit(1)
+        for line in fin:
-        checker.validate_unique_samples()
+
-    header = list(reader.fieldnames)
+            ## Pull out only relevant columns for downstream checking
-    header.insert(1, "single_end")
+            line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
-    # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
+
-    with file_out.open(mode="w", newline="") as out_handle:
+            # Check valid number of columns per row
-        writer = csv.DictWriter(out_handle, header, delimiter=",")
+            if len(line_parsed) < len(HEADER):
-        writer.writeheader()
+                print_error(
-        for row in checker.modified:
+                    "Invalid number of columns (minimum = {})!".format(len(HEADER)),
-            writer.writerow(row)
+                    "Line",
                    line,
                )
            num_cols = len([x for x in line_parsed if x])
            if num_cols < MIN_COLS:
                print_error(
                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
                    "Line",
                    line,
                )
            lspl = [line_parsed[i] for i in header_locs.values()]
            ## Check sample name entries
            (
                sample,
                run_accession,
                instrument_platform,
                fastq_1,
                fastq_2,
                fasta,
            ) = lspl[: len(HEADER)]
            sample = sample.replace(" ", "_")
            if not sample:
                print_error("Sample entry has not been specified!", "Line", line)
            ## Check FastQ file extension
            for fastq in [fastq_1, fastq_2]:
                if fastq:
                    if fastq.find(" ") != -1:
                        print_error("FastQ file contains spaces!", "Line", line)
                    if not fastq.endswith(FQ_EXTENSIONS):
                        print_error(
                            f"FastQ file does not have extension {' or '.join(list(FQ_EXTENSIONS))} !",
                            "Line",
                            line,
                        )
            if fasta:
                if fasta.find(" ") != -1:
                    print_error("FastA file contains spaces!", "Line", line)
                if not fasta.endswith(FA_EXTENSIONS):
                    print_error(
                        f"FastA file does not have extension {' or '.join(list(FA_EXTENSIONS))}!",
                        "Line",
                        line,
                    )
            sample_info = []
            # Check run_accession
            if not run_accession:
                print_error("Run accession has not been specified!", "Line", line)
            else:
                sample_info.append(run_accession)
            # Check instrument_platform
            if not instrument_platform:
                print_error("Instrument platform has not been specified!", "Line", line)
            else:
                if instrument_platform not in INSTRUMENT_PLATFORMS:
                    print_error(
                        f"Instrument platform {instrument_platform} is not supported! "
                        f"List of supported platforms {', '.join(INSTRUMENT_PLATFORMS)}",
                        "Line",
                        line,
                    )
                sample_info.append(instrument_platform)
            ## Auto-detect paired-end/single-end
            if sample and fastq_1 and fastq_2:  ## Paired-end short reads
                sample_info.extend(["0", fastq_1, fastq_2, fasta])
            elif sample and fastq_1 and not fastq_2:  ## Single-end short/long fastq reads
                sample_info.extend(["1", fastq_1, fastq_2, fasta])
            elif sample and fasta and not fastq_1 and not fastq_2:  ## Single-end long reads
                sample_info.extend(["1", fastq_1, fastq_2, fasta])
            elif fasta and (fastq_1 or fastq_2):
                print_error(
                    "FastQ and FastA files cannot be specified together in the same library!",
                    "Line",
                    line,
                )
            else:
                print_error("Invalid combination of columns provided!", "Line", line)
            ## Create sample mapping dictionary = { sample: [ run_accession, instrument_platform, single_end, fastq_1, fastq_2 , fasta ] }
            if sample not in sample_mapping_dict:
                sample_mapping_dict[sample] = [sample_info]
            else:
                if sample_info in sample_mapping_dict[sample]:
                    print_error("Samplesheet contains duplicate rows!", "Line", line)
                else:
                    sample_mapping_dict[sample].append(sample_info)
    ## Write validated samplesheet with appropriate columns
    HEADER_OUT = [
        "sample",
        "run_accession",
        "instrument_platform",
        "single_end",
        "fastq_1",
        "fastq_2",
        "fasta",
    ]
    if len(sample_mapping_dict) > 0:
        out_dir = os.path.dirname(file_out)
        make_dir(out_dir)
        with open(file_out, "w") as fout:
            fout.write(",".join(HEADER_OUT) + "\n")
            for sample in sorted(sample_mapping_dict.keys()):
                for idx, val in enumerate(sample_mapping_dict[sample]):
                    fout.write(f"{sample},{','.join(val)}\n")
    else:
        print_error("No entries to process!", "Samplesheet: {}".format(file_in))
-def parse_args(argv=None):
+def main(args=None):
-    """Define and immediately parse command line arguments."""
+    args = parse_args(args)
-    parser = argparse.ArgumentParser(
+    check_samplesheet(args.FILE_IN, args.FILE_OUT)
        description="Validate and transform a tabular samplesheet.",
        epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv",
    )
    parser.add_argument(
        "file_in",
        metavar="FILE_IN",
        type=Path,
        help="Tabular input samplesheet in CSV or TSV format.",
    )
    parser.add_argument(
        "file_out",
        metavar="FILE_OUT",
        type=Path,
        help="Transformed output samplesheet in CSV format.",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        help="The desired log level (default WARNING).",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
        default="WARNING",
    )
    return parser.parse_args(argv)
 def main(argv=None):
    """Coordinate argument parsing and program execution."""
    args = parse_args(argv)
    logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
    if not args.file_in.is_file():
        logger.error(f"The given input file {args.file_in} was not found!")
        sys.exit(2)
    args.file_out.parent.mkdir(parents=True, exist_ok=True)
    check_samplesheet(args.file_in, args.file_out)
 if __name__ == "__main__":
--- a/conf/modules.config
+++ b/conf/modules.config
@ -12,22 +12,529 @@
 process {
-    publishDir = [
+    withName: FASTQC {
-        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
+        ext.args = '--quiet'
-        mode: params.publish_dir_mode,
+        ext.prefix = { "${meta.id}_${meta.run_accession}_raw" }
        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
    ]
    withName: SAMPLESHEET_CHECK {
        publishDir = [
-            path: { "${params.outdir}/pipeline_info" },
+            path: { "${params.outdir}/fastqc/raw" },
            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            pattern: '*.html'
        ]
    }
-    withName: FASTQC {
+    withName: FASTQC_PROCESSED {
        ext.args = '--quiet'
        ext.prefix = { "${meta.id}_${meta.run_accession}_processed" }
        publishDir = [
            path: { "${params.outdir}/fastqc/processed" },
            mode: params.publish_dir_mode,
            pattern: '*.html'
        ]
    }
    // FALCO: name outputs "<id>_<run_accession>_raw_falco" and publish only
    // the HTML and text reports to <outdir>/falco/raw.
    withName: FALCO {
        ext.prefix = { "${meta.id}_${meta.run_accession}_raw_falco" }
        publishDir = [
            path: { "${params.outdir}/falco/raw" },
            mode: params.publish_dir_mode,
            pattern: '*.{html,txt}'
        ]
    }
    // FALCO_PROCESSED: same as FALCO, but outputs carry the
    // "_processed_falco" suffix and are published to <outdir>/falco/processed.
    withName: FALCO_PROCESSED {
        ext.prefix = { "${meta.id}_${meta.run_accession}_processed_falco" }
        publishDir = [
            path: { "${params.outdir}/falco/processed" },
            mode: params.publish_dir_mode,
            pattern: '*.{html,txt}'
        ]
    }
    // FASTP_SINGLE: build the fastp command-line flags from the short-read QC
    // parameters. Entries that do not apply evaluate to an empty string and the
    // list is joined with spaces.
    withName: FASTP_SINGLE {
        ext.args   = [
            // trimming options
            params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
            // no --adapter_sequence flag when an adapter list is set; otherwise pass adapter1 if given
            params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
            // filtering options
            "--length_required ${params.shortread_qc_minlength}",
            // complexity filtering is only added when enabled AND fastp is the selected tool
            (params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp') ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Two publishing rules: reads only when save_preprocessed_reads is set,
        // log/HTML/JSON reports always.
        publishDir = [
            [
                path: { "${params.outdir}/fastp" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/fastp" },
                mode: params.publish_dir_mode,
                pattern: '*.{log,html,json}'
            ]
        ]
    }
    // FASTP_PAIRED: build the fastp command-line flags for paired-end
    // libraries. Entries that do not apply evaluate to an empty string and the
    // list is joined with spaces.
    withName: FASTP_PAIRED {
        ext.args   = [
            // collapsing options - option to retain singletons
            params.shortread_qc_includeunmerged ? '--include_unmerged' : '',
            // trimming options
            params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "",
            // no explicit adapter flags when an adapter list is set
            params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "",
            // without an adapter list and without adapter2, fall back to --detect_adapter_for_pe
            params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe",
            // filtering options
            "--length_required ${params.shortread_qc_minlength}",
            // complexity filtering is only added when enabled AND fastp is the selected tool
            params.perform_shortread_complexityfilter && params.shortread_complexityfilter_tool == 'fastp' ? "--low_complexity_filter --complexity_threshold ${params.shortread_complexityfilter_fastp_threshold}" : ''
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Two publishing rules: reads only when save_preprocessed_reads is set,
        // log/HTML/JSON reports always.
        publishDir = [
            [
                path: { "${params.outdir}/fastp" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/fastp" },
                mode: params.publish_dir_mode,
                pattern: '*.{log,html,json}'
            ]
        ]
    }
    // ADAPTERREMOVAL_SINGLE: build AdapterRemoval flags for single-end
    // libraries from the short-read QC parameters.
    withName: ADAPTERREMOVAL_SINGLE {
        ext.args   = [
            // trimming options
            // skipping adapter trimming passes an empty adapter sequence; with an adapter list no flag is added here
            params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "",
            // filtering options
            "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Reads are published only when save_preprocessed_reads is set;
        // the .settings report is always published.
        publishDir = [
            [
                path: { "${params.outdir}/adapterremoval" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/adapterremoval" },
                mode: params.publish_dir_mode,
                pattern: '*.settings'
            ]
        ]
    }
    // ADAPTERREMOVAL_PAIRED: build AdapterRemoval flags for paired-end
    // libraries; --collapse is added when pair merging is requested.
    withName: ADAPTERREMOVAL_PAIRED {
        ext.args   = [
            // collapsing options
            params.shortread_qc_mergepairs ? "--collapse" : "",
            // trimming options
            // skipping adapter trimming passes empty adapter sequences for both mates
            params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", // adding adapter list happens at module input channel level
            params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter2 ${params.shortread_qc_adapter2}" : "",
            // filtering options
            "--minlength ${params.shortread_qc_minlength}"
        ].join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        // Reads are published only when save_preprocessed_reads is set;
        // the .settings report is always published.
        publishDir = [
            [
                path: { "${params.outdir}/adapterremoval" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/adapterremoval" },
                mode: params.publish_dir_mode,
                pattern: '*.settings'
            ]
        ]
    }
    // PORECHOP_PORECHOP: outputs are named "<id>_<run_accession>". Reads are
    // published only when save_preprocessed_reads is set; logs always.
    withName: PORECHOP_PORECHOP {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            [
                path: { "${params.outdir}/porechop" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/porechop" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ]
        ]
    }
    // FILTLONG: pass the long-read quality-filter parameters (minimum length,
    // keep percent, target bases) and name outputs "<id>_<run_accession>_filtered".
    withName: FILTLONG {
        ext.args = [
            "--min_length ${params.longread_qc_qualityfilter_minlength}",
            "--keep_percent ${params.longread_qc_qualityfilter_keeppercent}",
            "--target_bases ${params.longread_qc_qualityfilter_targetbases}"
        ]
        .join(' ').trim()
        ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" }
        // Reads are published only when save_preprocessed_reads is set; logs always.
        publishDir = [
            [
                path: { "${params.outdir}/filtlong" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_preprocessed_reads
            ],
            [
                path: { "${params.outdir}/filtlong" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ]
        ]
    }
    // BOWTIE2_BUILD: publish the output entry named 'bowtie2' to
    // <outdir>/bowtie2/build, only when save_hostremoval_index is set.
    withName: BOWTIE2_BUILD {
        publishDir = [
            path: { "${params.outdir}/bowtie2/build" },
            mode: params.publish_dir_mode,
            enabled: params.save_hostremoval_index,
            pattern: 'bowtie2'
        ]
    }
    // BOWTIE2_ALIGN: outputs are named "<id>_<run_accession>". Three publishing
    // rules share one directory: logs always, BAM files when
    // save_hostremoval_mapped is set, FASTQ files when save_hostremoval_unmapped
    // is set.
    withName: BOWTIE2_ALIGN {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            [
                path: { "${params.outdir}/bowtie2/align" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ],
            [
                path: { "${params.outdir}/bowtie2/align" },
                mode: params.publish_dir_mode,
                enabled: params.save_hostremoval_mapped,
                pattern: '*.bam'
            ],
            [
                path: { "${params.outdir}/bowtie2/align" },
                mode: params.publish_dir_mode,
                enabled: params.save_hostremoval_unmapped,
                pattern: '*.fastq.gz'
            ]
        ]
    }
    // MINIMAP2_INDEX: build the index with the minimap2 'map-ont' preset and
    // publish the output entry named 'minimap2' only when
    // save_hostremoval_index is set.
    withName: MINIMAP2_INDEX {
        ext.args = '-x map-ont'
        publishDir = [
            path: { "${params.outdir}/minimap2/index" },
            mode: params.publish_dir_mode,
            enabled: params.save_hostremoval_index,
            pattern: 'minimap2'
        ]
    }
    // MINIMAP2_ALIGN: outputs are named "<id>_<run_accession>"; BAM files are
    // published only when save_hostremoval_mapped is set.
    withName: MINIMAP2_ALIGN {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            path: { "${params.outdir}/minimap2/align" },
            mode: params.publish_dir_mode,
            enabled: params.save_hostremoval_mapped,
            pattern: '*.bam'
        ]
    }
    // SAMTOOLS_VIEW: keep only reads with SAM flag 4 set (-f 4, i.e. unmapped
    // reads) and publish the resulting BAM when save_hostremoval_unmapped is set.
    withName: SAMTOOLS_VIEW {
        ext.args = '-f 4'
        // Include the run accession, like the sibling MINIMAP2_ALIGN and
        // SAMTOOLS_BAM2FQ scopes: without it, several runs of the same sample
        // publish to the same file name and overwrite each other.
        // NOTE(review): the '.mapped.sorted' suffix is kept unchanged although
        // '-f 4' selects unmapped reads - consider renaming in a follow-up.
        ext.prefix = { "${meta.id}_${meta.run_accession}.mapped.sorted" }
        publishDir = [
            path: { "${params.outdir}/samtools/view" },
            mode: params.publish_dir_mode,
            enabled: params.save_hostremoval_unmapped,
            pattern: '*.bam'
        ]
    }
    // SAMTOOLS_BAM2FQ: outputs are named "<id>_<run_accession>"; the *.fq.gz
    // files are published only when save_hostremoval_unmapped is set.
    withName: SAMTOOLS_BAM2FQ {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            path: { "${params.outdir}/samtools/bam2fq" },
            mode: params.publish_dir_mode,
            enabled: params.save_hostremoval_unmapped,
            pattern: '*.fq.gz'
        ]
    }
    // SAMTOOLS_STATS: outputs are named "<id>_<run_accession>"; files ending
    // in 'stats' are always published to <outdir>/samtools/stats.
    withName: SAMTOOLS_STATS {
        ext.prefix = { "${meta.id}_${meta.run_accession}" }
        publishDir = [
            path: { "${params.outdir}/samtools/stats" },
            mode: params.publish_dir_mode,
            pattern: '*stats'
        ]
    }
    // BBMAP_BBDUK: entropy-based complexity filtering. The entropy threshold,
    // window size and masking mode come from the shortread_complexityfilter_*
    // parameters.
    withName: BBMAP_BBDUK {
        ext.args =  [
                "entropy=${params.shortread_complexityfilter_entropy}",
                "entropywindow=${params.shortread_complexityfilter_bbduk_windowsize}",
                params.shortread_complexityfilter_bbduk_mask ?  "entropymask=t" : "entropymask=f"
            ].join(' ').trim()
        ext.prefix = { "${meta.id}-${meta.run_accession}" }
        // Reads are published only when save_complexityfiltered_reads is set.
        // Logs are handled solely by the second, unconditional rule, so the
        // first rule no longer matches '*.log' as well (it used to publish the
        // same log twice to the same directory).
        publishDir = [
            [
                path: { "${params.outdir}/bbduk/" },
                mode: params.publish_dir_mode,
                pattern: '*.fastq.gz',
                enabled: params.save_complexityfiltered_reads
            ],
            [
                path: { "${params.outdir}/bbduk/" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ]
        ]
    }
    // PRINSEQPLUSPLUS: complexity filtering with either the DUST score or the
    // entropy threshold, selected by shortread_complexityfilter_prinseqplusplus_mode.
    withName: PRINSEQPLUSPLUS {
        ext.args =  [
                params.shortread_complexityfilter_prinseqplusplus_mode == 'dust' ? "-lc_dust=${params.shortread_complexityfilter_prinseqplusplus_dustscore}" : "-lc_entropy=${params.shortread_complexityfilter_entropy}",
                // Zero out all quality-trimming settings on BOTH read ends.
                // The original passed -trim_qual_left twice and never set
                // -trim_qual_right, leaving 3' trimming at the tool default.
                "-trim_qual_left=0 -trim_qual_right=0 -trim_qual_window=0 -trim_qual_step=0",
                "-VERBOSE 2"
            ].join(' ').trim()
        ext.prefix = { "${meta.id}-${meta.run_accession}" }
        // Filtered ("good") reads are published only when
        // save_complexityfiltered_reads is set; logs always.
        publishDir = [
            [
                path: { "${params.outdir}/prinseqplusplus/" },
                mode: params.publish_dir_mode,
                pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz}',
                enabled: params.save_complexityfiltered_reads
            ],
            [
                path: { "${params.outdir}/prinseqplusplus/" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ]
        ]
    }
    // CAT_FASTQ: outputs are named by sample id only (no run accession).
    // FASTQ files are published to <outdir>/run_merging only when
    // save_runmerged_reads is set.
    withName: CAT_FASTQ {
        ext.prefix = { "${meta.id}" }
        publishDir = [
            path: { "${params.outdir}/run_merging/" },
            mode: params.publish_dir_mode,
            pattern: '*.fastq.gz',
            enabled: params.save_runmerged_reads
        ]
    }
    // MALT_RUN: combine the per-database parameters with the mode given by
    // params.malt_mode; results go to a per-database directory.
    withName: MALT_RUN {
        ext.args = { "${meta.db_params} -m ${params.malt_mode}" }
        // one run with multiple samples, so fix ID to just db name to ensure clean log name
        ext.prefix = { "${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/malt/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{rma6,log,sam}'
        ]
    }
    // MEGAN_RMA2INFO_TSV: run with "-c2c Taxonomy" and publish the *.txt.gz and
    // *.megan outputs into the per-database MALT directory.
    withName: 'MEGAN_RMA2INFO_TSV' {
        ext.args = "-c2c Taxonomy"
        ext.prefix = { "${meta.id}" }
        publishDir = [
            path: { "${params.outdir}/malt/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt.gz,megan}'
        ]
    }
    // KRAKEN2_KRAKEN2: per-database parameters, plus --report-minimizer-data
    // when kraken2_save_minimizers is set.
    withName: KRAKEN2_KRAKEN2 {
        ext.args = params.kraken2_save_minimizers ? { "${meta.db_params} --report-minimizer-data" } : { "${meta.db_params}" }
        // With run merging the run accession is left out of the prefix; a
        // '.bracken' suffix is appended when meta.tool is "bracken".
        ext.prefix = params.perform_runmerging ? { meta.tool == "bracken" ? "${meta.id}-${meta.db_name}.bracken" : "${meta.id}-${meta.db_name}" } : { meta.tool == "bracken" ? "${meta.id}-${meta.run_accession}-${meta.db_name}.bracken" : "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/kraken2/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt,report,fastq.gz}'
        ]
    }
    // BRACKEN_BRACKEN: task failures are ignored (errorStrategy 'ignore').
    // The prefix omits the run accession when run merging is enabled.
    withName: BRACKEN_BRACKEN {
        errorStrategy = 'ignore'
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.bracken" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.bracken" }
        publishDir = [
            path: { "${params.outdir}/bracken/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.tsv'
        ]
    }
    // BRACKEN_COMBINEBRACKENOUTPUTS: combined report named
    // "bracken_<id>_combined_reports", published as *.txt to <outdir>/bracken.
    withName: BRACKEN_COMBINEBRACKENOUTPUTS {
        ext.prefix = { "bracken_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/bracken/" },
            mode: params.publish_dir_mode,
            pattern: '*.txt'
        ]
    }
    // KRAKENTOOLS_COMBINEKREPORTS_KRAKEN: combined report named
    // "kraken2_<id>_combined_reports", published as *.txt to <outdir>/kraken2.
    withName: KRAKENTOOLS_COMBINEKREPORTS_KRAKEN {
        ext.prefix = { "kraken2_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/kraken2/" },
            mode: params.publish_dir_mode,
            pattern: '*.txt'
        ]
    }
    // KRAKENUNIQ_PRELOADEDKRAKENUNIQ: pass the per-database parameters through
    // unchanged and publish results to a per-database directory.
    withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
        ext.args = { "${meta.db_params}" }
        // one run with multiple samples, so fix ID to just db name to ensure clean log name
        ext.prefix = { "${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/krakenuniq/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt,report,fastq.gz}'
        ]
    }
    // KRONA_CLEANUP: the prefix omits the run accession when run merging is
    // enabled; only files matching the html pattern are published to <outdir>/krona.
    withName: KRONA_CLEANUP {
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/krona/" },
            mode: params.publish_dir_mode,
            pattern: '*.{html}'
        ]
    }
    // KRONA_KTIMPORTTEXT: outputs are named "<tool>-<id>" and the HTML output
    // is published to <outdir>/krona.
    withName: KRONA_KTIMPORTTEXT {
        ext.prefix = { "${meta.tool}-${meta.id}" }
        publishDir = [
            path: { "${params.outdir}/krona/" },
            mode: params.publish_dir_mode,
            pattern: '*.{html}'
        ]
    }
    // MEGAN_RMA2INFO_KRONA: run with "--read2class Taxonomy"; no publishDir is
    // set in this scope.
    withName: 'MEGAN_RMA2INFO_KRONA' {
        ext.args = { "--read2class Taxonomy" }
        ext.prefix = { "${meta.id}-${meta.db_name}" }
    }
    // KRONA_KTIMPORTTAXONOMY: run with the "-i" flag; outputs are named
    // "<tool>-<id>" and the HTML output is published to <outdir>/krona.
    withName: KRONA_KTIMPORTTAXONOMY {
        ext.args = "-i"
        ext.prefix = { "${meta.tool}-${meta.id}" }
        publishDir = [
            path: { "${params.outdir}/krona/" },
            mode: params.publish_dir_mode,
            pattern: '*.{html}'
        ]
    }
    // METAPHLAN3_METAPHLAN3: per-database parameters are passed through; the
    // prefix omits the run accession when run merging is enabled.
    withName: METAPHLAN3_METAPHLAN3 {
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/metaphlan3/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{biom,txt}'
        ]
    }
    // METAPHLAN3_MERGEMETAPHLANTABLES: merged table named
    // "metaphlan3_<id>_combined_reports", published to <outdir>/metaphlan3.
    withName: METAPHLAN3_MERGEMETAPHLANTABLES {
        ext.prefix = { "metaphlan3_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/metaphlan3/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt}'
        ]
    }
    // CENTRIFUGE_CENTRIFUGE: per-database parameters are passed through; the
    // prefix ends in '.centrifuge' and omits the run accession when run
    // merging is enabled.
    withName: CENTRIFUGE_CENTRIFUGE {
        publishDir = [
            path: { "${params.outdir}/centrifuge/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt,sam,gz}'
        ]
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" }
    }
    // CENTRIFUGE_KREPORT: a task exiting with status 255 is ignored; any other
    // failure is retried. Naming follows CENTRIFUGE_CENTRIFUGE.
    withName: CENTRIFUGE_KREPORT {
        errorStrategy = {task.exitStatus == 255 ? 'ignore' : 'retry'}
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" }
        publishDir = [
            path: { "${params.outdir}/centrifuge/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt}'
        ]
    }
    // KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE: combined report named
    // "centrifuge_<id>_combined_reports", published to <outdir>/centrifuge.
    withName: KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE {
        ext.prefix = { "centrifuge_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/centrifuge/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt}'
        ]
    }
    // KAIJU_KAIJU: per-database parameters are passed through; the prefix
    // omits the run accession when run merging is enabled. Only *.tsv files
    // are published.
    withName: KAIJU_KAIJU {
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/kaiju/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.tsv'
        ]
        ext.args = { "${meta.db_params}" }
    }
    // KAIJU_KAIJU2TABLE: combined table named "kaiju_<id>_combined_reports",
    // published to <outdir>/kaiju.
    withName: KAIJU_KAIJU2TABLE {
        ext.prefix = { "kaiju_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/kaiju/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt}'
        ]
    }
    // KAIJU_KAIJU2KRONA: run with the '-v -u' flags; no prefix or publishDir
    // is set in this scope.
    withName: KAIJU_KAIJU2KRONA {
        ext.args = '-v -u'
    }
    // DIAMOND_BLASTX: per-database parameters are passed through; the prefix
    // omits the run accession when run merging is enabled. The publish pattern
    // covers the listed alignment/report extensions plus logs.
    withName: DIAMOND_BLASTX {
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/diamond/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{blast,xml,txt,daa,sam,tsv,paf,log}'
        ]
    }
    // MOTUS_PROFILE: assemble mOTUs flags from the motus_* parameters. Note
    // that "-p" and "-c" are added when the corresponding parameter is NOT set.
    withName: MOTUS_PROFILE {
        ext.args = {
            [
                params.motus_remove_ncbi_ids ? "" : "-p",
                params.motus_use_relative_abundance ? "" : "-c",
                params.motus_save_mgc_read_counts ?  "-M ${task.ext.prefix}.mgc" : ""
            // Join with spaces directly, as the other scopes in this file do.
            // The previous join(',') + replaceAll(',', " ") round trip would
            // also rewrite any comma contained in the -M file prefix.
            ].join(' ').trim()
            }
        // The prefix omits the run accession when run merging is enabled.
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
        publishDir = [
            path: { "${params.outdir}/motus/${meta.db_name}/" },
            mode: params.publish_dir_mode
        ]
    }
    // MOTUS_MERGE: add "-B" when generate_biom_output is set; the merged
    // output is named "motus_<id>_combined_reports" and every output file is
    // published (no pattern filter).
    withName: MOTUS_MERGE {
        ext.args = { params.generate_biom_output ? "-B" : "" }
        ext.prefix = { "motus_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/motus/" },
            mode: params.publish_dir_mode
        ]
    }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
@ -38,4 +545,11 @@ process {
        ]
    }
    // MULTIQC: the publish directory is derived from the process name - take
    // the last ':'-separated component, then its first '_'-separated token,
    // lower-cased. versions.yml is excluded from publishing (saveAs returns null).
    withName: MULTIQC {
        publishDir = [
            path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
        ]
    }
 }
--- a/conf/test.config
+++ b/conf/test.config
@ -22,8 +22,42 @@ params {
    // Input data
    // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
    // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input  = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
-
+    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
-    // Genome references
+    perform_shortread_qc                  = true
-    genome = 'R64-1-1'
+    perform_longread_qc                   = true
    shortread_qc_mergepairs               = true
    perform_shortread_complexityfilter    = true
    perform_shortread_hostremoval         = true
    perform_longread_hostremoval          = true
    perform_runmerging                    = true
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    run_kaiju                             = true
    run_kraken2                           = true
    run_bracken                           = true
    run_malt                              = false
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
    run_krakenuniq                        = true
    run_motus                             = false
    run_krona                             = true
    krona_taxonomy_directory              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
    malt_save_reads                       = true
    kraken2_save_reads                    = true
    centrifuge_save_reads                 = true
    diamond_save_reads                    = true
 }
 // Process overrides for the MALT and MEGAN steps in this test profile.
 process {
    withName: MALT_RUN {
        // Allow only one instance of this process to run at a time
        maxForks = 1
        // MALT mode comes from params.malt_mode; -J-Xmx12G is passed through unchanged
        // NOTE(review): presumably sets the JVM maximum heap to 12 GB — confirm it suits the test runner
        ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
    }
    withName: MEGAN_RMA2INFO_TSV {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
    withName: MEGAN_RMA2INFO_KRONA {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
 }
--- a/conf/test_krakenuniq.config
+++ b/conf/test_krakenuniq.config
@ -0,0 +1,66 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_krakenuniq,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 //
 // Separate test as KrakenUniq database can sometimes be too big for GHA
 //
 // Parameters for the KrakenUniq-only test profile.
 params {
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test to check KrakenUniq function'
    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'
    // Input data: sample sheet and the KrakenUniq-specific database sheet from nf-core/test-datasets
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database_krakenuniq.csv'
    // Preprocessing: every step is switched on
    perform_shortread_qc                  = true
    perform_longread_qc                   = true
    shortread_qc_mergepairs               = true
    perform_shortread_complexityfilter    = true
    perform_shortread_hostremoval         = true
    perform_longread_hostremoval          = true
    perform_runmerging                    = true
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    // Profiling: only KrakenUniq is switched on
    run_kaiju                             = false
    run_kraken2                           = false
    run_bracken                           = false
    run_malt                              = false
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
    run_krakenuniq                        = true
    run_motus                             = false
    // Krona visualisation and its taxonomy table
    run_krona                             = true
    krona_taxonomy_directory              = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/metagenome/krona_taxonomy.tab'
    // Read-saving options for profilers that are switched off above
    malt_save_reads                       = true
    kraken2_save_reads                    = true
    centrifuge_save_reads                 = true
    diamond_save_reads                    = true
 }
 // Process overrides for the MALT and MEGAN steps.
 // NOTE(review): run_malt is false in this profile's params, so these selectors are presumably inert here — confirm.
 process {
    withName: MALT_RUN {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
    withName: MEGAN_RMA2INFO_TSV {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
    withName: MEGAN_RMA2INFO_KRONA {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
 }
--- a/conf/test_motus.config
+++ b/conf/test_motus.config
@ -0,0 +1,51 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_motus,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 //
 // Separate test as mOTUs database download can be flaky
 //
 // Parameters for the mOTUs-only test profile.
 params {
    config_profile_name        = 'mOTUs Test profile'
    config_profile_description = 'Minimal test to check mOTUs function'
    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'
    // Input data: sample sheet from nf-core/test-datasets
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    // Unlike the other test profiles this is a local relative path, not a URL
    // NOTE(review): presumably the file is created before the run (e.g. by CI after downloading the mOTUs database) — confirm
    databases                             = 'database_motus.csv'
    // Preprocessing: every step is switched off
    perform_shortread_qc                  = false
    perform_longread_qc                   = false
    perform_shortread_complexityfilter    = false
    perform_shortread_hostremoval         = false
    perform_longread_hostremoval          = false
    perform_runmerging                    = false
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    // Profiling: only mOTUs is switched on
    run_kaiju                             = false
    run_kraken2                           = false
    run_bracken                           = false
    run_malt                              = false
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
    run_krakenuniq                        = false
    run_motus                             = true
    // mOTUs-specific options, all left off
    motus_save_mgc_read_counts            = false
    motus_remove_ncbi_ids                 = false
    motus_use_relative_abundance          = false
    // Profile standardisation is switched on for this test
    run_profile_standardisation           = true
 }
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@ -0,0 +1,51 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_nopreprocessing,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 // Parameters for the test profile that skips all preprocessing.
 params {
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset skipping all preprocessing to check pipeline function'
    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'
    // Input data: sample sheet and database sheet from nf-core/test-datasets
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
    // Preprocessing: every step is switched off
    perform_shortread_qc                  = false
    perform_longread_qc                   = false
    perform_shortread_complexityfilter    = false
    perform_shortread_hostremoval         = false
    perform_longread_hostremoval          = false
    perform_runmerging                    = false
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    // Profiling: every profiler except mOTUs is switched on
    run_kaiju                             = true
    run_kraken2                           = true
    run_bracken                           = true
    run_malt                              = true
    run_metaphlan3                        = true
    run_centrifuge                        = true
    run_diamond                           = true
    run_krakenuniq                        = true
    run_motus                             = false
    // Krona visualisation
    run_krona                             = true
 }
 // Process override for the MALT step in this test profile.
 process {
    withName: MALT_RUN {
        // Allow only one instance of this process to run at a time
        maxForks = 1
        // MALT mode comes from params.malt_mode; -J-Xmx12G is passed through unchanged
        // NOTE(review): presumably sets the JVM maximum heap to 12 GB — confirm it suits the test runner
        ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
    }
 }
--- a/conf/test_noprofiling.config
+++ b/conf/test_noprofiling.config
@ -0,0 +1,50 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_noprofiling,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 // Parameters for the test profile that runs preprocessing only, with no profilers.
 params {
    config_profile_name        = 'Test profile'
    config_profile_description = 'Minimal test dataset without performing any profiling to check pipeline function'
    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'
    // Input data: sample sheet and database sheet from nf-core/test-datasets
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
    // Preprocessing: every step is switched on
    perform_shortread_qc                  = true
    perform_longread_qc                   = true
    shortread_qc_mergepairs               = true
    perform_shortread_complexityfilter    = true
    perform_shortread_hostremoval         = true
    perform_longread_hostremoval          = true
    perform_runmerging                    = true
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    // Profiling: every profiler is switched off
    run_kaiju                             = false
    run_kraken2                           = false
    run_bracken                           = false
    run_malt                              = false
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
    run_krakenuniq                        = false
    run_motus                             = false
 }
 // Process override for the MALT step.
 // NOTE(review): run_malt is false in this profile's params, so this selector is presumably inert here — confirm.
 process {
    withName: MALT_RUN {
        // Allow only one instance of this process to run at a time
        maxForks = 1
    }
 }
--- a/conf/test_nothing.config
+++ b/conf/test_nothing.config
@ -0,0 +1,50 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.
    Use as follows:
        nextflow run nf-core/taxprofiler -profile test_nothing,<docker/singularity> --outdir <OUTDIR>
 ----------------------------------------------------------------------------------------
 */
 // Parameters for the test profile with every preprocessing step and every profiler switched off.
 params {
    config_profile_name        = 'Test profile'
    config_profile_description = "Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. Useful when you only wish to test a single profiler without having to 'opt-out' of all the others"
    // Limit resources so that this can run on GitHub Actions
    max_cpus   = 2
    max_memory = '6.GB'
    max_time   = '6.h'
    // Input data: sample sheet and database sheet from nf-core/test-datasets
    input                                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/samplesheet.csv'
    databases                             = 'https://raw.githubusercontent.com/nf-core/test-datasets/taxprofiler/database.csv'
    // Preprocessing: every step is switched off
    perform_shortread_qc                  = false
    perform_longread_qc                   = false
    perform_shortread_complexityfilter    = false
    perform_shortread_hostremoval         = false
    perform_longread_hostremoval          = false
    perform_runmerging                    = false
    hostremoval_reference                 = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta'
    // Profiling: every profiler is switched off; enable individual ones on the command line
    run_kaiju                             = false
    run_kraken2                           = false
    run_bracken                           = false
    run_malt                              = false
    run_metaphlan3                        = false
    run_centrifuge                        = false
    run_diamond                           = false
    run_krakenuniq                        = false
    run_motus                             = false
 }
 // Process override for the MALT step, applied if MALT is enabled on top of this profile.
 process {
    withName: MALT_RUN {
        // Allow only one instance of this process to run at a time
        maxForks = 1
        // MALT mode comes from params.malt_mode; -J-Xmx12G is passed through unchanged
        // NOTE(review): presumably sets the JVM maximum heap to 12 GB — confirm it suits the test runner
        ext.args = { "-m ${params.malt_mode} -J-Xmx12G" }
    }
 }
--- a/docs/images/nf-core-taxprofiler_icon.png
+++ b/docs/images/nf-core-taxprofiler_icon.png
--- a/docs/images/nf-core-taxprofiler_icon.svg
+++ b/docs/images/nf-core-taxprofiler_icon.svg
@ -0,0 +1,444 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!-- Created with Inkscape (http://www.inkscape.org/) -->
 <svg
   width="160mm"
   height="120mm"
   viewBox="0 0 160 120"
   version="1.1"
   id="svg5581"
   inkscape:version="1.2 (1:1.2+202205241504+da316b6974)"
   sodipodi:docname="nf-core-taxprofiler_icon.svg"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:xlink="http://www.w3.org/1999/xlink"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:svg="http://www.w3.org/2000/svg">
  <sodipodi:namedview
     id="namedview5583"
     pagecolor="#ffffff"
     bordercolor="#666666"
     borderopacity="1.0"
     inkscape:showpageshadow="2"
     inkscape:pageopacity="0.0"
     inkscape:pagecheckerboard="0"
     inkscape:deskcolor="#d1d1d1"
     inkscape:document-units="mm"
     showgrid="true"
     inkscape:zoom="0.41291035"
     inkscape:cx="541.27972"
     inkscape:cy="445.61731"
     inkscape:window-width="1920"
     inkscape:window-height="1043"
     inkscape:window-x="0"
     inkscape:window-y="0"
     inkscape:window-maximized="1"
     inkscape:current-layer="layer1">
    <inkscape:grid
       type="xygrid"
       id="grid6096"
       originx="-7.7520637"
       originy="-7.8991986" />
  </sodipodi:namedview>
  <defs
     id="defs5578">
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient30057"
       x1="10.213824"
       y1="221.42242"
       x2="218.95003"
       y2="221.42242"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.6691607,0.01747377,-0.01751914,0.67089835,5.6293239,-100.28017)" />
    <linearGradient
       x1="0"
       y1="0"
       x2="1"
       y2="0"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(37.87862,-29.594021,-29.594021,-37.87862,275.46292,136.24821)"
       spreadMethod="pad"
       id="linearGradient21662">
      <stop
         style="stop-opacity:1;stop-color:#000000"
         offset="0"
         id="stop21652" />
      <stop
         style="stop-opacity:1;stop-color:#0c542a"
         offset="0.214724"
         id="stop21654" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="0.84662598"
         id="stop21656" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="0.846626"
         id="stop21658" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="1"
         id="stop21660" />
    </linearGradient>
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient2708"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174"
       gradientUnits="userSpaceOnUse" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6027"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6029"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6031"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6033"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6035"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6037"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6039"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6041"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6043"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6045"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6047"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6049"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6051"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6053"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6055"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6057"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6059"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6061"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6063"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6065"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6067"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6069"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
  </defs>
  <g
     inkscape:label="Layer 1"
     inkscape:groupmode="layer"
     id="layer1"
     transform="translate(-7.7520638,-7.8991986)">
    <g
       id="g6123"
       transform="translate(7.3175494,9.6409068)">
      <path
         id="path30012-5"
         style="fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
         d="M 9.5900834,8.8480637 C 7.5556439,12.028833 7.7408039,16.123943 7.7687039,19.769833 c 0.83412,6.65305 4.1623501,13.08043 9.6588001,17.05639 2.95411,2.45491 6.166,4.56934 9.30414,6.77667 -1.90794,4.17676 -2.98236,9.29825 -0.29786,13.40918 1.67713,3.72229 4.84583,6.89989 5.31985,11.06218 -0.73221,2.86643 -3.54676,6.3663 -0.97712,9.12477 2.12262,2.77099 4.06395,5.64907 5.35322,8.82231 2.02674,3.35439 6.5976,4.09759 10.20366,3.73748 2.47738,-0.18007 4.72202,-2.67715 3.28025,-5.08004 -1.18247,-2.55998 -4.28744,-2.18411 -6.38635,-3.25125 -1.36801,-2.44734 -3.00653,-5.99123 -0.26124,-8.17889 2.5926,-2.70872 4.69882,-5.81991 6.94515,-8.77398 2.71899,-1.24434 5.98816,-0.0654 8.84595,0.2859 10.88656,2.48435 21.31368,6.88721 32.36055,8.64773 3.13924,0.41138 6.76175,-0.45373 9.579516,0.92166 1.40942,4.43747 2.34224,9.45414 5.76225,12.84528 2.6934,1.93573 7.02228,2.12481 9.53599,-0.13787 1.60636,-2.93172 -1.52253,-5.89536 -4.20194,-6.62585 -0.40071,-2.8223 -0.37969,-5.87179 0.26854,-8.58529 3.77785,-0.24182 7.24094,1.16954 10.61732,2.45295 1.78619,3.3965 5.12259,5.96321 8.91186,6.64761 1.60415,-1.40492 2.56073,1.72858 3.8909,2.50862 2.79709,2.93023 6.8997,5.0038 10.99366,4.75587 3.26301,-0.67749 3.79546,-4.59452 2.47471,-7.17014 -0.80988,-4.2018 -3.22071,-8.59389 -1.2047,-12.82352 1.17357,-5.20598 -1.35199,-10.96325 -5.77146,-13.90702 -4.33733,-3.64436 -10.39077,-3.64653 -14.92742,-6.93614 -10.03223,-6.15469 -21.64077,-9.58338 -33.280326,-11.28456 -12.56653,-0.98847 -25.20371,-0.69003 -37.78225,-1.58411 -7.09295,-0.25502 -14.55087,-1.15633 -21.23347,1.84537 -3.9067,-0.36049 -7.22928,-2.98747 -10.60062,-4.8325 -3.63865,-2.26998 -7.6488,-4.94361 -8.46777,-9.49866 -1.63814,-4.07488 -1.20485,-8.6292 -2.57509,-12.7242393 -0.092,-1.43702 -3.1468606,-2.01015 -3.5173206,-0.42766 z" />
      <path
         style="fill:url(#linearGradient30057);fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
         d="M 10.931314,9.9653233 C 9.7827934,11.308473 9.8484234,13.338583 9.5801634,15.027613 c -0.34115,2.76171 -0.29206,5.58762 0.6731896,8.22888 1.139301,3.87108 2.982591,7.65359 6.139101,10.27078 3.79198,3.54184 8.2401,6.33641 12.43029,9.30118 -0.91535,2.99306 -2.65543,5.87431 -2.30804,9.13471 0.18061,2.677 1.94571,4.91009 3.11856,7.23201 1.50937,2.8256 3.84729,5.54985 3.82514,8.91032 -0.0787,2.49314 -2.48083,4.5082 -1.96562,7.03872 1.67419,2.68138 3.992,4.96942 5.15932,7.96769 0.65556,1.60629 1.4695,3.40539 3.31812,3.87174 2.38126,0.82152 5.41091,1.64045 7.63103,0.0125 0.82635,-1.48261 -1.06902,-2.72955 -2.36736,-2.87342 -1.71199,-0.17589 -3.69466,-0.59568 -4.39228,-2.41244 -0.96281,-2.26721 -2.29358,-4.88743 -1.3576,-7.36922 2.00069,-3.29907 5.15025,-5.72284 7.05316,-9.10416 0.97281,-1.66962 2.43831,-3.35242 4.55042,-3.26353 6.66383,-0.13609 13.02922,2.25844 19.35504,4.0482 6.38637,1.7963 12.67764,4.02096 19.20445,5.25889 3.77887,0.58276 7.72718,-0.18602 11.419706,0.93529 1.63461,0.75428 1.35512,2.9334 2.05535,4.34159 1.1597,3.06423 1.81242,6.5515 4.16823,8.9707 1.76834,1.42588 4.39281,1.36118 6.4399,0.63293 1.20689,-0.70879 -0.0705,-2.295 -0.82735,-2.84018 -0.85353,-0.97431 -2.49109,-0.81793 -3.19991,-1.88904 -0.61703,-3.21399 -0.48223,-6.57875 0.0181,-9.80404 0.2319,-1.63989 2.04068,-2.10159 3.44204,-1.83986 3.30625,0.10127 6.50718,1.46919 9.55674,2.3424 -0.71254,-1.81326 -1.19119,-3.75193 -0.97348,-5.72815 0.0204,-1.69846 0.15688,-3.58482 1.5917,-4.73058 1.83011,-1.86627 3.60232,-3.85815 5.70315,-5.41088 0.57552,0.16274 -0.36083,0.85452 -0.4987,1.09386 -1.90807,2.09201 -4.24787,3.84191 -5.83093,6.20217 -0.46736,1.92903 -0.41493,3.99677 -0.24575,5.96645 0.84833,2.57726 2.1706,5.12487 4.1346,7.01585 1.36224,0.87656 2.75795,2.00764 4.37729,2.2713 1.6213,-0.74843 1.95011,-2.85006 2.55124,-4.35962 1.10913,-3.99969 1.90782,-8.08534 2.71945,-12.15319 0.1702,-1.75995 0.75519,-4.04477 -0.9398,-5.26766 -0.66443,-0.31636 -0.15084,-1.05324 0.36147,-0.52827 
2.02638,1.51086 1.30614,4.35901 1.06011,6.49585 -0.85333,4.26646 -1.66477,8.56551 -2.96213,12.72302 -0.11797,0.91577 -1.34029,1.9982 -0.53389,2.72401 2.58751,2.57362 4.81242,5.92113 8.5085,6.96547 1.59827,0.50471 3.80045,1.66945 5.18808,0.14586 1.17766,-1.4017 -0.0976,-3.1877 -0.28432,-4.72092 -0.75396,-3.29433 -2.15674,-6.56045 -1.96438,-9.99106 0.37367,-2.10533 1.65495,-4.11536 1.02167,-6.32262 -0.34992,-2.84568 -1.45335,-5.69952 -3.77909,-7.50012 -2.77154,-2.67603 -6.40479,-4.14933 -10.09936,-5.01092 -3.81561,-1.24593 -6.9919,-3.83431 -10.61749,-5.50364 -6.72466,-3.69962 -14.2084,-5.65169 -21.615834,-7.44474 -8.303242,-1.93229 -16.888842,-1.47941 -25.343602,-1.84797 -8.46313,-0.1352 -16.90366,-0.80082 -25.3617,-1.05107 -4.38737,-0.20312 -8.92121,-0.0523 -12.99753,1.77349 -2.83378,1.235 -5.55113,-0.83445 -8.06653,-1.90121 -3.95435,-2.11512 -8.20653,-4.63235 -11.45036,-7.81607 -1.44093,-1.52832 -1.98381,-3.13498 -2.61394,-4.93556 -0.98461,-2.81345 -1.62386,-5.97312 -1.9462,-9.06999 -0.24149,-1.39646 -0.0764,-3.01443 -0.81326,-4.2426397 -0.0219,-0.007 -0.045,-0.0107 -0.0678,-0.007 z"
         id="path30012"
         sodipodi:nodetypes="cccccccccccccccccccccccccccccccccccccccccccccccccccccccccscccc"
         inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
         inkscape:export-xdpi="159"
         inkscape:export-ydpi="159" />
      <path
         id="path1218-5-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 36.798094,98.985353 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637147 7.0058785,6.6371479 0 0 1 -7.00589,-6.637147 7.0058785,6.6371479 0 0 1 7.00589,-6.63713 7.0058785,6.6371479 0 0 1 7.00588,6.63713 z" />
      <path
         id="path1218-8-0"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 48.458974,96.857863 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-4-46"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 59.481564,99.639333 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637157 7.0058789,6.6371479 0 0 1 -7.00588,-6.637157 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-7-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 71.168234,96.889183 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637157 7.0058785,6.6371479 0 0 1 -7.00588,-6.637157 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00589,6.63715 z" />
      <path
         id="path1218-1-6"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 82.714904,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00587,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00587,6.63714 z" />
      <path
         id="path1218-73-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 93.813184,96.584163 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00587,-6.637147 7.0058789,6.6371479 0 0 1 7.00587,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-5-9-3"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 104.85924,98.601273 a 7.0058789,6.6371479 0 0 1 -7.005886,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63716 7.0058789,6.6371479 0 0 1 7.005886,6.63716 z" />
      <path
         id="path1218-8-7-8"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 116.52015,96.473773 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637127 7.0058785,6.6371479 0 0 1 -7.00588,-6.637127 7.0058785,6.6371479 0 0 1 7.00588,-6.63714 7.0058785,6.6371479 0 0 1 7.00589,6.63714 z" />
      <path
         id="path1218-4-7-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 127.54274,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00589,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00589,6.63714 z" />
      <path
         id="path1218-7-9-00"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 139.22941,96.505103 a 7.0058785,6.6371479 0 0 1 -7.00587,6.637147 7.0058785,6.6371479 0 0 1 -7.00588,-6.637147 7.0058785,6.6371479 0 0 1 7.00588,-6.63716 7.0058785,6.6371479 0 0 1 7.00587,6.63716 z" />
      <path
         id="path1218-1-5-9"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 150.77607,98.871153 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637137 7.0058785,6.6371479 0 0 1 -7.00588,-6.637137 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00588,6.63715 z" />
      <g
         id="g2682"
         style="fill:url(#linearGradient2708);fill-opacity:1;stroke:url(#linearGradient2708)"
         inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
         inkscape:export-xdpi="159"
         inkscape:export-ydpi="159"
         transform="matrix(3.7156967,0,0,3.7156967,-30.828156,-200.55501)">
        <ellipse
           style="fill:url(#linearGradient6027);fill-opacity:1;stroke:url(#linearGradient6029);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-5"
           cx="16.378149"
           cy="80.642143"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6031);fill-opacity:1;stroke:url(#linearGradient6033);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-8"
           cx="19.516428"
           cy="80.069572"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6035);fill-opacity:1;stroke:url(#linearGradient6037);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-4"
           cx="22.48292"
           cy="80.818146"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6039);fill-opacity:1;stroke:url(#linearGradient6041);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-7"
           cx="25.628136"
           cy="80.078003"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6043);fill-opacity:1;stroke:url(#linearGradient6045);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-1"
           cx="28.735676"
           cy="80.714775"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6047);fill-opacity:1;stroke:url(#linearGradient6049);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-73"
           cx="31.72254"
           cy="79.995911"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6051);fill-opacity:1;stroke:url(#linearGradient6053);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-5-9"
           cx="34.695347"
           cy="80.538773"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6055);fill-opacity:1;stroke:url(#linearGradient6057);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-8-7"
           cx="37.83363"
           cy="79.966202"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6059);fill-opacity:1;stroke:url(#linearGradient6061);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-4-7"
           cx="40.800121"
           cy="80.714775"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6063);fill-opacity:1;stroke:url(#linearGradient6065);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-7-9"
           cx="43.945339"
           cy="79.974632"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6067);fill-opacity:1;stroke:url(#linearGradient6069);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-1-5"
           cx="47.052876"
           cy="80.611404"
           rx="1.4964142"
           ry="1.4176555" />
      </g>
    </g>
  </g>
 </svg>
--- a/docs/images/nf-core-taxprofiler_icon_border.svg
+++ b/docs/images/nf-core-taxprofiler_icon_border.svg
@ -0,0 +1,445 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <!-- Created with Inkscape (http://www.inkscape.org/) -->
 <svg
   width="180mm"
   height="180mm"
   viewBox="0 0 180 180"
   version="1.1"
   id="svg5581"
   inkscape:version="1.2 (1:1.2+202206011327+fc4e4096c5)"
   sodipodi:docname="nf-core-taxprofiler_icon_border.svg"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:xlink="http://www.w3.org/1999/xlink"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:svg="http://www.w3.org/2000/svg">
  <sodipodi:namedview
     id="namedview5583"
     pagecolor="#ffffff"
     bordercolor="#666666"
     borderopacity="1.0"
     inkscape:showpageshadow="2"
     inkscape:pageopacity="0.0"
     inkscape:pagecheckerboard="0"
     inkscape:deskcolor="#d1d1d1"
     inkscape:document-units="mm"
     showgrid="true"
     inkscape:zoom="0.41291035"
     inkscape:cx="449.25006"
     inkscape:cy="446.82823"
     inkscape:window-width="1920"
     inkscape:window-height="1016"
     inkscape:window-x="784"
     inkscape:window-y="1107"
     inkscape:window-maximized="1"
     inkscape:current-layer="layer1">
    <inkscape:grid
       type="xygrid"
       id="grid6096"
       originx="-7.7520638"
       originy="-7.8991984"
       dotted="true" />
  </sodipodi:namedview>
  <defs
     id="defs5578">
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient30057"
       x1="10.213824"
       y1="221.42242"
       x2="218.95003"
       y2="221.42242"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.6691607,0.01747377,-0.01751914,0.67089835,5.6293239,-100.28017)" />
    <linearGradient
       x1="0"
       y1="0"
       x2="1"
       y2="0"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(37.87862,-29.594021,-29.594021,-37.87862,275.46292,136.24821)"
       spreadMethod="pad"
       id="linearGradient21662">
      <stop
         style="stop-opacity:1;stop-color:#000000"
         offset="0"
         id="stop21652" />
      <stop
         style="stop-opacity:1;stop-color:#0c542a"
         offset="0.214724"
         id="stop21654" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="0.84662598"
         id="stop21656" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="0.846626"
         id="stop21658" />
      <stop
         style="stop-opacity:1;stop-color:#25af64"
         offset="1"
         id="stop21660" />
    </linearGradient>
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient2708"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174"
       gradientUnits="userSpaceOnUse" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6027"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6029"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6031"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6033"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6035"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6037"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6039"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6041"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6043"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6045"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6047"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6049"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6051"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6053"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6055"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6057"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6059"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6061"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6063"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6065"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6067"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
    <linearGradient
       inkscape:collect="always"
       xlink:href="#linearGradient21662"
       id="linearGradient6069"
       gradientUnits="userSpaceOnUse"
       x1="14.381735"
       y1="80.392174"
       x2="49.04929"
       y2="80.392174" />
  </defs>
  <g
     inkscape:label="Layer 1"
     inkscape:groupmode="layer"
     id="layer1"
     transform="translate(-7.7520638,-7.8991986)">
    <g
       id="g6123"
       transform="translate(17.317549,39.640907)">
      <path
         id="path30012-5"
         style="fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
         d="M 9.5900834,8.8480637 C 7.5556439,12.028833 7.7408039,16.123943 7.7687039,19.769833 c 0.83412,6.65305 4.1623501,13.08043 9.6588001,17.05639 2.95411,2.45491 6.166,4.56934 9.30414,6.77667 -1.90794,4.17676 -2.98236,9.29825 -0.29786,13.40918 1.67713,3.72229 4.84583,6.89989 5.31985,11.06218 -0.73221,2.86643 -3.54676,6.3663 -0.97712,9.12477 2.12262,2.77099 4.06395,5.64907 5.35322,8.82231 2.02674,3.35439 6.5976,4.09759 10.20366,3.73748 2.47738,-0.18007 4.72202,-2.67715 3.28025,-5.08004 -1.18247,-2.55998 -4.28744,-2.18411 -6.38635,-3.25125 -1.36801,-2.44734 -3.00653,-5.99123 -0.26124,-8.17889 2.5926,-2.70872 4.69882,-5.81991 6.94515,-8.77398 2.71899,-1.24434 5.98816,-0.0654 8.84595,0.2859 10.88656,2.48435 21.31368,6.88721 32.36055,8.64773 3.13924,0.41138 6.76175,-0.45373 9.579516,0.92166 1.40942,4.43747 2.34224,9.45414 5.76225,12.84528 2.6934,1.93573 7.02228,2.12481 9.53599,-0.13787 1.60636,-2.93172 -1.52253,-5.89536 -4.20194,-6.62585 -0.40071,-2.8223 -0.37969,-5.87179 0.26854,-8.58529 3.77785,-0.24182 7.24094,1.16954 10.61732,2.45295 1.78619,3.3965 5.12259,5.96321 8.91186,6.64761 1.60415,-1.40492 2.56073,1.72858 3.8909,2.50862 2.79709,2.93023 6.8997,5.0038 10.99366,4.75587 3.26301,-0.67749 3.79546,-4.59452 2.47471,-7.17014 -0.80988,-4.2018 -3.22071,-8.59389 -1.2047,-12.82352 1.17357,-5.20598 -1.35199,-10.96325 -5.77146,-13.90702 -4.33733,-3.64436 -10.39077,-3.64653 -14.92742,-6.93614 -10.03223,-6.15469 -21.64077,-9.58338 -33.280326,-11.28456 -12.56653,-0.98847 -25.20371,-0.69003 -37.78225,-1.58411 -7.09295,-0.25502 -14.55087,-1.15633 -21.23347,1.84537 -3.9067,-0.36049 -7.22928,-2.98747 -10.60062,-4.8325 -3.63865,-2.26998 -7.6488,-4.94361 -8.46777,-9.49866 -1.63814,-4.07488 -1.20485,-8.6292 -2.57509,-12.7242393 -0.092,-1.43702 -3.1468606,-2.01015 -3.5173206,-0.42766 z" />
      <path
         style="fill:url(#linearGradient30057);fill-opacity:1;stroke:none;stroke-width:0.424868;stroke-linecap:round"
         d="M 10.931314,9.9653233 C 9.7827934,11.308473 9.8484234,13.338583 9.5801634,15.027613 c -0.34115,2.76171 -0.29206,5.58762 0.6731896,8.22888 1.139301,3.87108 2.982591,7.65359 6.139101,10.27078 3.79198,3.54184 8.2401,6.33641 12.43029,9.30118 -0.91535,2.99306 -2.65543,5.87431 -2.30804,9.13471 0.18061,2.677 1.94571,4.91009 3.11856,7.23201 1.50937,2.8256 3.84729,5.54985 3.82514,8.91032 -0.0787,2.49314 -2.48083,4.5082 -1.96562,7.03872 1.67419,2.68138 3.992,4.96942 5.15932,7.96769 0.65556,1.60629 1.4695,3.40539 3.31812,3.87174 2.38126,0.82152 5.41091,1.64045 7.63103,0.0125 0.82635,-1.48261 -1.06902,-2.72955 -2.36736,-2.87342 -1.71199,-0.17589 -3.69466,-0.59568 -4.39228,-2.41244 -0.96281,-2.26721 -2.29358,-4.88743 -1.3576,-7.36922 2.00069,-3.29907 5.15025,-5.72284 7.05316,-9.10416 0.97281,-1.66962 2.43831,-3.35242 4.55042,-3.26353 6.66383,-0.13609 13.02922,2.25844 19.35504,4.0482 6.38637,1.7963 12.67764,4.02096 19.20445,5.25889 3.77887,0.58276 7.72718,-0.18602 11.419706,0.93529 1.63461,0.75428 1.35512,2.9334 2.05535,4.34159 1.1597,3.06423 1.81242,6.5515 4.16823,8.9707 1.76834,1.42588 4.39281,1.36118 6.4399,0.63293 1.20689,-0.70879 -0.0705,-2.295 -0.82735,-2.84018 -0.85353,-0.97431 -2.49109,-0.81793 -3.19991,-1.88904 -0.61703,-3.21399 -0.48223,-6.57875 0.0181,-9.80404 0.2319,-1.63989 2.04068,-2.10159 3.44204,-1.83986 3.30625,0.10127 6.50718,1.46919 9.55674,2.3424 -0.71254,-1.81326 -1.19119,-3.75193 -0.97348,-5.72815 0.0204,-1.69846 0.15688,-3.58482 1.5917,-4.73058 1.83011,-1.86627 3.60232,-3.85815 5.70315,-5.41088 0.57552,0.16274 -0.36083,0.85452 -0.4987,1.09386 -1.90807,2.09201 -4.24787,3.84191 -5.83093,6.20217 -0.46736,1.92903 -0.41493,3.99677 -0.24575,5.96645 0.84833,2.57726 2.1706,5.12487 4.1346,7.01585 1.36224,0.87656 2.75795,2.00764 4.37729,2.2713 1.6213,-0.74843 1.95011,-2.85006 2.55124,-4.35962 1.10913,-3.99969 1.90782,-8.08534 2.71945,-12.15319 0.1702,-1.75995 0.75519,-4.04477 -0.9398,-5.26766 -0.66443,-0.31636 -0.15084,-1.05324 0.36147,-0.52827 
2.02638,1.51086 1.30614,4.35901 1.06011,6.49585 -0.85333,4.26646 -1.66477,8.56551 -2.96213,12.72302 -0.11797,0.91577 -1.34029,1.9982 -0.53389,2.72401 2.58751,2.57362 4.81242,5.92113 8.5085,6.96547 1.59827,0.50471 3.80045,1.66945 5.18808,0.14586 1.17766,-1.4017 -0.0976,-3.1877 -0.28432,-4.72092 -0.75396,-3.29433 -2.15674,-6.56045 -1.96438,-9.99106 0.37367,-2.10533 1.65495,-4.11536 1.02167,-6.32262 -0.34992,-2.84568 -1.45335,-5.69952 -3.77909,-7.50012 -2.77154,-2.67603 -6.40479,-4.14933 -10.09936,-5.01092 -3.81561,-1.24593 -6.9919,-3.83431 -10.61749,-5.50364 -6.72466,-3.69962 -14.2084,-5.65169 -21.615834,-7.44474 -8.303242,-1.93229 -16.888842,-1.47941 -25.343602,-1.84797 -8.46313,-0.1352 -16.90366,-0.80082 -25.3617,-1.05107 -4.38737,-0.20312 -8.92121,-0.0523 -12.99753,1.77349 -2.83378,1.235 -5.55113,-0.83445 -8.06653,-1.90121 -3.95435,-2.11512 -8.20653,-4.63235 -11.45036,-7.81607 -1.44093,-1.52832 -1.98381,-3.13498 -2.61394,-4.93556 -0.98461,-2.81345 -1.62386,-5.97312 -1.9462,-9.06999 -0.24149,-1.39646 -0.0764,-3.01443 -0.81326,-4.2426397 -0.0219,-0.007 -0.045,-0.0107 -0.0678,-0.007 z"
         id="path30012"
         sodipodi:nodetypes="cccccccccccccccccccccccccccccccccccccccccccccccccccccccccscccc"
         inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
         inkscape:export-xdpi="159"
         inkscape:export-ydpi="159" />
      <path
         id="path1218-5-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 36.798094,98.985353 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637147 7.0058785,6.6371479 0 0 1 -7.00589,-6.637147 7.0058785,6.6371479 0 0 1 7.00589,-6.63713 7.0058785,6.6371479 0 0 1 7.00588,6.63713 z" />
      <path
         id="path1218-8-0"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 48.458974,96.857863 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-4-46"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 59.481564,99.639333 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637157 7.0058789,6.6371479 0 0 1 -7.00588,-6.637157 7.0058789,6.6371479 0 0 1 7.00588,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-7-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 71.168234,96.889183 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637157 7.0058785,6.6371479 0 0 1 -7.00588,-6.637157 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00589,6.63715 z" />
      <path
         id="path1218-1-6"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 82.714904,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00587,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00587,6.63714 z" />
      <path
         id="path1218-73-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 93.813184,96.584163 a 7.0058789,6.6371479 0 0 1 -7.00588,6.637147 7.0058789,6.6371479 0 0 1 -7.00587,-6.637147 7.0058789,6.6371479 0 0 1 7.00587,-6.63715 7.0058789,6.6371479 0 0 1 7.00588,6.63715 z" />
      <path
         id="path1218-5-9-3"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 104.85924,98.601273 a 7.0058789,6.6371479 0 0 1 -7.005886,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63716 7.0058789,6.6371479 0 0 1 7.005886,6.63716 z" />
      <path
         id="path1218-8-7-8"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 116.52015,96.473773 a 7.0058785,6.6371479 0 0 1 -7.00589,6.637127 7.0058785,6.6371479 0 0 1 -7.00588,-6.637127 7.0058785,6.6371479 0 0 1 7.00588,-6.63714 7.0058785,6.6371479 0 0 1 7.00589,6.63714 z" />
      <path
         id="path1218-4-7-4"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 127.54274,99.255233 a 7.0058789,6.6371479 0 0 1 -7.00589,6.637147 7.0058789,6.6371479 0 0 1 -7.00588,-6.637147 7.0058789,6.6371479 0 0 1 7.00588,-6.63714 7.0058789,6.6371479 0 0 1 7.00589,6.63714 z" />
      <path
         id="path1218-7-9-00"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 139.22941,96.505103 a 7.0058785,6.6371479 0 0 1 -7.00587,6.637147 7.0058785,6.6371479 0 0 1 -7.00588,-6.637147 7.0058785,6.6371479 0 0 1 7.00588,-6.63716 7.0058785,6.6371479 0 0 1 7.00587,6.63716 z" />
      <path
         id="path1218-1-5-9"
         style="fill:#ffffff;fill-opacity:1;stroke:#ffffff;stroke-width:4.68179;stroke-linecap:round"
         d="m 150.77607,98.871153 a 7.0058785,6.6371479 0 0 1 -7.00588,6.637137 7.0058785,6.6371479 0 0 1 -7.00588,-6.637137 7.0058785,6.6371479 0 0 1 7.00588,-6.63715 7.0058785,6.6371479 0 0 1 7.00588,6.63715 z" />
      <g
         id="g2682"
         style="fill:url(#linearGradient2708);fill-opacity:1;stroke:url(#linearGradient2708)"
         inkscape:export-filename="/Users/whx424/Pictures/Illustrations/taxprofiler_v11.png"
         inkscape:export-xdpi="159"
         inkscape:export-ydpi="159"
         transform="matrix(3.7156967,0,0,3.7156967,-30.828156,-200.55501)">
        <ellipse
           style="fill:url(#linearGradient6027);fill-opacity:1;stroke:url(#linearGradient6029);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-5"
           cx="16.378149"
           cy="80.642143"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6031);fill-opacity:1;stroke:url(#linearGradient6033);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-8"
           cx="19.516428"
           cy="80.069572"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6035);fill-opacity:1;stroke:url(#linearGradient6037);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-4"
           cx="22.48292"
           cy="80.818146"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6039);fill-opacity:1;stroke:url(#linearGradient6041);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-7"
           cx="25.628136"
           cy="80.078003"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6043);fill-opacity:1;stroke:url(#linearGradient6045);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-1"
           cx="28.735676"
           cy="80.714775"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6047);fill-opacity:1;stroke:url(#linearGradient6049);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-73"
           cx="31.72254"
           cy="79.995911"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6051);fill-opacity:1;stroke:url(#linearGradient6053);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-5-9"
           cx="34.695347"
           cy="80.538773"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6055);fill-opacity:1;stroke:url(#linearGradient6057);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-8-7"
           cx="37.83363"
           cy="79.966202"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6059);fill-opacity:1;stroke:url(#linearGradient6061);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-4-7"
           cx="40.800121"
           cy="80.714775"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6063);fill-opacity:1;stroke:url(#linearGradient6065);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-7-9"
           cx="43.945339"
           cy="79.974632"
           rx="1.4964142"
           ry="1.4176555" />
        <ellipse
           style="fill:url(#linearGradient6067);fill-opacity:1;stroke:url(#linearGradient6069);stroke-width:1;stroke-linecap:round;stroke-miterlimit:4;stroke-dasharray:none"
           id="path1218-1-5"
           cx="47.052876"
           cy="80.611404"
           rx="1.4964142"
           ry="1.4176555" />
      </g>
    </g>
  </g>
 </svg>
--- a/docs/images/nf-core-taxprofiler_logo_custom_dark.png
+++ b/docs/images/nf-core-taxprofiler_logo_custom_dark.png
--- a/docs/images/nf-core-taxprofiler_logo_custom_dark.svg
+++ b/docs/images/nf-core-taxprofiler_logo_custom_dark.svg
--- a/docs/images/nf-core-taxprofiler_logo_custom_light.png
+++ b/docs/images/nf-core-taxprofiler_logo_custom_light.png
--- a/docs/images/nf-core-taxprofiler_logo_custom_light.svg
+++ b/docs/images/nf-core-taxprofiler_logo_custom_light.svg
--- a/docs/images/nf_core_taxprofiler_icon_border.png
+++ b/docs/images/nf_core_taxprofiler_icon_border.png
--- a/docs/images/taxprofiler_logo.svg
+++ b/docs/images/taxprofiler_logo.svg
--- a/docs/images/taxprofiler_tube.pdf
+++ b/docs/images/taxprofiler_tube.pdf
--- a/docs/images/taxprofiler_tube.png
+++ b/docs/images/taxprofiler_tube.png
--- a/docs/images/taxprofiler_tube.svg
+++ b/docs/images/taxprofiler_tube.svg
--- a/docs/usage.md
+++ b/docs/usage.md
@ -8,56 +8,163 @@
 <!-- TODO nf-core: Add documentation about anything specific to running your pipeline. For general topics, please point to (and add to) the main nf-core website. -->
-## Samplesheet input
+## Samplesheet inputs
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
+nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
-```bash
+> ⚠️ Input FASTQ and FASTA files _must_ be gzipped
--input '[path to samplesheet file]'
+
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 4 columns with a header row as in the examples below.
 This samplesheet is then specified on the command line as follows:
 ```console
 --input '[path to samplesheet file]' --databases '[path to database sheet file]'
 ```
 ### Multiple runs of the same sample
-The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes:
+The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the FASTQ files from different runs of the same sample before performing profiling, when `--perform_runmerging` is supplied. Below is an example for the same sample sequenced across 3 lanes:
 ```console
-sample,fastq_1,fastq_2
+sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+2612,run1,ILLUMINA,2612_run1_R1.fq.gz,,
-CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz
+2612,run2,ILLUMINA,2612_run2_R1.fq.gz,,
-CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz
+2612,run3,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz,
 ```
 > ⚠️ Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `run3` will be profiled independently of `run1` and `run2` if pairs are not merged.
 ### Full samplesheet
-The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below.
+The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 6 columns to match those defined in the table below.
-A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice.
+A final samplesheet file consisting of both single- and paired-end data, as well as long-read FASTA files may look something like the one below. This is for 6 samples, where `2612` has been sequenced twice.
 ```console
-sample,fastq_1,fastq_2
+sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
-CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz
+2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
-CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz
+2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
-TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,
+2613,ERR5766181,ILLUMINA,/<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz,/<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz,
-TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,
+ERR3201952,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,
 TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
 TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
 ```
-| Column    | Description                                                                                                                                                                            |
+| Column                | Description                                                                                                                                                                                              |
-| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
+| `sample`              | Unique sample name [required].                                                                                                                                                                           |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| `run_accession`       | Run ID or name unique for each (pairs of) file(s). Can also supply sample name again here, if only a single run was generated [required].                                                                |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| `instrument_platform` | Sequencing platform reads generated on, selected from the EBI ENA [controlled vocabulary](https://www.ebi.ac.uk/ena/portal/api/controlledVocab?field=instrument_platform) [required].                    |
 | `fastq_1`             | Path or URL to sequencing reads or Illumina R1 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if data in FASTA is specified. Cannot be combined with `fasta`.    |
 | `fastq_2`             | Path or URL to Illumina R2 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if single end data. Cannot be combined with `fasta`.                                   |
 | `fasta`               | Path or URL to long-reads or contigs in FASTA format. GZipped compressed files accepted. Can be left empty if data in FASTQ is specified. Cannot be combined with `fastq_1` or `fastq_2`.                |
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 ### Full database sheet
 nf-core/taxprofiler supports multiple databases being profiled in parallel for each tool.
 Databases can be supplied either in the form of a compressed `.tar.gz` archive of a directory containing all relevant database files or the path to a directory on the filesystem.
 The pipeline takes the locations and specific profiling parameters of the tool of these databases as input via a four column comma-separated sheet.
 > ⚠️ nf-core/taxprofiler does not provide any databases by default, nor does it currently generate them for you. This must be performed manually by the user. See below for more information of the expected database files.
 An example database sheet can look as follows, where 7 tools are being used, and `malt` and `kraken2` will be used against two databases each. This is because specifying `bracken` implies first running `kraken2` on the same database.
 ```console
 tool,db_name,db_params,db_path
 malt,malt85,-id 85,/<path>/<to>/malt/testdb-malt/
 malt,malt95,-id 90,/<path>/<to>/malt/testdb-malt.tar.gz
 bracken,db1,,/<path>/<to>/bracken/testdb-bracken.tar.gz
 kraken2,db2,--quick,/<path>/<to>/kraken2/testdb-kraken2.tar.gz
 krakenuniq,db3,,/<path>/<to>/krakenuniq/testdb-krakenuniq.tar.gz
 centrifuge,db1,,/<path>/<to>/centrifuge/minigut_cf.tar.gz
 metaphlan3,db1,,/<path>/<to>/metaphlan3/metaphlan_database/
 motus,db_mOTU,,/<path>/<to>/motus/motus_database/
 ```
 Column specifications are as follows:
 | Column      | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
 | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `tool`      | Taxonomic profiling tool (supported by nf-core/taxprofiler) that the database has been indexed for [required]. Please note that `bracken` also implies running `kraken2` on the same database.                                                                                                                                                                                                                                                                 |
 | `db_name`   | A unique name per tool for the particular database [required]. Please note that names need to be unique across both `kraken2` and `bracken` as well, even if re-using the same database.                                                                                                                                                                                                                                                                       |
 | `db_params` | Any parameters of the given taxonomic profiler that you wish to specify that the profiling tool should use when profiling against this specific database. Can be empty to use taxonomic profiler defaults. Must not be surrounded by quotes [required]. We generally do not recommend specifying parameters here that turn on/off saving of output files or specifying particular file extensions - this should be already addressed via pipeline parameters.  |
 | `db_path`   | Path to the database. Can either be a path to a directory containing the database index files or a `.tar.gz` file which contains the compressed database directory with the same name as the tar archive, minus `.tar.gz` [required].                                                                                                                                                                                                                          |
 > 💡 You can also specify the same database directory/file twice (ensuring unique `db_name`s) and specify different parameters for each database to compare the effect of different parameters during profiling.
 nf-core/taxprofiler will automatically decompress and extract any compressed archives for you.
 Expected (uncompressed) database files for each tool are as follows:
 - **MALT** output of `malt-build`. A directory containing:
  - `ref.idx`
  - `taxonomy.idx`
  - `taxonomy.map`
  - `index0.idx`
  - `table0.idx`
  - `table0.db`
  - `ref.inf`
  - `ref.db`
  - `taxonomy.tre`
 - **Kraken2** output of `kraken2-build` command(s). A directory containing:
  - `opts.k2d`
  - `hash.k2d`
  - `taxo.k2d`
 - **Bracken** output of a combined `kraken2-` and `bracken-build` process. Please see the [documentation on Bracken](https://github.com/jenniferlu717/Bracken#running-bracken-easy-version) for details. The output is a directory containing files per expected sequencing read length similarly to:
  - `hash.k2d`
  - `opts.k2d`
  - `taxo.k2d`
  - `database.kraken`
  - `database100mers.kmer_distrib`
  - `database100mers.kraken`
  - `database150mers.kmer_distrib`
  - `database150mers.kraken`
 - **KrakenUniq** output of `krakenuniq-build` command(s). A directory containing:
  - `opts.k2d`
  - `hash.k2d`
  - `taxo.k2d`
  - `database.idx`
  - `taxDB`
 - **Centrifuge** output of `centrifuge-build`. A directory containing:
  - `<database_name>.<number>.cf`
  - `<database_name>.<number>.cf`
  - `<database_name>.<number>.cf`
  - `<database_name>.<number>.cf`
 - **MetaPhlAn3** generated with `metaphlan --install` or downloaded from links on the [MetaPhlAn3 wiki](https://github.com/biobakery/MetaPhlAn/wiki/MetaPhlAn-3.0#customizing-the-database). A directory containing:
  - `mpa_v30_CHOCOPhlAn_201901.pkl`
  - `mpa_v30_CHOCOPhlAn_201901.pkl`
  - `mpa_v30_CHOCOPhlAn_201901.fasta`
  - `mpa_v30_CHOCOPhlAn_201901.3.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.4.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.1.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.2.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.rev.1.bt2`
  - `mpa_v30_CHOCOPhlAn_201901.rev.2.bt2`
  - `mpa_latest`
 - **Kaiju** output of `kaiju-makedb`. A directory containing:
  - `kaiju_db_*.fmi`
  - `nodes.dmp`
  - `names.dmp`
 - **DIAMOND** output of `diamond makedb`. Note: requires building with taxonomy files
  to generate taxonomic profile. See [DIAMOND documentation](https://github.com/bbuchfink/diamond/wiki/3.-Command-line-options#makedb-options). A file named:
  - `<database_name>.dmnd`
 - **mOTUs** is composed of code and database together. The mOTUs tool
  [`downloadDB`](https://github.com/motu-tool/mOTUs/blob/master/motus/downloadDB.py)
  is used to prepare the mOTUs database and create a file with the version information.
  The database download step can be time consuming and the database will be consistent
  with the same release version of the mOTUs tools. The database for the same tool version
  can thus be reused for multiple runs. Users can download the database once using the script above and
  specify the path to the database in the comma-separated sheet provided to `--databases`.
 ## Running the pipeline
 The typical command for running the pipeline is as follows:
-```bash
+```console
-nextflow run nf-core/taxprofiler --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
+nextflow run nf-core/taxprofiler --input samplesheet.csv --databases databases.csv --outdir <OUTDIR> -profile docker --run_<TOOL1> --run_<TOOL2>
 ```
 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
@ -71,6 +178,98 @@ work                # Directory containing the nextflow working files
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```
 ### Sequencing quality control
 nf-core/taxprofiler offers [`falco`](https://github.com/smithlabcode/falco) as an alternative option to [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
 ### Preprocessing Steps
 nf-core/taxprofiler offers four main preprocessing steps:
 - Read processing: adapter clipping and pair-merging.
 - Complexity filtering: removal of low-sequence complexity reads.
 - Host read-removal: removal of reads aligning to reference genome(s) of a host.
 - Run merging: concatenation of multiple FASTQ chunks/sequencing runs/libraries of a sample.
 #### Read Processing
 Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags.
 It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles.
 There are currently two options for short-read preprocessing: `fastp` or `adapterremoval`.
 For adapter clipping, you can either rely on tool default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`).
 By default, paired-end merging is not activated and paired-end profiling is performed where supported, otherwise pairs will be independently profiled. If paired-end merging is activated you can also specify whether to include unmerged reads in the reads sent for profiling (`--shortread_qc_mergepairs` and `--shortread_qc_includeunmerged`).
 You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`).
 Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. Performing length filtering can be useful to remove short (often low sequencing complexity) sequences that result in unspecific classification and therefore slow down runtime during profiling, with minimal gain.
 There is currently one option for long-read Oxford Nanopore processing: `porechop`.
 For both short-read and long-read preprocessing, you can optionally save the resulting processed reads with `--save_preprocessed_reads`.
 #### Complexity Filtering
 Complexity filtering can be activated via the `--perform_shortread_complexityfilter` flag.
 Complexity filtering is primarily a run-time optimisation step. It is not necessary for accurate taxonomic profiling, however it can speed up run-time of each tool by removing reads with low-diversity of nucleotides (e.g. with mono-nucleotide - `AAAAAAAA`, or di-nucleotide repeats `GAGAGAGAGAGAGAG`) that have a low-chance of giving an informative taxonomic ID as they can be associated with many different taxa. Removing these reads therefore saves computational time and resources.
 There are currently three options for short-read complexity filtering: [`bbduk`](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/bbduk-guide/), [`prinseq++`](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus), and [`fastp`](https://github.com/OpenGene/fastp#low-complexity-filter).
 The tools offer different algorithms and parameters for removing low complexity reads. We therefore recommend reviewing the pipeline's [parameter documentation](https://nf-co.re/taxprofiler/parameters) and the documentation of the tools (see links above) to decide on optimal methods and parameters for your dataset.
 You can optionally save the FASTQ output of the complexity filtering with the `--save_complexityfiltered_reads` flag. If running with `fastp`, complexity filtering happens inclusively within the earlier shortread preprocessing step. Therefore there will not be an independent pipeline step for complexity filtering, and no independent FASTQ file (i.e. `--save_complexityfiltered_reads` will be ignored) - your complexity filtered reads will also be in the `fastp/` folder in the same file(s) as the preprocessed read.
 #### Host Removal
 Removal of possible-host reads from FASTQ files prior to profiling can be activated with `--perform_shortread_hostremoval` or `--perform_longread_hostremoval`.
 Similarly to complexity filtering, host-removal can be useful for runtime optimisation and reduction in misclassified reads. It is not always necessary to report classification of reads from a host when you already know the host of the sample, therefore you can gain a run-time and computational advantage by removing these prior to typically resource-heavy profiling with more efficient methods. Furthermore, particularly with human samples, you can reduce the number of false positives during profiling that occur due to host-sequence contamination in reference genomes on public databases.
 nf-core/taxprofiler currently offers host-removal via alignment against a reference genome with Bowtie2, and the use of the unaligned reads for downstream profiling.
 You can supply your reference genome in FASTA format with `--hostremoval_reference`. You can also optionally supply a directory containing pre-indexed Bowtie2 index files with `--shortread_hostremoval_index` or a minimap2 `.mmi` file for `--longread_hostremoval_index`, however nf-core/taxprofiler will generate these for you if necessary. Pre-supplying the index directory or files can greatly speed up the process, and these can be re-used.
 > 💡 If you have multiple taxa or sequences you wish to remove (e.g., the host genome and then also PhiX - common quality-control reagent during sequencing) you can simply concatenate the FASTAs of each taxa or sequences into a single reference file.
 #### Run Merging
 For samples that may have been sequenced over multiple runs, or for FASTQ files split into multiple chunks, you can activate the ability to merge across all runs or chunks with `--perform_runmerging`.
 For more information on how to set up your input samplesheet, see [Multiple runs of the same sample](#multiple-runs-of-the-same-sample).
 Activating this functionality will concatenate the FASTQ files with the same sample name _after_ the optional preprocessing steps and _before_ profiling. Note that libraries with runs of different pairing types will **not** be merged and this will be indicated on output files with a `_se` or `_pe` suffix to the sample name accordingly.
 You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
 ##### Profiling
 ###### Bracken
 It is unclear whether Bracken is suitable for running long reads, as it makes certain assumptions about read lengths. Furthermore, during testing we found issues where Bracken would fail on the long-read test data. Therefore nf-core/taxprofiler does not run Bracken on data specified as being sequenced with `OXFORD_NANOPORE` in the input samplesheet. If you believe this to be wrong, please contact us on the nf-core slack and we can discuss this.
 ###### Centrifuge
 Centrifuge currently does not accept FASTA files as input, therefore no output will be produced for these input files.
 ###### DIAMOND
 DIAMOND only allows output of a single format at a time, therefore supplying parameters such as `--diamond_save_reads` will result in only aligned reads in SAM format being produced; no taxonomic profiles will be available. Be aware of this when setting up your pipeline runs, depending on your particular use case.
 ###### MALT
 MALT does not support paired-end reads alignment (unlike other tools), therefore nf-core/taxprofiler aligns these as independent files if read-merging is skipped. If you skip merging, you can sum or average the results of the counts of the pairs.
 Krona can only be run on MALT output if the path to a Krona taxonomy database is supplied to `--krona_taxonomy_directory`. Therefore, if you do not supply a Krona directory, Krona plots will not be produced for MALT.
 ###### MetaPhlAn3
 MetaPhlAn3 currently does not accept FASTA files as input, therefore no output will be produced for these input files.
 ###### mOTUs
 mOTUs currently does not accept FASTA files as input, therefore no output will be produced for these input files.
 ### Updating the pipeline
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
@ -268,3 +467,13 @@ We recommend adding the following line to your environment to limit this (typica
 ```bash
 NXF_OPTS='-Xms1g -Xmx4g'
 ```
 ## Troubleshooting and FAQs
 ### I get a warning during centrifuge_kreport process with exit status 255
 When a sample has insufficient hits for abundance estimation, the resulting `report.txt` file will be empty.
 When trying to convert this to a kraken-style report, the conversion tool will exit with a status code `255`, and provide a `WARN`.
 This is **not** an error nor a failure of the pipeline, just your sample has no hits to the provided database when using centrifuge.
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@ -12,9 +12,9 @@ class WorkflowMain {
            // TODO nf-core: Add Zenodo DOI for pipeline after first release
            //"* The pipeline\n" +
            //"  https://doi.org/10.5281/zenodo.XXXXXXX\n\n" +
-            "* The nf-core framework\n" +
+            '* The nf-core framework\n' +
-            "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+            '  https://doi.org/10.1038/s41587-020-0439-x\n\n' +
-            "* Software dependencies\n" +
+            '* Software dependencies\n' +
            "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
    }
@ -96,4 +96,5 @@ class WorkflowMain {
        }
        return null
    }
 }
--- a/lib/WorkflowTaxprofiler.groovy
+++ b/lib/WorkflowTaxprofiler.groovy
@ -12,11 +12,11 @@ class WorkflowTaxprofiler {
    public static void initialise(params, log) {
        genomeExistsError(params, log)
-
+        // TODO update as necessary
-        if (!params.fasta) {
+        //if (!params.fasta) {
-            log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
+        //    log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
-            System.exit(1)
+        //    System.exit(1)
-        }
+        //}
    }
    //
--- a/modules.json
+++ b/modules.json
@ -5,20 +5,211 @@
        "https://github.com/nf-core/modules.git": {
            "modules": {
                "nf-core": {
                    "adapterremoval": {
                        "branch": "master",
                        "git_sha": "ce7cf27e377fdacf7ebe8e75903ec70405ea1659",
                        "installed_by": ["modules"]
                    },
                    "bbmap/bbduk": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "bowtie2/align": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "bowtie2/build": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "bracken/bracken": {
                        "branch": "master",
                        "git_sha": "8cab56516076b23c6f8eb1ac20ba4ce9692c85e1",
                        "installed_by": ["modules"]
                    },
                    "bracken/combinebrackenoutputs": {
                        "branch": "master",
                        "git_sha": "9c87d5fdad182590a370ea43a4ecebd200a6f6fb",
                        "installed_by": ["modules"]
                    },
                    "cat/fastq": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "centrifuge/centrifuge": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "centrifuge/kreport": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "custom/dumpsoftwareversions": {
                        "branch": "master",
                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                        "installed_by": ["modules"]
                    },
                    "diamond/blastx": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "falco": {
                        "branch": "master",
                        "git_sha": "fc959214036403ad83efe7a41d43d0606c445cda",
                        "installed_by": ["modules"],
                        "patch": "modules/nf-core/falco/falco.diff"
                    },
                    "fastp": {
                        "branch": "master",
                        "git_sha": "1e49f31e93c56a3832833eef90a02d3cde5a3f7e",
                        "installed_by": ["modules"]
                    },
                    "fastqc": {
                        "branch": "master",
                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                        "installed_by": ["modules"]
                    },
                    "filtlong": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "gunzip": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "kaiju/kaiju": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "kaiju/kaiju2krona": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "kaiju/kaiju2table": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "kraken2/kraken2": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "krakentools/combinekreports": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "krakentools/kreport2krona": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "krakenuniq/preloadedkrakenuniq": {
                        "branch": "master",
                        "git_sha": "05649975c6611c6e007537a7984e186e12ae03af",
                        "installed_by": ["modules"]
                    },
                    "krona/ktimporttaxonomy": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "krona/ktimporttext": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "malt/run": {
                        "branch": "master",
                        "git_sha": "6d9712f03ec2de8264a50ee4541a617e1e063b51",
                        "installed_by": ["modules"]
                    },
                    "megan/rma2info": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "metaphlan3/mergemetaphlantables": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "metaphlan3/metaphlan3": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "minimap2/align": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "minimap2/index": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "motus/merge": {
                        "branch": "master",
                        "git_sha": "3fce766123e71e82fb384db7d07b59180baa9ee9",
                        "installed_by": ["modules"]
                    },
                    "motus/profile": {
                        "branch": "master",
                        "git_sha": "3fce766123e71e82fb384db7d07b59180baa9ee9",
                        "installed_by": ["modules"]
                    },
                    "multiqc": {
                        "branch": "master",
                        "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
                        "installed_by": ["modules"]
                    },
                    "porechop/porechop": {
                        "branch": "master",
                        "git_sha": "2a4e85eb81875a572bb58133e37f84ba3cc484d7",
                        "installed_by": ["modules"]
                    },
                    "prinseqplusplus": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "samtools/bam2fq": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "samtools/index": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "samtools/stats": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "samtools/view": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    },
                    "untar": {
                        "branch": "master",
                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
                        "installed_by": ["modules"]
                    }
                }
            }
--- a/modules/local/ensure_fastq_extension.nf
+++ b/modules/local/ensure_fastq_extension.nf
@ -0,0 +1,31 @@
 // Creates symlinks to the input read file(s) under names ending in `.fastq.gz`,
 // so that the `*.fastq.gz` output glob picks them up.
 process ENSURE_FASTQ_EXTENSION {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "conda-forge::bash=5.0" : null)
    // Singularity image URL when running under singularity (and docker pull is
    // not forced via task.ext), otherwise the Docker image name.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
        'biocontainers/biocontainers:v1.2.0_cv2' }"
    input:
    // meta: sample map (this process reads `id` and `single_end`);
    // reads: one file when meta.single_end is true, otherwise indexed as reads[0] / reads[1].
    tuple val(meta), path(reads)
    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads
    script:
    // Link names are built as "<baseName>.fastq.gz".
    // NOTE(review): presumably `baseName` drops only the final extension, so an
    // input `x.fq.gz` would become `x.fq.fastq.gz` — confirm this is intended.
    if (meta.single_end) {
        fastq = "${reads.baseName}.fastq.gz"
        """
        ln -s '${reads}' '${fastq}'
        """
    } else {
        first = "${reads[0].baseName}.fastq.gz"
        second = "${reads[1].baseName}.fastq.gz"
        """
        ln -s '${reads[0]}' '${first}'
        ln -s '${reads[1]}' '${second}'
        """
    }
 }
--- a/modules/local/kraken2_standard_report.nf
+++ b/modules/local/kraken2_standard_report.nf
@ -0,0 +1,32 @@
 // Writes a reduced copy of a report: tab-separated fields 1-3 and 6-8 of the
 // input file are kept and saved as "<prefix>_standardized.kraken2.report.txt".
 process KRAKEN2_STANDARD_REPORT {
    tag "$meta.id"
    label 'process_single'
    // NOTE(review): the conda spec lists sed only, while the script also calls `cut`.
    conda (params.enable_conda ? 'conda-forge::sed=4.8' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv2/biocontainers_v1.2.0_cv2.img' :
        'biocontainers/biocontainers:v1.2.0_cv2' }"
    input:
    tuple val(meta), path(report)
    output:
    // `result` is the file name assigned in the script section below.
    tuple val(meta), path(result), emit: report
    path 'versions.yml'          , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Output prefix defaults to the sample id unless task.ext.prefix is set.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Declared without `def`; the output declaration above refers to this name.
    result = "${prefix}_standardized.kraken2.report.txt"
    """
    cut -f1-3,6-8 '${report}' > '${result}'
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        cut: \$(echo \$(cut --version 2>&1) | sed 's/^.*(GNU coreutils) //; s/ Copyright.*\$//')
    END_VERSIONS
    """
 }
--- a/modules/local/krona_cleanup.nf
+++ b/modules/local/krona_cleanup.nf
@ -0,0 +1,40 @@
 // Copies a Krona text file to "<prefix>.txt" and cleans it in place with sed:
 // strips the "<letter>__" prefixes for the letters d k p c o f g s, then
 // replaces every underscore with a space.
 process KRONA_CLEANUP {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    // NOTE(review): this module pins the `v1.2.0_cv1` image, whereas the other
    // local modules in this changeset use `v1.2.0_cv2` — confirm this is deliberate.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
        'biocontainers/biocontainers:v1.2.0_cv1' }"
    input:
    // The input is always staged under the fixed name 'uncleaned.krona.txt'.
    tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Output prefix defaults to the sample id unless task.ext.prefix is set.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Copy the file to a new name
    cp ${krona} ${prefix}.txt
    # Remove ugly 'x__' prefixes for each of the taxonomic levels
    LEVELS=(d k p c o f g s)
    for L in "\${LEVELS[@]}"; do
        sed -i "s/\${L}__//g" ${prefix}.txt
    done
    # Remove underscores that are standing in place of spaces
    sed -i "s/_/ /g" ${prefix}.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
    END_VERSIONS
    """
 }
--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
@ -1,6 +1,5 @@
 process SAMPLESHEET_CHECK {
    tag "$samplesheet"
    label 'process_single'
    conda "conda-forge::python=3.8.3"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@ -14,9 +13,6 @@ process SAMPLESHEET_CHECK {
    path '*.csv'       , emit: csv
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script: // This script is bundled with the pipeline, in nf-core/taxprofiler/bin/
    """
    check_samplesheet.py \\
--- a/modules/nf-core/adapterremoval/main.nf
+++ b/modules/nf-core/adapterremoval/main.nf
@ -0,0 +1,92 @@
 // Runs AdapterRemoval on single-end or paired-end reads and renames its
 // gzipped outputs from "<name>.gz" to "<name>.fastq.gz".
 process ADAPTERREMOVAL {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::adapterremoval=2.3.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' :
        'quay.io/biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }"
    input:
    // reads: one file when meta.single_end is true, otherwise reads[0] / reads[1].
    // adapterlist: when it evaluates to true it is passed via `--adapter-list`.
    tuple val(meta), path(reads)
    path(adapterlist)
    output:
    // All read outputs are optional; only the settings file and versions.yml
    // are required. `prefix` is the variable assigned in the script section.
    tuple val(meta), path("${prefix}.truncated.fastq.gz")            , optional: true, emit: singles_truncated
    tuple val(meta), path("${prefix}.discarded.fastq.gz")            , optional: true, emit: discarded
    tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz")  , optional: true, emit: paired_truncated
    tuple val(meta), path("${prefix}.collapsed.fastq.gz")            , optional: true, emit: collapsed
    tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz")  , optional: true, emit: collapsed_truncated
    tuple val(meta), path("${prefix}.paired.fastq.gz")               , optional: true, emit: paired_interleaved
    tuple val(meta), path('*.settings')                              , emit: settings
    path "versions.yml"                                              , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def list = adapterlist ? "--adapter-list ${adapterlist}" : ""
    // Declared without `def`; the output declarations above refer to this name.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Both branches define the same shell helper `ensure_fastq`: if the given
    // file exists it is renamed by dropping its last three characters (".gz")
    // and appending ".fastq.gz". Missing files are silently skipped.
    // The seed is fixed to 42 in both branches.
    if (meta.single_end) {
        """
        AdapterRemoval  \\
            --file1 $reads \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads ${task.cpus} \\
            --seed 42 \\
            --gzip
        ensure_fastq() {
            if [ -f "\${1}" ]; then
                mv "\${1}" "\${1::-3}.fastq.gz"
            fi
        }
        ensure_fastq '${prefix}.truncated.gz'
        ensure_fastq '${prefix}.discarded.gz'
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    } else {
        """
        AdapterRemoval  \\
            --file1 ${reads[0]} \\
            --file2 ${reads[1]} \\
            $args \\
            $list \\
            --basename ${prefix} \\
            --threads $task.cpus \\
            --seed 42 \\
            --gzip
        ensure_fastq() {
            if [ -f "\${1}" ]; then
                mv "\${1}" "\${1::-3}.fastq.gz"
            fi
        }
        ensure_fastq '${prefix}.truncated.gz'
        ensure_fastq '${prefix}.discarded.gz'
        ensure_fastq '${prefix}.pair1.truncated.gz'
        ensure_fastq '${prefix}.pair2.truncated.gz'
        ensure_fastq '${prefix}.collapsed.gz'
        ensure_fastq '${prefix}.collapsed.truncated.gz'
        ensure_fastq '${prefix}.paired.gz'
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g")
        END_VERSIONS
        """
    }
 }
--- a/modules/nf-core/adapterremoval/meta.yml
+++ b/modules/nf-core/adapterremoval/meta.yml
@ -0,0 +1,90 @@
 # Module metadata for the ADAPTERREMOVAL process (main.nf in the same directory).
 # The output names listed here mirror the `emit:` names declared in main.nf.
 name: adapterremoval
 description: Trim sequencing adapters and collapse overlapping reads
 keywords:
  - trimming
  - adapters
  - merging
  - fastq
 tools:
  - adapterremoval:
      description: The AdapterRemoval v2 tool for merging and clipping reads.
      homepage: https://github.com/MikkelSchubert/adapterremoval
      documentation: https://adapterremoval.readthedocs.io
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
  - adapterlist:
      type: file
      description: Optional text file containing the list of adapters to search for
        and remove, with one adapter per line. Otherwise will look for default
        adapters (see AdapterRemoval man page), or can be modified to remove
        user-specified adapters via ext.args.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - singles_truncated:
      type: file
      description: |
        Adapter trimmed FastQ files of either single-end reads, or singleton
        'orphaned' reads from merging of paired-end data (i.e., one of the pair
        was lost due to filtering thresholds).
      pattern: "*.truncated.fastq.gz"
  - discarded:
      type: file
      description: |
        Adapter trimmed FastQ files of reads that did not pass filtering
        thresholds.
      pattern: "*.discarded.fastq.gz"
  - paired_truncated:
      type: file
      description: |
        Adapter trimmed R1 and R2 FastQ files of paired-end reads that did not
        merge with their respective pair due to long templates. R1 reads are
        stored in the 'pair1' file and R2 reads in the 'pair2' file.
      pattern: "*.pair{1,2}.truncated.fastq.gz"
  - collapsed:
      type: file
      description: |
        Collapsed FastQ of paired-end reads that successfully merged with their
        respective R1 pair but were not trimmed.
      pattern: "*.collapsed.fastq.gz"
  - collapsed_truncated:
      type: file
      description: |
        Collapsed FastQ of paired-end reads that successfully merged with their
        respective R1 pair and were trimmed of adapter due to sufficient overlap.
      pattern: "*.collapsed.truncated.fastq.gz"
  - paired_interleaved:
      type: file
      description: |
        Paired-end reads written to a single interleaved FastQ file, when
        AdapterRemoval is configured (e.g. via ext.args) to produce interleaved
        output.
      pattern: "*.paired.fastq.gz"
  - settings:
      type: file
      description: AdapterRemoval log file
      pattern: "*.settings"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@maxibor"
  - "@jfy133"
--- a/modules/nf-core/bbmap/bbduk/main.nf
+++ b/modules/nf-core/bbmap/bbduk/main.nf
@ -0,0 +1,43 @@
 // Runs bbduk.sh on single-end or paired-end reads, optionally against a
 // contaminant reference, writing trimmed reads to "<prefix>*.fastq.gz" and the
 // tool's combined stdout/stderr to "<prefix>.bbduk.log".
 process BBMAP_BBDUK {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::bbmap=38.90" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bbmap:38.90--he522d1c_1' :
        'quay.io/biocontainers/bbmap:38.90--he522d1c_1' }"
    input:
    // reads: one file when meta.single_end is true, otherwise reads[0] / reads[1].
    // contaminants: when it evaluates to true it is passed as `ref=<file>`.
    tuple val(meta), path(reads)
    path contaminants
    output:
    tuple val(meta), path('*.fastq.gz'), emit: reads
    tuple val(meta), path('*.log')     , emit: log
    path "versions.yml"                , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def raw      = meta.single_end ? "in=${reads[0]}" : "in1=${reads[0]} in2=${reads[1]}"
    def trimmed  = meta.single_end ? "out=${prefix}.fastq.gz" : "out1=${prefix}_1.fastq.gz out2=${prefix}_2.fastq.gz"
    def contaminants_fa = contaminants ? "ref=$contaminants" : ''
    // Build the Java heap flag from the task's memory allocation as a whole
    // number of megabytes. The previous shell `sed 's/ GB/g/g'` rewrite only
    // handled values printed as "<int> GB": MB/TB units, fractional values such
    // as "1.5 GB", or an unset memory directive ("null") produced an invalid
    // -Xmx argument. When no memory is configured the flag is omitted and
    // bbduk.sh falls back to its own default.
    def maxmem = task.memory ? "-Xmx${task.memory.toMega()}m" : ''
    """
    bbduk.sh \\
        $maxmem \\
        $raw \\
        $trimmed \\
        threads=$task.cpus \\
        $args \\
        $contaminants_fa \\
        &> ${prefix}.bbduk.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bbmap: \$(bbversion.sh)
    END_VERSIONS
    """
 }
--- a/modules/nf-core/bbmap/bbduk/meta.yml
+++ b/modules/nf-core/bbmap/bbduk/meta.yml
@ -0,0 +1,53 @@
 # Module metadata for the BBMAP_BBDUK process: describes the wrapped tool and
 # documents the process inputs (meta, reads, contaminants) and outputs
 # (meta, reads, versions, log).
 name: bbmap_bbduk
 description: Adapter and quality trimming of sequencing reads
 keywords:
  - trimming
  - adapter trimming
  - quality trimming
  - fastq
 tools:
  - bbmap:
      description: BBMap is a short read aligner, as well as various other bioinformatic tools.
      homepage: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
      documentation: https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/
      # NOTE(review): `None` is a plain string here, not a YAML null.
      tool_dev_url: None
      doi: ""
      licence: ["UC-LBL license (see package)"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - contaminants:
      type: file
      description: |
        Reference files containing adapter and/or contaminant sequences for sequence kmer matching
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: The trimmed/modified fastq reads
      pattern: "*fastq.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - log:
      type: file
      description: Bbduk log file
      pattern: "*bbduk.log"
 authors:
  - "@MGordon09"
--- a/modules/nf-core/bowtie2/align/main.nf
+++ b/modules/nf-core/bowtie2/align/main.nf
@ -0,0 +1,71 @@
 // Aligns single-end or paired-end reads with bowtie2 and pipes the alignments
 // into `samtools sort` or `samtools view` to produce "<prefix>.bam". bowtie2's
 // stderr is captured in "<prefix>.bowtie2.log"; unaligned reads are optionally
 // saved as gzipped FastQ.
 process BOWTIE2_ALIGN {
    tag "$meta.id"
    label "process_high"
    conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null)
    container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ?
        "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" :
        "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }"
    input:
    // reads: one file when meta.single_end is true, otherwise reads[0] / reads[1].
    // index: staged path that is searched recursively for the index files.
    // save_unaligned: when true, reads that fail to align are written out.
    // sort_bam: true selects `samtools sort`, false selects `samtools view`.
    tuple val(meta), path(reads)
    path  index
    val   save_unaligned
    val   sort_bam
    output:
    tuple val(meta), path("*.bam")    , emit: bam
    tuple val(meta), path("*.log")    , emit: log
    tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true
    path  "versions.yml"              , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ""
    def args2 = task.ext.args2 ?: ""
    def prefix = task.ext.prefix ?: "${meta.id}"
    def unaligned = ""
    def reads_args = ""
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ""
        reads_args = "-U ${reads}"
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ""
        reads_args = "-1 ${reads[0]} -2 ${reads[1]}"
    }
    def samtools_command = sort_bam ? 'sort' : 'view'
    // The index basename is derived by stripping the ".rev.1.bt2" (or, as a
    // fallback, ".rev.1.bt2l") suffix from the matching file found under the
    // work directory. The sed pattern escapes the dots and is anchored to the
    // end of the line so that only the trailing suffix is removed; an
    // unanchored pattern with bare dots could instead cut a matching substring
    // out of the middle of the path.
    // After alignment, "<prefix>.unmapped.fastq.{1,2}.gz" files, if present,
    // are renamed to "<prefix>.unmapped_{1,2}.fastq.gz" so that they match the
    // "*fastq.gz" output glob.
    """
    INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev\\.1\\.bt2\$//"`
    [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev\\.1\\.bt2l\$//"`
    [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1
    bowtie2 \\
        -x \$INDEX \\
        $reads_args \\
        --threads $task.cpus \\
        $unaligned \\
        $args \\
        2> ${prefix}.bowtie2.log \\
        | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
    if [ -f ${prefix}.unmapped.fastq.1.gz ]; then
        mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz
    fi
    if [ -f ${prefix}.unmapped.fastq.2.gz ]; then
        mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
 }
--- a/modules/nf-core/bowtie2/align/meta.yml
+++ b/modules/nf-core/bowtie2/align/meta.yml
@ -0,0 +1,62 @@
 # Module metadata for the BOWTIE2_ALIGN process (main.nf in the same directory).
 name: bowtie2_align
 description: Align reads to a reference genome using bowtie2
 keywords:
  - align
  - map
  - fasta
  - fastq
  - genome
  - reference
 tools:
  - bowtie2:
      description: |
        Bowtie 2 is an ultrafast and memory-efficient tool for aligning
        sequencing reads to long reference sequences.
      homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
      documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
      doi: 10.1038/nmeth.1923
      licence: ["GPL-3.0-or-later"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - index:
      type: file
      description: Bowtie2 genome index files
      # main.nf locates the index via its `*.rev.1.bt2` / `*.rev.1.bt2l` file.
      pattern: "*.{bt2,bt2l}"
  - save_unaligned:
      type: boolean
      description: |
        Save reads that do not map to the reference (true) or discard them (false)
        (default: false)
  - sort_bam:
      type: boolean
      description: use samtools sort (true) or samtools view (false)
      pattern: "true or false"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Output BAM file containing read alignments
      pattern: "*.{bam}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fastq:
      type: file
      description: Unaligned FastQ files
      pattern: "*.fastq.gz"
  - log:
      type: file
      description: Alignment log
      pattern: "*.log"
 authors:
  - "@joseespinosa"
  - "@drpatelh"
--- a/modules/nf-core/bowtie2/build/main.nf
+++ b/modules/nf-core/bowtie2/build/main.nf
@ -0,0 +1,30 @@
 // Builds a bowtie2 index from a FASTA file into a new `bowtie2/` directory,
 // using the FASTA's baseName as the index basename, and emits that directory.
 process BOWTIE2_BUILD {
    tag "$fasta"
    label 'process_high'
    conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' :
        'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }"
    input:
    path fasta
    output:
    // The whole `bowtie2` directory created by the script is the index output.
    path 'bowtie2'      , emit: index
    path "versions.yml" , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Extra command-line options for bowtie2-build, empty by default.
    def args = task.ext.args ?: ''
    """
    mkdir bowtie2
    bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/bowtie2/build/meta.yml
+++ b/modules/nf-core/bowtie2/build/meta.yml
@ -0,0 +1,33 @@
 # Module metadata for the BOWTIE2_BUILD process: describes the wrapped tool and
 # documents the process input (fasta) and outputs (index, versions).
 name: bowtie2_build
 description: Builds bowtie index for reference genome
 keywords:
  - build
  - index
  - fasta
  - genome
  - reference
 tools:
  - bowtie2:
      description: |
        Bowtie 2 is an ultrafast and memory-efficient tool for aligning
        sequencing reads to long reference sequences.
      homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
      documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
      doi: 10.1038/nmeth.1923
      licence: ["GPL-3.0-or-later"]
 input:
  - fasta:
      type: file
      description: Input genome fasta file
 output:
  - index:
      type: file
      description: Bowtie2 genome index files
      # NOTE(review): main.nf emits the `bowtie2` directory itself as `index`;
      # this pattern describes the files expected inside it — confirm.
      pattern: "*.bt2"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@joseespinosa"
  - "@drpatelh"
--- a/modules/nf-core/bracken/bracken/main.nf
+++ b/modules/nf-core/bracken/bracken/main.nf
@ -0,0 +1,42 @@
 // Runs bracken on a kraken report against a database and writes the result
 // to "<prefix>.tsv".
 process BRACKEN_BRACKEN {
    tag "$meta.id"
    label 'process_low'
    // WARN: Version information not provided by tool on CLI.
    // Please update version string below when bumping container versions.
    conda (params.enable_conda ? "bioconda::bracken=2.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0':
        'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }"
    input:
    tuple val(meta), path(kraken_report)
    path database
    output:
    // `bracken_report` is the file name assigned in the script section below.
    tuple val(meta), path(bracken_report), emit: reports
    path "versions.yml"          , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ""
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Declared without `def`; the output declaration above refers to this name.
    bracken_report = "${prefix}.tsv"
    // WARN: Version information not provided by tool on CLI.
    // Please update version string below when bumping container versions.
    def VERSION = '2.7'
    """
    bracken \\
        ${args} \\
        -d '${database}' \\
        -i '${kraken_report}' \\
        -o '${bracken_report}'
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bracken: ${VERSION}
    END_VERSIONS
    """
 }
--- a/modules/nf-core/bracken/bracken/meta.yml
+++ b/modules/nf-core/bracken/bracken/meta.yml
@ -0,0 +1,48 @@
 # Module metadata for the BRACKEN_BRACKEN process: describes the wrapped tool
 # and documents the process inputs (meta, kraken_report, database) and outputs
 # (meta, versions, reports).
 name: bracken_bracken
 description: Re-estimate taxonomic abundance of metagenomic samples analyzed by kraken.
 keywords:
  - bracken
  - metagenomics
  - abundance
  - kraken2
 tools:
  - bracken:
      description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
      homepage: https://ccb.jhu.edu/software/bracken/
      documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual
      tool_dev_url: https://github.com/jenniferlu717/Bracken
      doi: "10.7717/peerj-cs.104"
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - kraken_report:
      type: file
      description: TSV file with six columns coming from kraken2 output
      pattern: "*.{tsv}"
  - database:
      type: file
      description: Directory containing the kraken2/Bracken files for analysis
      pattern: "*"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reports:
      type: file
      description: TSV output report of the re-estimated abundances
      pattern: "*.{tsv}"
 authors:
  - "@Midnighter"
--- a/modules/nf-core/bracken/combinebrackenoutputs/main.nf
+++ b/modules/nf-core/bracken/combinebrackenoutputs/main.nf
@ -0,0 +1,37 @@
 // Combines the given bracken output files into a single table,
 // "<prefix>.txt", by calling combine_bracken_outputs.py.
 process BRACKEN_COMBINEBRACKENOUTPUTS {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::bracken=2.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0':
        'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }"
    input:
    // input: the file(s) handed to the script's `--files` option.
    tuple val(meta), path(input)
    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // WARN: Version information not provided by tool on CLI.
    // Please update version string below when bumping container versions.
    def VERSION = '2.7'
    """
    combine_bracken_outputs.py \\
        $args \\
        --files ${input} \\
        -o ${prefix}.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        combine_bracken_output: ${VERSION}
    END_VERSIONS
    """
 }
--- a/modules/nf-core/bracken/combinebrackenoutputs/meta.yml
+++ b/modules/nf-core/bracken/combinebrackenoutputs/meta.yml
@ -0,0 +1,41 @@
 # Module metadata for the BRACKEN_COMBINEBRACKENOUTPUTS process: describes the
 # wrapped tool and documents the process inputs (meta, input) and outputs
 # (meta, versions, txt).
 name: "bracken_combinebrackenoutputs"
 description: Combine output of metagenomic samples analyzed by bracken.
 # NOTE(review): `sort` is the only keyword listed and looks unrelated to the
 # description above — confirm whether more specific keywords were intended.
 keywords:
  - sort
 tools:
  - "bracken":
      description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample.
      homepage: https://ccb.jhu.edu/software/bracken/
      documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual
      tool_dev_url: https://github.com/jenniferlu717/Bracken
      doi: "10.7717/peerj-cs.104"
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: List of output files from bracken
      pattern: "*"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: Combined output in table format
      pattern: "*.txt"
 authors:
  - "@jfy133"
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@ -0,0 +1,80 @@
 // Concatenates gzipped FastQ files per sample. Single-end input produces
 // "<prefix>.merged.fastq.gz"; paired-end input produces
 // "<prefix>_1.merged.fastq.gz" and "<prefix>_2.merged.fastq.gz".
 process CAT_FASTQ {
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'ubuntu:20.04' }"
    input:
    // Each input file is staged under an `input*` directory pattern.
    tuple val(meta), path(reads, stageAs: "input*/*")
    output:
    tuple val(meta), path("*.merged.fastq.gz"), emit: reads
    path "versions.yml"                       , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Normalise `reads` to a list of path strings, whether one file or many.
    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
    if (meta.single_end) {
        if (readList.size >= 1) {
            """
            cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
            cat <<-END_VERSIONS > versions.yml
            "${task.process}":
                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
            END_VERSIONS
            """
        }
    } else {
        if (readList.size >= 2) {
            // Files are expected in alternating order: even positions
            // (0, 2, ...) go to read 1, odd positions (1, 3, ...) to read 2.
            def read1 = []
            def read2 = []
            readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
            """
            cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz
            cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz
            cat <<-END_VERSIONS > versions.yml
            "${task.process}":
                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
            END_VERSIONS
            """
        }
    }
    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
    // The size guards match the `script:` section (>= 1 for single-end, >= 2
    // for paired-end). With the former strict `>` comparisons, a stub run with
    // one single-end file or exactly one pair returned no script at all.
    if (meta.single_end) {
        if (readList.size >= 1) {
            """
            touch ${prefix}.merged.fastq.gz
            cat <<-END_VERSIONS > versions.yml
            "${task.process}":
                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
            END_VERSIONS
            """
        }
    } else {
        if (readList.size >= 2) {
            """
            touch ${prefix}_1.merged.fastq.gz
            touch ${prefix}_2.merged.fastq.gz
            cat <<-END_VERSIONS > versions.yml
            "${task.process}":
                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
            END_VERSIONS
            """
        }
    }
 }
--- a/modules/nf-core/cat/fastq/meta.yml
+++ b/modules/nf-core/cat/fastq/meta.yml
@ -0,0 +1,39 @@
 # Metadata describing the FastQ concatenation module (inputs, outputs, tool, authors).
 name: "cat_fastq"
 description: "Concatenates fastq files"
 keywords:
  - "fastq"
  - "concatenate"
 tools:
  - cat:
      description: |
        The cat utility reads files sequentially, writing them to the standard output.
      documentation: "https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html"
      licence:
        - "GPL-3.0-or-later"
 input:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: "list"
      description: |
        List of input FastQ files to be concatenated.
 output:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: "file"
      description: "Merged fastq file"
      pattern: "*.{merged.fastq.gz}"
  - versions:
      type: "file"
      description: "File containing software versions"
      pattern: "versions.yml"
 authors:
  - "@joseespinosa"
  - "@drpatelh"
--- a/modules/nf-core/centrifuge/centrifuge/main.nf
+++ b/modules/nf-core/centrifuge/centrifuge/main.nf
@ -0,0 +1,61 @@
 process CENTRIFUGE_CENTRIFUGE {
    // Runs `centrifuge` on single- or paired-end reads against the index located inside `db`,
    // writing a report and a results file and, on request, aligned/unaligned reads.
    tag "$meta.id"
    label 'process_high'
    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' :
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
    input:
    tuple val(meta), path(reads)
    path db
    val save_unaligned
    val save_aligned
    val sam_format
    output:
    tuple val(meta), path('*report.txt')                 , emit: report
    tuple val(meta), path('*results.txt')                , emit: results
    tuple val(meta), path('*.sam')                       , optional: true, emit: sam
    tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz')   , optional: true, emit: fastq_mapped
    tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped
    path "versions.yml"                                  , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // -U takes the single-end file; -1/-2 take the two mates of a pair
    def paired = meta.single_end ? "-U ${reads}" :  "-1 ${reads[0]} -2 ${reads[1]}"
    def unaligned = ''
    def aligned = ''
    // Single-end and paired-end runs use different centrifuge flags for saving reads
    if (meta.single_end) {
        unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : ''
    } else {
        unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : ''
        aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : ''
    }
    def sam_output = sam_format ? "--out-fmt 'sam'" : ''
    """
    ## we add '-not -name "._*"' to ensure silly Mac OSX metafiles files aren't included
    ## sed removes only the literal trailing ".1.cf" so other parts of the path are left untouched
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*"  | sed 's/\\.1\\.cf\$//'`
    centrifuge \\
        -x \$db_name \\
        -p $task.cpus \\
        $paired \\
        --report-file ${prefix}.report.txt \\
        -S ${prefix}.results.txt \\
        $unaligned \\
        $aligned \\
        $sam_output \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version  | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/centrifuge/centrifuge/meta.yml
+++ b/modules/nf-core/centrifuge/centrifuge/meta.yml
@ -0,0 +1,66 @@
 # Metadata for the centrifuge classification module; inputs and outputs mirror the process declaration.
 name: centrifuge_centrifuge
 description: Classifies metagenomic sequence data
 keywords:
  - classify
  - metagenomics
  - fastq
  - db
 tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
  - save_unaligned:
      type: value
      description: If true unmapped fastq files are saved
  - save_aligned:
      type: value
      description: If true mapped fastq files are saved
  - sam_format:
      type: value
      description: "If true `--out-fmt 'sam'` is passed to centrifuge to request SAM formatted output"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: |
        File containing a classification summary
      pattern: "*.{report.txt}"
  - results:
      type: file
      description: |
        File containing classification results
      pattern: "*.{results.txt}"
  - sam:
      type: file
      description: Optional output file in SAM format
      pattern: "*.{sam}"
  - fastq_unmapped:
      type: file
      description: Unmapped fastq files
      pattern: "*.unmapped.fastq{,.1,.2}.gz"
  - fastq_mapped:
      type: file
      description: Mapped fastq files
      pattern: "*.mapped.fastq{,.1,.2}.gz"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@sofstam"
  - "@jfy133"
  - "@sateeshperi"
--- a/modules/nf-core/centrifuge/kreport/main.nf
+++ b/modules/nf-core/centrifuge/kreport/main.nf
@ -0,0 +1,33 @@
 process CENTRIFUGE_KREPORT {
    // Converts a centrifuge report into a Kraken-style report with `centrifuge-kreport`,
    // using the index located inside `db`.
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6':
        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
    input:
    tuple val(meta), path(report)
    path db
    output:
    tuple val(meta), path('*.txt')                , emit: kreport
    path "versions.yml"                                  , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // NOTE(review): `args` is read from the configuration but not passed to the command below
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    ## locate the index by its "*.1.cf" file (skipping "._*" files) and strip only the literal trailing ".1.cf"
    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*"  | sed 's/\\.1\\.cf\$//'`
    centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        centrifuge: \$( centrifuge --version  | sed -n 1p | sed 's/^.*centrifuge-class version //')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/centrifuge/kreport/meta.yml
+++ b/modules/nf-core/centrifuge/kreport/meta.yml
@ -0,0 +1,41 @@
 # Metadata for the centrifuge-kreport module; inputs and outputs mirror the process declaration.
 name: "centrifuge_kreport"
 description: Creates Kraken-style reports from centrifuge out files
 keywords:
  - metagenomics
 tools:
  - centrifuge:
      description: Centrifuge is a classifier for metagenomic sequences.
      homepage: https://ccb.jhu.edu/software/centrifuge/
      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
      doi: 10.1101/gr.210641.116
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - report:
      type: file
      description: File containing the centrifuge classification report
      pattern: "*.{txt}"
  - db:
      type: directory
      description: Path to directory containing centrifuge database files
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - kreport:
      type: file
      description: |
        File containing kraken-style report from centrifuge
        out files.
      pattern: "*.{txt}"
 authors:
  - "@sofstam"
  - "@jfy133"
--- a/modules/nf-core/diamond/blastx/main.nf
+++ b/modules/nf-core/diamond/blastx/main.nf
@ -0,0 +1,68 @@
 process DIAMOND_BLASTX {
    // Runs `diamond blastx` on a query FASTA against the "*.dmnd" database found in the task
    // directory. The output format is chosen from `out_ext`; the diamond log is always emitted.
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
        'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
    input:
    tuple val(meta), path(fasta)
    path db
    val out_ext
    val blast_columns
    output:
    tuple val(meta), path('*.blast'), optional: true, emit: blast
    tuple val(meta), path('*.xml')  , optional: true, emit: xml
    tuple val(meta), path('*.txt')  , optional: true, emit: txt
    tuple val(meta), path('*.daa')  , optional: true, emit: daa
    tuple val(meta), path('*.sam')  , optional: true, emit: sam
    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
    tuple val(meta), path('*.paf')  , optional: true, emit: paf
    tuple val(meta), path("*.log")                  , emit: log
    path "versions.yml"                               , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional column keywords, appended right after the --outfmt value
    def columns = blast_columns ? "${blast_columns}" : ''
    // Map the requested file extension onto the numeric --outfmt code
    def outfmt
    switch ( out_ext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
        case "txt": outfmt = 6; break
        case "daa": outfmt = 100; break
        case "sam": outfmt = 101; break
        case "tsv": outfmt = 102; break
        case "paf": outfmt = 103; break
        default:
            // Warn before overwriting out_ext so the message reports the value that was supplied
            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
            outfmt = '6';
            out_ext = 'txt';
            break
    }
    """
    ## strip only the literal trailing ".dmnd" so other parts of the path are left untouched
    DB=`find -L ./ -name "*.dmnd" | sed 's/\\.dmnd\$//'`
    diamond \\
        blastx \\
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
        --outfmt ${outfmt} ${columns} \\
        $args \\
        --out ${prefix}.${out_ext} \\
        --log
    mv diamond.log ${prefix}.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        diamond: \$(diamond --version 2>&1 | tail -n 1 | sed 's/^diamond version //')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/diamond/blastx/meta.yml
+++ b/modules/nf-core/diamond/blastx/meta.yml
@ -0,0 +1,81 @@
 # Metadata for the DIAMOND blastx module; inputs and outputs mirror the process declaration.
 name: diamond_blastx
 description: Queries a DIAMOND database using blastx mode
 keywords:
  - fasta
  - diamond
  - blastx
  - DNA sequence
 tools:
  - diamond:
      description: Accelerated BLAST compatible local sequence aligner
      homepage: https://github.com/bbuchfink/diamond
      documentation: https://github.com/bbuchfink/diamond/wiki
      tool_dev_url: https://github.com/bbuchfink/diamond
      doi: "10.1038/s41592-021-01101-x"
      licence: ["GPL v3.0"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Input fasta file containing query sequences
      pattern: "*.{fa,fasta}"
  - db:
      type: directory
      description: Directory containing the DIAMOND database file (`*.dmnd`)
      pattern: "*"
  - out_ext:
      type: string
      description: |
        Specify the type of output file to be generated. `blast` corresponds to
        BLAST pairwise format. `xml` corresponds to BLAST xml format.
        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
        taxonomic classification format. `daa`, `sam` and `paf` are also accepted.
        Any other value falls back to `txt`.
      pattern: "blast|xml|txt|daa|sam|tsv|paf"
  - blast_columns:
      type: string
      description: |
        Optional string of output column keywords that is appended directly
        after the `--outfmt` value on the diamond command line.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - blast:
      type: file
      description: File containing blastx hits
      pattern: "*.{blast}"
  - xml:
      type: file
      description: File containing blastx hits
      pattern: "*.{xml}"
  - txt:
      type: file
      description: File containing hits in tabular BLAST format.
      pattern: "*.{txt}"
  - daa:
      type: file
      description: File containing hits in DAA format
      pattern: "*.{daa}"
  - sam:
      type: file
      description: File containing aligned reads in SAM format
      pattern: "*.{sam}"
  - tsv:
      type: file
      description: Tab separated file containing taxonomic classification of hits
      pattern: "*.{tsv}"
  - paf:
      type: file
      description: File containing aligned reads in pairwise mapping format
      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - log:
      type: file
      description: Log file containing stdout information
      pattern: "*.{log}"
 authors:
  - "@spficklin"
  - "@jfy133"
  - "@mjamy"
--- a/modules/nf-core/falco/falco.diff
+++ b/modules/nf-core/falco/falco.diff
@ -0,0 +1,16 @@
 Changes in module 'nf-core/falco'
 --- modules/nf-core/falco/main.nf
 +++ modules/nf-core/falco/main.nf
@@ -33,7 +33,9 @@
         """
     } else {
         """
 -        falco $args --threads $task.cpus ${reads}
 +        [ ! -f  ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
 +        [ ! -f  ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
 +        falco $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
         cat <<-END_VERSIONS > versions.yml
         "${task.process}":
 ************************************************************
--- a/modules/nf-core/falco/main.nf
+++ b/modules/nf-core/falco/main.nf
@ -0,0 +1,59 @@
 process FALCO {
    // Runs the falco read QC tool on one FastQ file or on a pair of FastQ files.
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::falco=1.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3':
        'quay.io/biocontainers/falco:1.2.1--h867801b_3' }"
    input:
    tuple val(meta), path(reads)
    output:
    tuple val(meta), path("*.html"), emit: html
    tuple val(meta), path("*.txt") , emit: txt
    path  "versions.yml"           , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    if ( reads.toList().size() == 1 ) {
        // Single input file: output file names are set explicitly from the prefix
        """
        falco $args --threads $task.cpus ${reads} -D ${prefix}_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    } else {
        // Two input files: link them to prefix-based names (unless already present) and run falco on the links
        """
        [ ! -f  ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
        [ ! -f  ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
        falco $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            falco:\$( falco --version | sed -e "s/falco//g" )
        END_VERSIONS
        """
    }
    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version line uses the same command as the script section so both report the same string
    """
    touch ${prefix}_data.txt
    touch ${prefix}_fastqc_data.html
    touch ${prefix}_summary.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        falco:\$( falco --version | sed -e "s/falco//g" )
    END_VERSIONS
    """
 }
--- a/modules/nf-core/falco/meta.yml
+++ b/modules/nf-core/falco/meta.yml
@ -0,0 +1,52 @@
 # Metadata for the falco read QC module.
 name: falco
 description: Run falco on sequenced reads
 keywords:
  - quality control
  - qc
  - adapters
  - fastq
 tools:
  - falco:
      description: "falco is a drop-in C++ implementation of FastQC to assess the quality of sequence reads."
      homepage: "https://falco.readthedocs.io/"
      documentation: "https://falco.readthedocs.io/"
      tool_dev_url: "None"
      doi: ""
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - html:
      type: file
      description: FastQC like report
      pattern: "*.html"
  - txt:
      type: file
      description: falco report data
      pattern: "*_{data.txt}"
  - txt:
      type: file
      description: falco summary file
      pattern: "*_{summary.txt}"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@lucacozzuto"
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@ -0,0 +1,103 @@
 process FASTP {
    // Adapter/quality trimming with fastp. Three modes are handled:
    //   * interleaved input (ext.args contains --interleaved_in): reads are streamed to stdout and gzipped
    //   * single-end input: trimmed reads are written with --out1
    //   * paired-end input: trimmed reads are written with --out1/--out2, optionally merging pairs
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' :
        'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }"
    input:
    tuple val(meta), path(reads)
    path  adapter_fasta
    val   save_trimmed_fail
    val   save_merged
    output:
    tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
    tuple val(meta), path('*.json')           , emit: json
    tuple val(meta), path('*.html')           , emit: html
    tuple val(meta), path('*.log')            , emit: log
    path "versions.yml"                       , emit: versions
    tuple val(meta), path('*.fail.fastq.gz')  , optional:true, emit: reads_fail
    tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
    // Failed reads go to one file for single-end data and to one file per mate for paired-end data
    def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
    // Added soft-links to original fastqs for consistent naming in MultiQC
    // Use single ended for interleaved. Add --interleaved_in in config.
    if ( task.ext.args?.contains('--interleaved_in') ) {
        """
        [ ! -f  ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
        fastp \\
            --stdout \\
            --in1 ${prefix}.fastq.gz \\
            --thread $task.cpus \\
            --json ${prefix}.fastp.json \\
            --html ${prefix}.fastp.html \\
            $adapter_list \\
            $fail_fastq \\
            $args \\
            2> ${prefix}.fastp.log \\
        | gzip -c > ${prefix}.fastp.fastq.gz
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
        END_VERSIONS
        """
    } else if (meta.single_end) {
        // --stdout is deliberately not used here: trimmed reads must land in the --out1 file
        // that the `reads` output collects, not on the task's standard output.
        """
        [ ! -f  ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
        fastp \\
            --in1 ${prefix}.fastq.gz \\
            --out1  ${prefix}.fastp.fastq.gz \\
            --thread $task.cpus \\
            --json ${prefix}.fastp.json \\
            --html ${prefix}.fastp.html \\
            $adapter_list \\
            $fail_fastq \\
            $args \\
            2> ${prefix}.fastp.log
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
        END_VERSIONS
        """
    } else {
        // Pair merging is only requested when save_merged is set
        def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
        """
        [ ! -f  ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
        [ ! -f  ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
        fastp \\
            --in1 ${prefix}_1.fastq.gz \\
            --in2 ${prefix}_2.fastq.gz \\
            --out1 ${prefix}_1.fastp.fastq.gz \\
            --out2 ${prefix}_2.fastp.fastq.gz \\
            --json ${prefix}.fastp.json \\
            --html ${prefix}.fastp.html \\
            $adapter_list \\
            $fail_fastq \\
            $merge_fastq \\
            --thread $task.cpus \\
            --detect_adapter_for_pe \\
            $args \\
            2> ${prefix}.fastp.log
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
        END_VERSIONS
        """
    }
 }
--- a/modules/nf-core/fastp/meta.yml
+++ b/modules/nf-core/fastp/meta.yml
@ -0,0 +1,73 @@
 # Metadata for the fastp trimming module.
 name: fastp
 description: Perform adapter/quality trimming on sequencing reads
 keywords:
  - trimming
  - quality control
  - fastq
 tools:
  - fastp:
      description: |
        A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
      documentation: https://github.com/OpenGene/fastp
      doi: https://doi.org/10.1093/bioinformatics/bty560
      licence: ["MIT"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively. If you wish to run interleaved paired-end data, supply as single-end data
        but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
  - adapter_fasta:
      type: file
      description: File in FASTA format containing possible adapters to remove.
      pattern: "*.{fasta,fna,fas,fa}"
  - save_trimmed_fail:
      type: boolean
      description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
  - save_merged:
      type: boolean
      description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: The trimmed/modified/unmerged fastq reads
      pattern: "*fastp.fastq.gz"
  - json:
      type: file
      description: Results in JSON format
      pattern: "*.json"
  - html:
      type: file
      description: Results in HTML format
      pattern: "*.html"
  - log:
      type: file
      description: fastp log file
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - reads_fail:
      type: file
      description: Reads that failed the preprocessing
      pattern: "*fail.fastq.gz"
  - reads_merged:
      type: file
      description: Reads that were successfully merged
      pattern: "*.{merged.fastq.gz}"
 authors:
  - "@drpatelh"
  - "@kevinmenden"
--- a/modules/nf-core/filtlong/main.nf
+++ b/modules/nf-core/filtlong/main.nf
@ -0,0 +1,39 @@
 process FILTLONG {
    // Filters long reads with filtlong, optionally using short reads as a reference;
    // the filtered reads are gzipped and the tool's stderr is kept as a log file.
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' :
        'quay.io/biocontainers/filtlong:0.2.1--h9a82719_0' }"
    input:
    tuple val(meta), path(shortreads), path(longreads)
    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    tuple val(meta), path("*.log")     , emit: log
    path "versions.yml"                 , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
    // Short reads are optional: one file is passed with -1, a pair with -1/-2
    def short_reads = ''
    if (shortreads) {
        short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
    }
    // Refuse to run when the output file name would be identical to the long-read input name
    if ("$longreads" == "${prefix}.fastq.gz") {
        error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!"
    }
    """
    filtlong \\
        $short_reads \\
        $args \\
        $longreads \\
        2> ${prefix}.log \\
        | gzip -n > ${prefix}.fastq.gz
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" )
    END_VERSIONS
    """
 }
--- a/modules/nf-core/filtlong/meta.yml
+++ b/modules/nf-core/filtlong/meta.yml
@ -0,0 +1,55 @@
 # Metadata describing the filtlong long-read filtering module.
 name: "filtlong"
 description: "Filtlong filters long reads based on quality measures or short read data."
 keywords:
  - "nanopore"
  - "quality control"
  - "QC"
  - "filtering"
  - "long reads"
  - "short reads"
 tools:
  - filtlong:
      description: "Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter."
      homepage: "https://anaconda.org/bioconda/filtlong"
      documentation: "None"
      tool_dev_url: "https://github.com/rrwick/Filtlong"
      doi: ""
      licence:
        - "GPL v3"
 input:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - shortreads:
      type: "file"
      description: "fastq file"
      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
  - longreads:
      type: "file"
      description: "fastq file"
      pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
 output:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: "file"
      description: "File containing software versions"
      pattern: "versions.yml"
  - reads:
      type: "file"
      description: "Filtered (compressed) fastq file"
      pattern: "*.fastq.gz"
  - log:
      type: "file"
      description: "Standard error logging file containing summary statistics"
      pattern: "*.log"
 authors:
  - "@d4straub"
  - "@sofstam"
--- a/modules/nf-core/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@ -0,0 +1,44 @@
 process GUNZIP {
    // Decompresses a gzip archive in place and emits the decompressed file.
    tag "$archive"
    label 'process_single'
    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'ubuntu:20.04' }"
    input:
    tuple val(meta), path(archive)
    output:
    tuple val(meta), path("$gunzip"), emit: gunzip
    path "versions.yml"             , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    // Expected output name: drop only a trailing ".gz". String minus would remove the FIRST
    // ".gz" anywhere in the name (e.g. "a.gz.b.gz" -> "a.b.gz"), which is not what gunzip writes.
    // Left without `def`, as in the original, because the output declaration references "$gunzip".
    gunzip = archive.toString().replaceAll('\\.gz$', '')
    """
    gunzip \\
        -f \\
        $args \\
        $archive
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
    END_VERSIONS
    """
    stub:
    // Same output-name rule as the script section
    gunzip = archive.toString().replaceAll('\\.gz$', '')
    """
    touch $gunzip
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/gunzip/meta.yml
+++ b/modules/nf-core/gunzip/meta.yml
@ -0,0 +1,34 @@
 # Metadata for the gunzip module. Attributes are nested under each entry name,
 # matching the structure used by the other module meta.yml files.
 name: gunzip
 description: Compresses and decompresses files.
 keywords:
  - gunzip
  - compression
 tools:
  - gunzip:
      description: |
        gzip is a file format and a software application used for file compression and decompression.
      documentation: https://www.gnu.org/software/gzip/manual/gzip.html
      licence: ["GPL-3.0-or-later"]
 input:
  - meta:
      type: map
      description: |
        Optional groovy Map containing meta information
        e.g. [ id:'test', single_end:false ]
  - archive:
      type: file
      description: File to be compressed/uncompressed
      pattern: "*.*"
 output:
  - gunzip:
      type: file
      description: Compressed/uncompressed file
      pattern: "*.*"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@joseespinosa"
  - "@drpatelh"
  - "@jfy133"
--- a/modules/nf-core/kaiju/kaiju/main.nf
+++ b/modules/nf-core/kaiju/kaiju/main.nf
@ -0,0 +1,41 @@
 process KAIJU_KAIJU {
    // Runs kaiju on single- or paired-end reads, locating the "*nodes.dmp" and "*.fmi"
    // files inside `db`, and writes the classification to a TSV file.
    tag "$meta.id"
    label 'process_high'
    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
    input:
    tuple val(meta), path(reads)
    path(db)
    output:
    tuple val(meta), path('*.tsv'), emit: results
    path "versions.yml"           , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def args = task.ext.args ?: ''
    // -i takes the (first) read file; paired-end data adds the second mate via -j
    def input
    if (meta.single_end) {
        input = "-i ${reads}"
    } else {
        input = "-i ${reads[0]} -j ${reads[1]}"
    }
    """
    dbnodes=\$(find -L ${db} -name "*nodes.dmp")
    dbname=\$(find -L ${db} -name "*.fmi" -not -name "._*")
    kaiju \\
        $args \\
        -z $task.cpus \\
        -t \$dbnodes \\
        -f \$dbname \\
        -o ${prefix}.tsv \\
        $input
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
 }
--- a/modules/nf-core/kaiju/kaiju/meta.yml
+++ b/modules/nf-core/kaiju/kaiju/meta.yml
@ -0,0 +1,53 @@
 # Metadata describing the kaiju taxonomic classification module.
 name: "kaiju_kaiju"
 description: "Taxonomic classification of metagenomic sequence data using a protein reference database"
 keywords:
  - "classify"
  - "metagenomics"
  - "fastq"
  - "taxonomic profiling"
 tools:
  - kaiju:
      description: "Fast and sensitive taxonomic classification for metagenomics"
      homepage: "https://kaiju.binf.ku.dk/"
      documentation: "https://github.com/bioinformatics-centre/kaiju/blob/master/README.md"
      tool_dev_url: "https://github.com/bioinformatics-centre/kaiju"
      doi: "10.1038/ncomms11257"
      licence:
        - "GNU GPL v3"
 input:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: "file"
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
      pattern: "*.{fastq,fq,fasta,fa,fsa,fas,fna,fastq.gz,fq.gz,fasta.gz,fa.gz,fsa.gz,fas.gz,fna.gz}"
  - db:
      type: "files"
      description: |
        List containing the database and nodes files for Kaiju
        e.g. [ 'database.fmi', 'nodes.dmp' ]
 output:
  - meta:
      type: "map"
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: "file"
      description: "File containing software versions"
      pattern: "versions.yml"
  - results:
      type: "file"
      description: "Results with taxonomic classification of each read"
      pattern: "*.tsv"
 authors:
  - "@talnor"
  - "@sofstam"
  - "@jfy133"
--- a/modules/nf-core/kaiju/kaiju2krona/main.nf
+++ b/modules/nf-core/kaiju/kaiju2krona/main.nf
@ -0,0 +1,39 @@
 // Runs `kaiju2krona` on a Kaiju output table (`tsv`), using the nodes.dmp and
 // names.dmp files found under `db`, and writes `${prefix}.txt`.
 process KAIJU_KAIJU2KRONA {
    tag "$meta.id"
    label 'process_single'
    // Software environment: kaiju 1.8.2 via conda or a matching container image.
    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
    input:
    // Sample metadata map plus the Kaiju table to convert.
    tuple val(meta), path(tsv)
    // Location searched (following symlinks) for *nodes.dmp and *names.dmp.
    path(db)
    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    dbnodes=`find -L ${db} -name "*nodes.dmp"`
    dbnames=`find -L ${db} -name "*names.dmp"`
    kaiju2krona \\
        $args \\
        -t \$dbnodes \\
        -n \$dbnames \\
        -i ${tsv} \\
        -o ${prefix}.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
 }
--- a/modules/nf-core/kaiju/kaiju2krona/meta.yml
+++ b/modules/nf-core/kaiju/kaiju2krona/meta.yml
@ -0,0 +1,44 @@
 # nf-core module metadata for kaiju_kaiju2krona: describes the wrapped tool
 # and the module's input and output channels.
 name: kaiju_kaiju2krona
 description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
 keywords:
  - taxonomy
  - visualisation
  - krona chart
  - metagenomics
 tools:
  - "kaiju":
      description: Fast and sensitive taxonomic classification for metagenomics
      homepage: https://kaiju.binf.ku.dk/
      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
      doi: "10.1038/ncomms11257"
      licence: ["GNU GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - tsv:
      type: file
      description: Kaiju tab-separated output file
      pattern: "*.{tsv,txt}"
  # Second input of the process; it is searched for the nodes.dmp and
  # names.dmp files that are passed to kaiju2krona.
  - db:
      type: files
      description: |
        List containing the taxonomy nodes and names files for Kaiju
        e.g. [ 'nodes.dmp', 'names.dmp' ]
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: Krona text-based input file converted from Kaiju report
      pattern: "*.{txt,krona}"
 authors:
  - "@MillironX"
--- a/modules/nf-core/kaiju/kaiju2table/main.nf
+++ b/modules/nf-core/kaiju/kaiju2table/main.nf
@ -0,0 +1,40 @@
 // Summarises Kaiju classification results into a table for one taxonomic
 // rank using `kaiju2table`.
 //
 // Inputs:
 //   meta, results - sample metadata map and the Kaiju classification output
 //   db            - location searched (following symlinks) for the taxonomy
 //                   *nodes.dmp and *names.dmp files
 //   taxon_rank    - rank passed to `-r`
 // Outputs:
 //   summary  - `${prefix}.txt`
 //   versions - versions.yml recording the kaiju version
 process KAIJU_KAIJU2TABLE {
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    // Both container engines use the same build (as in KAIJU_KAIJU2KRONA) so
    // the tool does not differ depending on the container runtime.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
    input:
    tuple val(meta), path(results)
    path db
    val taxon_rank
    output:
    tuple val(meta), path('*.txt'), emit: summary
    path "versions.yml"           , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // kaiju2table's -n option takes the taxonomy names.dmp file (not the
    // .fmi index), so names.dmp is located alongside nodes.dmp.
    """
    dbnodes=`find -L ${db} -name "*nodes.dmp"`
    dbnames=`find -L ${db} -name "*names.dmp"`
    kaiju2table   $args \\
        -t \$dbnodes \\
        -n \$dbnames \\
        -r ${taxon_rank} \\
        -o ${prefix}.txt \\
        ${results}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
 }
--- a/modules/nf-core/kaiju/kaiju2table/meta.yml
+++ b/modules/nf-core/kaiju/kaiju2table/meta.yml
@ -0,0 +1,50 @@
 # nf-core module metadata for kaiju_kaiju2table: describes the wrapped tool
 # and the module's input and output channels.
 name: "kaiju_kaiju2table"
 description: Summarise Kaiju classification results into a table for a given taxonomic rank
 keywords:
  - classify
  - metagenomics
 tools:
  - kaiju:
      description: Fast and sensitive taxonomic classification for metagenomics
      homepage: https://kaiju.binf.ku.dk/
      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
      doi: "10.1038/ncomms11257"
      licence: ["GNU GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - results:
      type: file
      description: File containing the kaiju classification results
      pattern: "*.{tsv,txt}"
  # Second input of the process; it is searched for the taxonomy files
  # handed to kaiju2table.
  - db:
      type: files
      description: |
        List containing the Kaiju database and taxonomy files
        e.g. [ 'database.fmi', 'nodes.dmp', 'names.dmp' ]
  - taxon_rank:
      type: string
      description: |
        Taxonomic rank to display in report
      # `pattern` is a sibling of `description`, not part of its text.
      pattern: "phylum|class|order|family|genus|species"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Named after the process's `emit: summary` channel, which collects '*.txt'.
  - summary:
      type: file
      description: |
        Summary table for a given taxonomic rank
      pattern: "*.txt"
 authors:
  - "@sofstam"
  - "@talnor"
  - "@jfy133"
--- a/modules/nf-core/kraken2/kraken2/main.nf
+++ b/modules/nf-core/kraken2/kraken2/main.nf
@ -0,0 +1,58 @@
 // Classifies reads with `kraken2` against the database in `db`. The report
 // is always written; classified/unclassified FASTQ files and the per-read
 // assignment file are produced only when the corresponding flags are set.
 process KRAKEN2_KRAKEN2 {
    tag "$meta.id"
    label 'process_high'
    // Software environment: kraken2 2.1.2 together with pigz 2.6.
    conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
        'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"
    input:
    tuple val(meta), path(reads)
    path  db
    // Toggles for the optional outputs below.
    val save_output_fastqs
    val save_reads_assignment
    output:
    tuple val(meta), path('*.classified{.,_}*')     , optional:true, emit: classified_reads_fastq
    tuple val(meta), path('*.unclassified{.,_}*')   , optional:true, emit: unclassified_reads_fastq
    tuple val(meta), path('*classifiedreads.txt')   , optional:true, emit: classified_reads_assignment
    tuple val(meta), path('*report.txt')                           , emit: report
    path "versions.yml"                                            , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Paired-end data adds --paired and uses '#' in the output file names.
    def paired       = meta.single_end ? "" : "--paired"
    def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
    def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
    // Each optional flag expands to an empty string when its toggle is off.
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ""
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
    def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
    // The saved FASTQ files are compressed with pigz after classification.
    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
    """
    kraken2 \\
        --db $db \\
        --threads $task.cpus \\
        --report ${prefix}.kraken2.report.txt \\
        --gzip-compressed \\
        $unclassified_option \\
        $classified_option \\
        $readclassification_option \\
        $paired \\
        $args \\
        $reads
    $compress_reads_command
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
    END_VERSIONS
    """
 }
--- a/modules/nf-core/kraken2/kraken2/meta.yml
+++ b/modules/nf-core/kraken2/kraken2/meta.yml
@ -0,0 +1,75 @@
 # nf-core module metadata for kraken2_kraken2: describes the wrapped tool
 # and the module's input and output channels.
 name: kraken2_kraken2
 description: Classifies metagenomic sequence data
 keywords:
  - classify
  - metagenomics
  - fastq
  - db
 tools:
  - kraken2:
      description: |
        Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads
      homepage: https://ccb.jhu.edu/software/kraken2/
      documentation: https://github.com/DerrickWood/kraken2/wiki/Manual
      doi: "10.1186/s13059-019-1891-0"
      licence: ["MIT"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - db:
      type: directory
      description: Kraken2 database
  - save_output_fastqs:
      type: boolean
      description: |
        If true, optional commands are added to save classified and unclassified reads
        as fastq files
  - save_reads_assignment:
      type: boolean
      description: |
        If true, an optional command is added to save a file reporting the taxonomic
        classification of each input read
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - classified_reads_fastq:
      type: file
      description: |
        Reads classified as belonging to any of the taxa
        on the Kraken2 database.
      pattern: "*.fastq.gz"
  - unclassified_reads_fastq:
      type: file
      description: |
        Reads not classified to any of the taxa
        on the Kraken2 database.
      pattern: "*.fastq.gz"
  - classified_reads_assignment:
      type: file
      description: |
        Kraken2 output file indicating the taxonomic assignment of
        each input read
      pattern: "*classifiedreads.txt"
  - report:
      type: file
      description: |
        Kraken2 report containing stats about classified
        and not classified reads.
      pattern: "*report.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@joseespinosa"
  - "@drpatelh"
--- a/modules/nf-core/krakentools/combinekreports/main.nf
+++ b/modules/nf-core/krakentools/combinekreports/main.nf
@ -0,0 +1,34 @@
 // Combines the Kraken-style report files in `kreports` into one file,
 // `${prefix}.txt`, using `combine_kreports.py`.
 //
 // Inputs:
 //   meta, kreports - metadata map and the report files passed to `-r`
 // Outputs:
 //   txt      - the combined report
 //   versions - versions.yml with the hard-coded tool version
 process KRAKENTOOLS_COMBINEKREPORTS {
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
    input:
    tuple val(meta), path(kreports)
    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml", emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the id in `meta`.
    def args = task.ext.args ?: ''
    // Declared with `def` like the other locals so it stays script-local.
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    combine_kreports.py \\
        -r ${kreports} \\
        -o ${prefix}.txt \\
        ${args}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        combine_kreports.py: ${VERSION}
    END_VERSIONS
    """
 }
--- a/modules/nf-core/krakentools/combinekreports/meta.yml
+++ b/modules/nf-core/krakentools/combinekreports/meta.yml
@ -0,0 +1,43 @@
 # nf-core module metadata for krakentools_combinekreports: describes the
 # wrapped tool and the module's input and output channels.
 name: krakentools_combinekreports
 description: Combines multiple Kraken-style report files into a single combined report file
 keywords:
  - kraken
  - krakentools
  - metagenomics
  - table
  - combining
  - merging
 tools:
  - krakentools:
      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
      homepage: https://github.com/jenniferlu717/KrakenTools
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - kreports:
      type: file
      description: List of kraken-style report files
      pattern: "*.{txt,kreport}"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: Combined kreport file of all input files
      pattern: "*.txt"
 authors:
  - "@jfy133"
--- a/modules/nf-core/krakentools/kreport2krona/main.nf
+++ b/modules/nf-core/krakentools/kreport2krona/main.nf
@ -0,0 +1,36 @@
 // Runs `kreport2krona.py` on a single Kraken report (`kreport`) and writes
 // the result to `${prefix}.txt`.
 process KRAKENTOOLS_KREPORT2KRONA {
    tag "$meta.id"
    label 'process_single'
    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
    input:
    // Sample metadata map plus the report passed to `-r`.
    tuple val(meta), path(kreport)
    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml", emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    """
    kreport2krona.py \\
        -r ${kreport} \\
        -o ${prefix}.txt \\
        ${args}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kreport2krona.py: ${VERSION}
    END_VERSIONS
    """
 }
--- a/modules/nf-core/krakentools/kreport2krona/meta.yml
+++ b/modules/nf-core/krakentools/kreport2krona/meta.yml
@ -0,0 +1,41 @@
 # nf-core module metadata for krakentools_kreport2krona: describes the
 # wrapped tool and the module's input and output channels.
 name: krakentools_kreport2krona
 description: Takes a Kraken report file and prints out a krona-compatible TEXT file
 keywords:
  - kraken
  - krona
  - metagenomics
  - visualization
 tools:
  - krakentools:
      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
      homepage: https://github.com/jenniferlu717/KrakenTools
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - kreport:
      type: file
      description: Kraken report
      pattern: "*.{txt,kreport}"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Named after the process's `emit: txt` channel.
  - txt:
      type: file
      description: Krona text-based input file converted from Kraken report
      pattern: "*.{txt,krona}"
 authors:
  - "@MillironX"
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/main.nf
@ -0,0 +1,224 @@
 // Classifies several samples against one KrakenUniq database in a single
 // task: a first `krakenuniq --preload` call loads the database, then a shell
 // loop runs `krakenuniq` once per input file (single-end) or once per pair
 // of input files (paired-end).
 //
 // Inputs:
 //   meta, fastqs       - metadata map and the FASTQ files to classify
 //   db                 - KrakenUniq database passed to `--db`
 //   ram_chunk_size     - value passed to `--preload-size`
 //   save_output_fastqs - when true, classified/unclassified reads are written
 //                        and gzip-compressed afterwards
 //   report_file        - when true, `--report-file` is passed
 //   save_output        - when true, `--output` is passed
 // Outputs:
 //   classified_reads_fastq, unclassified_reads_fastq, classified_assignment
 //   (all optional), report, and versions.yml.
 process KRAKENUNIQ_PRELOADEDKRAKENUNIQ {
    tag "$meta.id"
    label 'process_high'
    conda (params.enable_conda ? "bioconda::krakenuniq=1.0.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakenuniq:1.0.0--pl5321h19e8d03_0':
        'quay.io/biocontainers/krakenuniq:1.0.0--pl5321h19e8d03_0' }"
    input:
    tuple val(meta), path(fastqs)
    path  db
    val ram_chunk_size
    val save_output_fastqs
    val report_file
    val save_output
    output:
    tuple val(meta), path('*.classified{.,_}*')     , optional:true, emit: classified_reads_fastq
    tuple val(meta), path('*.unclassified{.,_}*')   , optional:true, emit: unclassified_reads_fastq
    tuple val(meta), path('*classified.txt')        , optional:true, emit: classified_assignment
    tuple val(meta), path('*report.txt')                           , emit: report
    path "versions.yml"                                            , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    // NOTE(review): args2 reads task.ext.args (same as args), so the same
    // arguments reach both the preload call and every classification call.
    // Confirm whether task.ext.args2 was intended before changing this.
    def args2 = task.ext.args ?: ''
    // Output names use the shell variable PREFIX, which is derived per input
    // file (or pair) inside the loop below; '#' is used for paired output.
    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    // Each optional flag expands to an empty string when its toggle is off.
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    if (meta.single_end) {
        // Single-end: one classification run per file; PREFIX is the file
        // name with everything from the first dot removed.
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args
        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }
        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            PREFIX="\$(strip_suffix "\${FASTQ}")"
            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"
        done
        $compress_reads_command
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        // Paired-end: files are consumed two per line; PREFIX is derived by
        // the sed expression from the two names, with a trailing '_' or '.'
        // stripped.
        """
        krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args
        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }
        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" |  sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
            krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"
        done
        $compress_reads_command
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }
    stub:
    // Stub mode echoes the commands instead of running them and touches
    // empty placeholder output files.
    def args = task.ext.args ?: ''
    // NOTE(review): same task.ext.args source as in the script section.
    def args2 = task.ext.args ?: ''
    def classified   = meta.single_end ? '"\${PREFIX}.classified.fastq"'   : '"\${PREFIX}.classified#.fastq"'
    def unclassified = meta.single_end ? '"\${PREFIX}.unclassified.fastq"' : '"\${PREFIX}.unclassified#.fastq"'
    def classified_option = save_output_fastqs ? "--classified-out ${classified}" : ''
    def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : ''
    def output_option = save_output ? '--output "\${PREFIX}.krakenuniq.classified.txt"' : ''
    def report = report_file ? '--report-file "\${PREFIX}.krakenuniq.report.txt"' : ''
    def compress_reads_command = save_output_fastqs ? 'gzip --no-name *.fastq' : ''
    if (meta.single_end) {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args
        strip_suffix() {
            local result=\$1
            # Strip any file extensions.
            echo "\${result%%.*}"
        }
        printf "%s\\n" ${fastqs} | while read FASTQ; do \\
            echo "\${FASTQ}"
            PREFIX="\$(strip_suffix "\${FASTQ}")"
            echo "\${PREFIX}"
            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                $args2 \\
                "\${FASTQ}"
            touch "\${PREFIX}.classified.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified.fastq.gz"
        done
        echo $compress_reads_command
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    } else {
        """
        echo krakenuniq \\
            --db $db \\
            --preload \\
            --preload-size $ram_chunk_size \\
            --threads $task.cpus \\
            $args
        strip_suffix() {
            local result
            read result
            # Strip any trailing dot or underscore.
            result="\${result%_}"
            echo "\${result%.}"
        }
        printf "%s %s\\n" ${fastqs} | while read FASTQ; do \\
            read -r -a FASTQ <<< "\${FASTQ}"
            echo "\${FASTQ[@]}"
            PREFIX="\$(printf "%s\\n" "\${FASTQ[@]}" |  sed -e 'N;s/^\\(.*\\).*\\n\\1.*\$/\\1\\n\\1/;D' | strip_suffix)"
            echo "\${PREFIX}"
            echo krakenuniq \\
                --db $db \\
                --threads $task.cpus \\
                $report \\
                $output_option \\
                $unclassified_option \\
                $classified_option \\
                --paired \\
                $args2 \\
                "\${FASTQ[@]}"
            touch "\${PREFIX}.classified_1.fastq.gz" "\${PREFIX}.classified_2.fastq.gz"
            touch "\${PREFIX}.krakenuniq.classified.txt"
            touch "\${PREFIX}.krakenuniq.report.txt"
            touch "\${PREFIX}.unclassified_1.fastq.gz" "\${PREFIX}.unclassified_2.fastq.gz"
        done
        echo $compress_reads_command
        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            krakenuniq: \$(echo \$(krakenuniq --version 2>&1) | sed 's/^.*KrakenUniq version //; s/ .*\$//')
        END_VERSIONS
        """
    }
 }
--- a/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
+++ b/modules/nf-core/krakenuniq/preloadedkrakenuniq/meta.yml
@ -0,0 +1,78 @@
 # nf-core module metadata for krakenuniq_preloadedkrakenuniq: describes the
 # wrapped tool and the module's input and output channels.
 name: "krakenuniq_preloadedkrakenuniq"
 description: Classifies metagenomic sequence data using unique k-mer counts
 keywords:
  - classify
  - metagenomics
  - kmers
  - fastq
  - db
 tools:
  - "krakenuniq":
      description: "Metagenomics classifier with unique k-mer counting for more specific results"
      homepage: https://github.com/fbreitwieser/krakenuniq
      documentation: https://github.com/fbreitwieser/krakenuniq
      doi: "10.1186/s13059-018-1568-0"
      licence: ["MIT"]
 # Inputs are listed in the order the process declares them.
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastqs:
      type: file
      description: List of input FastQ files
  - db:
      type: directory
      description: KrakenUniq database
  - ram_chunk_size:
      type: val
      description: Maximum amount of RAM to use for each chunk of the database that is loaded at any one time
      pattern: "*GB"
  - save_output_fastqs:
      type: boolean
      description: |
        If true, optional commands are added to save classified and unclassified reads
        as fastq files
  - report_file:
      type: boolean
      description: |
        If true, an optional command is added to save the KrakenUniq report
        for each input
  - save_output:
      type: boolean
      description: |
        If true, an optional command is added to save a file reporting the taxonomic
        classification of each input read
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - classified_reads_fastq:
      type: file
      description: |
        Reads classified as belonging to any of the taxa
        on the KrakenUniq database.
      pattern: "*.fastq.gz"
  - unclassified_reads_fastq:
      type: file
      description: |
        Reads not classified to any of the taxa
        on the KrakenUniq database.
      pattern: "*.fastq.gz"
  - classified_assignment:
      type: file
      description: |
        KrakenUniq output file indicating the taxonomic assignment of
        each input read
      pattern: "*classified.txt"
  - report:
      type: file
      description: |
        KrakenUniq report containing stats about classified
        and not classified reads.
      pattern: "*.report.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@mjamy"
  - "@Midnighter"
--- a/modules/nf-core/krona/ktimporttaxonomy/main.nf
+++ b/modules/nf-core/krona/ktimporttaxonomy/main.nf
@ -0,0 +1,41 @@
 // Runs `ktImportTaxonomy` on `report` and writes `${prefix}.html`, pointing
 // the tool's `-tax` option at the directory that holds the staged taxonomy
 // file.
 process KRONA_KTIMPORTTAXONOMY {
    tag "${meta.id}"
    label 'process_single'
    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
    conda (params.enable_conda ? "bioconda::krona=2.8" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krona:2.8--pl5262hdfd78af_2' :
        'quay.io/biocontainers/krona:2.8--pl5262hdfd78af_2' }"
    input:
    tuple val(meta), path(report)
    // The taxonomy file is always staged under the fixed name 'taxonomy.tab'.
    path taxonomy, stageAs: 'taxonomy.tab'
    output:
    tuple val(meta), path ('*.html'), emit: html
    path "versions.yml"             , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '2.8' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // The script locates the directory containing the '*.tab' file and
    // passes that directory (with a trailing slash) to `-tax`.
    """
    TAXONOMY=\$(find -L . -name '*.tab' -exec dirname {} \\;)
    echo \$TAXONOMY
    ktImportTaxonomy \\
        $args \\
        -o ${prefix}.html \\
        -tax \$TAXONOMY/ \\
        $report
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        krona: $VERSION
    END_VERSIONS
    """
 }
--- a/modules/nf-core/krona/ktimporttaxonomy/meta.yml
+++ b/modules/nf-core/krona/ktimporttaxonomy/meta.yml
@ -0,0 +1,48 @@
 # nf-core module metadata for krona_ktimporttaxonomy: describes the wrapped
 # tool and the module's input and output channels.
 name: krona_ktimporttaxonomy
 description: KronaTools Import Taxonomy imports taxonomy classifications and produces an interactive Krona plot.
 keywords:
  - plot
  - taxonomy
  - interactive
  - html
  - visualisation
  - krona chart
 tools:
  - krona:
      description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
      homepage: https://github.com/marbl/Krona/wiki/KronaTools
      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
      # Same repository, DOI and licence reference as the krona_ktimporttext
      # module; bare keys would otherwise parse as null.
      tool_dev_url: https://github.com/marbl/Krona
      doi: "10.1186/1471-2105-12-385"
      licence: ["https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt"]
 # Inputs are listed in the order the process declares them.
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - report:
      type: file
      description: "A tab-delimited file with taxonomy IDs and (optionally) query IDs, magnitudes, and scores.  Query IDs are taken from column 1, taxonomy IDs from column 2, and scores from column 3.  Lines beginning with # will be ignored."
      pattern: "*.{tsv}"
  - taxonomy:
      type: file
      description: |
        Path to a Krona taxonomy .tab file normally downloaded and generated by
        krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but
        must end in `.tab`.
      pattern: "*tab"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - html:
      type: file
      description: A html file containing an interactive krona plot.
      pattern: "*.{html}"
 authors:
  - "@mjakobs"
--- a/modules/nf-core/krona/ktimporttext/main.nf
+++ b/modules/nf-core/krona/ktimporttext/main.nf
@ -0,0 +1,34 @@
 // Runs `ktImportText` on `report` and writes `${prefix}.html`.
 process KRONA_KTIMPORTTEXT {
    tag "$meta.id"
    label 'process_single'
    // Software environment: krona 2.8.1 via conda or a matching container image.
    conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
        'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
    input:
    // Sample metadata map plus the text file handed to ktImportText.
    tuple val(meta), path(report)
    output:
    tuple val(meta), path ('*.html'), emit: html
    path "versions.yml"             , emit: versions
    when:
    // Runs unless the task's `ext.when` is set to a false value.
    task.ext.when == null || task.ext.when
    script:
    // Extra CLI arguments and the output prefix come from the task `ext`
    // configuration; the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is parsed out of the text ktImportText prints when it is
    // invoked without arguments.
    """
    ktImportText  \\
        $args \\
        -o ${prefix}.html \\
        $report
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/krona/ktimporttext/meta.yml
+++ b/modules/nf-core/krona/ktimporttext/meta.yml
@ -0,0 +1,47 @@
 # nf-core module metadata for krona_ktimporttext: describes the wrapped tool
 # and the module's input and output channels.
 name: "krona_ktimporttext"
 description: Creates a Krona chart from text files listing quantities and lineages.
 keywords:
  - plot
  - taxonomy
  - interactive
  - html
  - visualisation
  - krona chart
  - metagenomics
 tools:
  - krona:
      description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
      homepage: https://github.com/marbl/Krona/wiki/KronaTools
      # NOTE(review): this links to the ktImportTaxonomy man page although the
      # module wraps ktImportText - confirm the intended documentation URL.
      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
      tool_dev_url: https://github.com/marbl/Krona
      doi: 10.1186/1471-2105-12-385
      licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt
 # Input channels of the module.
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - report:
      type: file
      description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
      pattern: "*.{txt}"
 # Output channels of the module.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - html:
      type: file
      description: A html file containing an interactive krona plot.
      pattern: "*.{html}"
 # Module authors.
 authors:
  - "@jianhong"
--- a/modules/nf-core/malt/run/main.nf
+++ b/modules/nf-core/malt/run/main.nf
@ -0,0 +1,47 @@
 // Align reads against a pre-built MALT index with `malt-run`.
 //
 // Inputs : [meta, fastqs] and the index directory.
 // Outputs: RMA6 files, optional tab/text/sam alignment files, the verbose run log
 //          (`<prefix>-malt-run.log`) and versions.yml.
 process MALT_RUN {
    tag "$meta.id"
    label 'process_high'
    conda (params.enable_conda ? "bioconda::malt=0.61" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/malt:0.61--hdfd78af_0' :
        'quay.io/biocontainers/malt:0.61--hdfd78af_0' }"
    input:
    tuple val(meta), path(fastqs)
    path index
    output:
    tuple val(meta), path("*.rma6")                          , emit: rma6
    tuple val(meta), path("*.{tab,text,sam}"),  optional:true, emit: alignments
    tuple val(meta), path("*.log")                           , emit: log
    path "versions.yml"                                      , emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Extra command-line options and the log-file prefix come from the module config.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // NOTE: this module passes no heap-size option to malt-run. The former `avail_mem`
    // variable and its "defaulting to 6GB" log message were removed because the value
    // was never used in the command, which made the message misleading.
    """
    malt-run \\
        -t $task.cpus \\
        -v \\
        -o . \\
        $args \\
        --inFile ${fastqs.join(' ')} \\
        --index $index/ |&tee ${prefix}-malt-run.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        malt: \$(malt-run --help  2>&1 | grep -o 'version.* ' | cut -f 1 -d ',' | cut -f2 -d ' ')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/malt/run/meta.yml
+++ b/modules/nf-core/malt/run/meta.yml
@ -0,0 +1,54 @@
 # nf-core module metadata for malt/run. Output names and patterns mirror the
 # `emit:` labels and globs declared in the module's main.nf.
 name: malt_run
 description: MALT, an acronym for MEGAN alignment tool, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics.
 keywords:
  - malt
  - alignment
  - metagenomics
  - ancient DNA
  - aDNA
  - palaeogenomics
  - archaeogenomics
  - microbiome
 tools:
  - malt:
      description: A tool for mapping metagenomic data
      homepage: https://www.wsi.uni-tuebingen.de/lehrstuehle/algorithms-in-bioinformatics/software/malt/
      documentation: https://software-ab.informatik.uni-tuebingen.de/download/malt/manual.pdf
      tool_dev_url: None
      doi: "10.1038/s41559-017-0446-6"
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fastqs:
      type: file
      description: Input FASTQ files
      pattern: "*.{fastq.gz,fq.gz}"
  - index:
      type: directory
      description: Index/database directory from malt-build
      pattern: "*/"
 output:
  # `meta` is emitted alongside rma6, alignments and log.
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - rma6:
      type: file
      description: MEGAN6 RMA6 file
      pattern: "*.rma6"
  # Optional output; only present when alignment files are written.
  - alignments:
      type: file
      description: Alignment files in Tab, Text or MEGAN-compatible SAM format
      pattern: "*.{tab,text,sam}"
  - log:
      type: file
      description: Log of verbose MALT stdout
      pattern: "*-malt-run.log"
 authors:
  - "@jfy133"
--- a/modules/nf-core/megan/rma2info/main.nf
+++ b/modules/nf-core/megan/rma2info/main.nf
@ -0,0 +1,38 @@
 // Export the content of an RMA6 file as gzipped text with MEGAN's `rma2info`,
 // optionally also writing a MEGAN summary file (`<prefix>.megan`).
 process MEGAN_RMA2INFO {
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
        'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
    input:
    tuple val(meta), path(rma6)
    val(megan_summary)
    output:
    tuple val(meta), path("*.txt.gz"), emit: txt
    tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The summary export flag is only added when the caller asks for it.
    def summary = ""
    if (megan_summary) {
        summary = "-es ${prefix}.megan"
    }
    """
    rma2info \\
        -i ${rma6} \\
        -o ${prefix}.txt.gz \\
        ${summary} \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/megan/rma2info/meta.yml
+++ b/modules/nf-core/megan/rma2info/meta.yml
@ -0,0 +1,51 @@
 # nf-core module metadata for megan/rma2info.
 name: "megan_rma2info"
 description: Analyses an RMA file and exports information in text format
 keywords:
  - megan
  - rma6
  - classification
  - conversion
 tools:
  - "megan":
      description: "A tool for studying the taxonomic content of a set of DNA reads"
      homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
      documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
      tool_dev_url: "https://github.com/husonlab/megan-ce"
      doi: "10.1371/journal.pcbi.1004957"
      # A real YAML list (previously a string that only looked like a list).
      licence: ["GPL >=3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - rma6:
      type: file
      description: RMA6 file from MEGAN or MALT
      pattern: "*.rma6"
  - megan_summary:
      type: boolean
      description: Specify whether to generate an MEGAN summary file
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: Compressed text file
      pattern: "*.txt.gz"
  - megan_summary:
      type: file
      description: Optionally generated MEGAN summary file
      pattern: "*.megan"
 authors:
  - "@jfy133"
--- a/modules/nf-core/metaphlan3/mergemetaphlantables/main.nf
+++ b/modules/nf-core/metaphlan3/mergemetaphlantables/main.nf
@ -0,0 +1,33 @@
 // Combine several per-sample MetaPhlAn3 abundance tables into a single table
 // named `<prefix>.txt` using `merge_metaphlan_tables.py`.
 process METAPHLAN3_MERGEMETAPHLANTABLES {
    label 'process_single'
    conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
        'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"
    input:
    tuple val(meta), path(profiles)
    output:
    tuple val(meta), path("${prefix}.txt"), emit: txt
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately assigned without `def`: the output declaration
    // above interpolates it.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    merge_metaphlan_tables.py \\
        $args \\
        -o ${prefix}.txt \\
        ${profiles}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/metaphlan3/mergemetaphlantables/meta.yml
+++ b/modules/nf-core/metaphlan3/mergemetaphlantables/meta.yml
@ -0,0 +1,44 @@
 # nf-core module metadata for metaphlan3/mergemetaphlantables.
 name: "metaphlan3_mergemetaphlantables"
 description: Merges output abundance tables from MetaPhlAn3
 keywords:
  - metagenomics
  - classification
  - merge
  - table
  - profiles
 tools:
  - metaphlan3:
      description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
      homepage: https://huttenhower.sph.harvard.edu/metaphlan/
      documentation: https://github.com/biobakery/MetaPhlAn
      doi: "10.7554/eLife.65088"
      licence: ["MIT License"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - profiles:
      type: file
      description: List of per-sample MetaPhlAn3 taxonomic abundance tables
      pattern: "*"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      # `file` is the element type; the text format is conveyed by the pattern.
      type: file
      description: Combined MetaPhlAn3 table
      pattern: "*.txt"
 authors:
  - "@jfy133"
--- a/modules/nf-core/metaphlan3/metaphlan3/main.nf
+++ b/modules/nf-core/metaphlan3/metaphlan3/main.nf
@ -0,0 +1,48 @@
 // Profile one sample with MetaPhlAn 3, writing a `<prefix>_profile.txt` table,
 // a `<prefix>.biom` file and, for read/sequence inputs, the intermediate
 // `<prefix>.bowtie2out.txt` mapping.
 process METAPHLAN3_METAPHLAN3 {
    tag "$meta.id"
    label 'process_high'
    conda (params.enable_conda ? 'bioconda::metaphlan=3.0.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/metaphlan:3.0.12--pyhb7b1952_0' :
        'quay.io/biocontainers/metaphlan:3.0.12--pyhb7b1952_0' }"
    input:
    tuple val(meta), path(input)
    path metaphlan_db
    output:
    tuple val(meta), path("*_profile.txt"), emit: profile
    tuple val(meta), path("*.biom"), emit: biom
    tuple val(meta), path('*.bowtie2out.txt'), optional: true, emit: bt2out
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Choose --input_type from the staged file name(s); anything that is not
    // recognised as FASTQ, FASTA or bowtie2out is passed on as SAM.
    def staged_name = "$input".toString()
    def input_type
    if (staged_name.endsWith(".fastq.gz") || staged_name.endsWith(".fq.gz")) {
        input_type = "--input_type fastq"
    } else if (staged_name.contains(".fasta")) {
        input_type = "--input_type fasta"
    } else if (staged_name.endsWith(".bowtie2out.txt")) {
        input_type = "--input_type bowtie2out"
    } else {
        input_type = "--input_type sam"
    }
    // Paired FASTQ files are handed over as one comma-separated argument.
    def paired_fastq = input_type.contains("fastq") && !meta.single_end
    def input_data = paired_fastq ? "${input[0]},${input[1]}" : "$input"
    // No --bowtie2out file is requested for bowtie2out or SAM input.
    def already_mapped = input_type == "--input_type bowtie2out" || input_type == "--input_type sam"
    def bowtie2_out = already_mapped ? '' : "--bowtie2out ${prefix}.bowtie2out.txt"
    """
    BT2_DB=`find -L "${metaphlan_db}" -name "*rev.1.bt2" -exec dirname {} \\;`
    metaphlan \\
        --nproc $task.cpus \\
        $input_type \\
        $input_data \\
        $args \\
        $bowtie2_out \\
        --bowtie2db \$BT2_DB \\
        --biom ${prefix}.biom \\
        --output_file ${prefix}_profile.txt
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        metaphlan3: \$(metaphlan --version 2>&1 | awk '{print \$3}')
    END_VERSIONS
    """
 }
--- a/modules/nf-core/metaphlan3/metaphlan3/meta.yml
+++ b/modules/nf-core/metaphlan3/metaphlan3/meta.yml
@ -0,0 +1,58 @@
 # nf-core module metadata for metaphlan3/metaphlan3. Output names and patterns
 # mirror the `emit:` labels and globs declared in the module's main.nf.
 name: metaphlan3_metaphlan3
 description: MetaPhlAn is a tool for profiling the composition of microbial communities from metagenomic shotgun sequencing data.
 keywords:
  - metagenomics
  - classification
  - fastq
  - bam
  - fasta
 tools:
  - metaphlan3:
      description: Identify clades (phyla to species) present in the metagenome obtained from a microbiome sample and their relative abundance
      homepage: https://huttenhower.sph.harvard.edu/metaphlan/
      documentation: https://github.com/biobakery/MetaPhlAn
      doi: "10.7554/eLife.65088"
      licence: ["MIT License"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: Metaphlan 3.0 can classify the metagenome from a variety of input data types, including FASTQ files (single-end and paired-end), FASTA, bowtie2-produced SAM files (produced from alignments to the MetaPHlAn marker database) and intermediate bowtie2 alignment files (bowtie2out)
      # No spaces inside the braces: they would become part of the glob alternatives.
      pattern: "*.{fastq.gz,fq.gz,fasta,fasta.gz,sam,bowtie2out.txt}"
  - metaphlan_db:
      type: directory
      description: |
        Directory containing pre-downloaded and uncompressed MetaPhlAn3 database downloaded from: http://cmprod1.cibio.unitn.it/biobakery3/metaphlan_databases/.
        Note that you will also need to specify `--index` and the database version name (e.g. 'mpa_v31_CHOCOPhlAn_201901') in your module.conf ext.args for METAPHLAN3_METAPHLAN3!
      pattern: "*/"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - profile:
      type: file
      description: Tab-separated output file of the predicted taxon relative abundances
      pattern: "*_profile.txt"
  - biom:
      type: file
      description: General-use format for representing biological sample by observation contingency tables
      pattern: "*.{biom}"
  # Optional output; not produced when the input is already bowtie2out or SAM.
  - bt2out:
      type: file
      description: Intermediate Bowtie2 output produced from mapping the metagenome against the MetaPHlAn marker database ( not compatible with `bowtie2out` files generated with MetaPhlAn versions below 3 )
      pattern: "*.bowtie2out.txt"
 authors:
  - "@MGordon09"
--- a/modules/nf-core/minimap2/align/main.nf
+++ b/modules/nf-core/minimap2/align/main.nf
@ -0,0 +1,48 @@
 // Align reads to a reference with minimap2, producing either a PAF file or,
 // when `bam_format` is set, a BAM file written via samtools sort/view.
 process MINIMAP2_ALIGN {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? 'bioconda::minimap2=2.21 bioconda::samtools=1.12' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' :
        'quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }"
    input:
    tuple val(meta), path(reads)
    path reference
    val bam_format
    val cigar_paf_format
    val cigar_bam
    output:
    tuple val(meta), path("*.paf"), optional: true, emit: paf
    tuple val(meta), path("*.bam"), optional: true, emit: bam
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Paired-end data is passed as two explicit files.
    def input_reads = "$reads"
    if (!meta.single_end) {
        input_reads = "${reads[0]} ${reads[1]}"
    }
    // Defaults describe the PAF route; the BAM route overrides them below.
    def bam_output = "-o ${prefix}.paf"
    def cigar_paf = ''
    def set_cigar_bam = ''
    if (bam_format) {
        bam_output = "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam"
        // -L is only added for BAM output.
        if (cigar_bam) {
            set_cigar_bam = "-L"
        }
    } else if (cigar_paf_format) {
        // -c is only added for PAF output.
        cigar_paf = "-c"
    }
    """
    minimap2 \\
        $args \\
        -t $task.cpus \\
        $reference \\
        $input_reads \\
        $cigar_paf \\
        $set_cigar_bam \\
        $bam_output
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        minimap2: \$(minimap2 --version 2>&1)
    END_VERSIONS
    """
 }
--- a/modules/nf-core/minimap2/align/meta.yml
+++ b/modules/nf-core/minimap2/align/meta.yml
@ -0,0 +1,65 @@
 # nf-core module metadata for minimap2/align.
 name: minimap2_align
 description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
 keywords:
  - align
  - fasta
  - fastq
  - genome
  - paf
  - reference
 tools:
  - minimap2:
      description: |
        A versatile pairwise aligner for genomic and spliced nucleotide sequences.
      homepage: https://github.com/lh3/minimap2
      documentation: https://github.com/lh3/minimap2#uguide
      licence: ["MIT"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FASTA or FASTQ files of size 1 and 2 for single-end
        and paired-end data, respectively.
  - reference:
      type: file
      description: |
        Reference database in FASTA format.
  - bam_format:
      type: boolean
      description: Specify that output should be in BAM format
  # Only takes effect when bam_format is false (adds `-c`).
  - cigar_paf_format:
      type: boolean
      description: Specify that output CIGAR should be in PAF format
  # Only takes effect when bam_format is true (adds `-L`).
  - cigar_bam:
      type: boolean
      description: |
        Write CIGAR with >65535 ops at the CG tag. This is recommended when
        alignments may exceed 65535 CIGAR operations (e.g. ultra-long reads) and the
        output must remain readable by older tools
        (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - paf:
      type: file
      description: Alignment in PAF format
      pattern: "*.paf"
  - bam:
      type: file
      description: Alignment in BAM format
      pattern: "*.bam"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@heuermh"
  - "@sofstam"
  - "@sateeshperi"
  - "@jfy133"
--- a/modules/nf-core/minimap2/index/main.nf
+++ b/modules/nf-core/minimap2/index/main.nf
@ -0,0 +1,33 @@
 // Build a minimap2 index (`<fasta basename>.mmi`) from a reference FASTA.
 process MINIMAP2_INDEX {
    label 'process_medium'
    conda (params.enable_conda ? 'bioconda::minimap2=2.21' : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/minimap2:2.21--h5bf99c6_0' :
        'quay.io/biocontainers/minimap2:2.21--h5bf99c6_0' }"
    input:
    tuple val(meta), path(fasta)
    output:
    tuple val(meta), path("*.mmi"), emit: index
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Extra minimap2 options supplied through the module configuration.
    def args = task.ext.args ?: ''
    // The index is named after the FASTA file without its last extension.
    def index_name = "${fasta.baseName}.mmi"
    """
    minimap2 \\
        -t $task.cpus \\
        -d ${index_name} \\
        $args \\
        $fasta
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        minimap2: \$(minimap2 --version 2>&1)
    END_VERSIONS
    """
 }
--- a/modules/nf-core/minimap2/index/meta.yml
+++ b/modules/nf-core/minimap2/index/meta.yml
@ -0,0 +1,40 @@
 # nf-core module metadata for minimap2/index. Output names mirror the `emit:`
 # labels declared in the module's main.nf.
 name: minimap2_index
 description: Provides fasta index required by minimap2 alignment.
 keywords:
  - index
  - fasta
  - reference
 tools:
  - minimap2:
      description: |
        A versatile pairwise aligner for genomic and spliced nucleotide sequences.
      homepage: https://github.com/lh3/minimap2
      documentation: https://github.com/lh3/minimap2#uguide
      licence: ["MIT"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: |
        Reference database in FASTA format.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # Emitted by the process as `index` (the file itself has the .mmi extension).
  - index:
      type: file
      description: Minimap2 fasta index.
      pattern: "*.mmi"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
 authors:
  - "@yuukiiwa"
  - "@drpatelh"
--- a/modules/nf-core/motus/merge/main.nf
+++ b/modules/nf-core/motus/merge/main.nf
@ -0,0 +1,45 @@
 // Merge mOTUs profiles into one table with `motus merge`. The output is
 // `<prefix>.biom` when task.ext.args contains "-B", otherwise `<prefix>.txt`.
 process MOTUS_MERGE {
    tag "$meta.id"
    label 'process_single'
    conda (params.enable_conda ? "bioconda::motus=3.0.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.3--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.3--pyhdfd78af_0' }"
    input:
    tuple val(meta), path(input)
    path db // to stop docker saying it can't find it... would have to have the module in upstream steps anyway
    path profile_version_yml, stageAs: 'profile_version.yml'
    output:
    tuple val(meta), path("*.txt"), optional: true, emit: txt
    tuple val(meta), path("*.biom"), optional: true, emit: biom
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Several inputs become a comma-separated -i list; a single directory is
    // passed with -d; anything else is passed with -i.
    def cmd_input
    if (input.size() > 1) {
        cmd_input = "-i ${input.join(',')}"
    } else if (input.isDirectory()) {
        cmd_input = "-d ${input}"
    } else {
        cmd_input = "-i ${input}"
    }
    // The output extension follows the presence of "-B" in the configured arguments.
    def wants_biom = task.ext.args?.contains("-B")
    def suffix = wants_biom ? "biom" : "txt"
    """
    motus \\
        merge \\
        -db $db \\
        ${cmd_input} \\
        $args \\
        -o ${prefix}.${suffix}
    ## Take version from the mOTUs/profile module output, as cannot reconstruct
    ## version without having database staged in this directory.
    VERSION=\$(cat ${profile_version_yml} | grep '/*motus:.*' | sed 's/.*otus: //g')
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        motus: \$VERSION
    END_VERSIONS
    """
 }
--- a/modules/nf-core/motus/merge/meta.yml
+++ b/modules/nf-core/motus/merge/meta.yml
@ -0,0 +1,54 @@
 # nf-core module metadata for motus/merge. Inputs and outputs mirror the
 # channel elements declared in the module's main.nf.
 name: "motus_merge"
 description: Taxonomic meta-omics profiling using universal marker genes
 keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
  - merging
  - merge
  - otu table
 tools:
  - "motus":
      description: "Marker gene-based OTU (mOTU) profiling"
      homepage: "https://motu-tool.org/"
      documentation: "https://github.com/motu-tool/mOTUs/wiki"
      tool_dev_url: "https://github.com/motu-tool/mOTUs"
      doi: "10.1186/s40168-022-01410-z"
      # A real YAML list (previously a string that only looked like a list).
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - input:
      type: file
      description: |
        List of output files (more than one) from motus profile,
        or a single directory containing motus output files.
  - db:
      type: directory
      description: |
        mOTUs database downloaded by `motus downloadDB`
      # `pattern` is a sibling of `description`, not part of its text.
      pattern: "db_mOTU/"
  - profile_version_yml:
      type: file
      description: |
        A single versions.yml file output from motus/profile. motus/merge cannot reconstruct
        this itself without having the motus database present and configured with the tool
        so here we take it from what is already reported by the upstream module.
      pattern: "versions.yml"
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: OTU table in txt format, if BIOM format not requested
      pattern: "*.txt"
  - biom:
      type: file
      description: OTU table in biom format, if BIOM format requested
      pattern: "*.biom"
 authors:
  - "@jfy133"
--- a/modules/nf-core/motus/profile/main.nf
+++ b/modules/nf-core/motus/profile/main.nf
@ -0,0 +1,56 @@
 // Run `motus profile` on reads, a BAM file or an mgc read-count table, writing
 // `<prefix>.out` and capturing stderr in `<prefix>.log`.
 process MOTUS_PROFILE {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::motus=3.0.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/motus:3.0.3--pyhdfd78af_0':
        'quay.io/biocontainers/motus:3.0.3--pyhdfd78af_0' }"
    input:
    tuple val(meta), path(reads)
    path db
    output:
    tuple val(meta), path("*.out"), emit: out
    tuple val(meta), path("*.bam"), optional: true, emit: bam
    tuple val(meta), path("*.mgc"), optional: true, emit: mgc
    tuple val(meta), path("*.log"), emit: log
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The input flag depends on the extension of the first file, then on the
    // single-/paired-end layout.
    def first_ext = reads[0].getExtension()
    def inputs
    if (first_ext == 'bam') {
        inputs = "-i ${reads}"
    } else if (first_ext == 'mgc') {
        inputs = "-m $reads"
    } else if (meta.single_end) {
        inputs = "-s $reads"
    } else {
        inputs = "-f ${reads[0]} -r ${reads[1]}"
    }
    // The database option is only added when a database was supplied.
    def refdb = ""
    if (db) {
        refdb = "-db ${db}"
    }
    """
    motus profile \\
        $args \\
        $inputs \\
        $refdb \\
        -t $task.cpus \\
        -n $prefix \\
        -o ${prefix}.out \\
        2> ${prefix}.log
    ## mOTUs version number is not available from command line.
    ## mOTUs save the version number in index database folder.
    ## mOTUs will check the database version is same version as exec version.
    if [ "$db" == "" ]; then
        VERSION=\$(echo \$(motus -h 2>&1) | sed 's/^.*Version: //; s/References.*\$//')
    else
        VERSION=\$(grep motus $db/db_mOTU_versions | sed 's/motus\\t//g')
    fi
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        motus: \$VERSION
    END_VERSIONS
    """
 }
--- a/modules/nf-core/motus/profile/meta.yml
+++ b/modules/nf-core/motus/profile/meta.yml
@ -0,0 +1,65 @@
 # nf-core module metadata for motus/profile.
 name: "motus_profile"
 description: Taxonomic meta-omics profiling using universal marker genes
 keywords:
  - classify
  - metagenomics
  - fastq
  - taxonomic profiling
 tools:
  - "motus":
      description: "Marker gene-based OTU (mOTU) profiling"
      homepage: "https://motu-tool.org/"
      documentation: "https://github.com/motu-tool/mOTUs/wiki"
      tool_dev_url: "https://github.com/motu-tool/mOTUs"
      doi: "10.1186/s40168-022-01410-z"
      # A real YAML list (previously a string that only looked like a list).
      licence: ["GPL v3"]
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input fastq/fasta files of size 1 and 2 for single-end and paired-end data,
        respectively.
        Or the intermediate bam file mapped by bwa to the mOTUs database or
        the output bam file from motus profile.
        Or the intermediate mgc read counts table.
      # Alternatives carry no leading dot: the "*." prefix already supplies it.
      pattern: "*.{fastq,fq,fasta,fa,fastq.gz,fq.gz,fasta.gz,fa.gz,bam,mgc}"
  - db:
      type: directory
      description: |
        mOTUs database downloaded by `motus downloadDB`
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - out:
      type: file
      description: Results with taxonomic classification of each read
      pattern: "*.out"
  - bam:
      type: file
      description: Optional intermediate sorted BAM file from BWA
      pattern: "*.{bam}"
  - mgc:
      type: file
      description: Optional intermediate mgc read count table file saved with `-M`.
      pattern: "*.{mgc}"
  - log:
      type: file
      description: Standard error logging file containing summary statistics
      pattern: "*.log"
 authors:
  - "@jianhong"
--- a/modules/nf-core/porechop/porechop/main.nf
+++ b/modules/nf-core/porechop/porechop/main.nf
@ -0,0 +1,36 @@
 // Run Porechop on a read file, writing `<prefix>.fastq.gz` and redirecting the
 // tool's stdout into `<prefix>.log`.
 process PORECHOP_PORECHOP {
    tag "$meta.id"
    label 'process_medium'
    conda (params.enable_conda ? "bioconda::porechop=0.2.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/porechop:0.2.4--py39h7cff6ad_2' :
        'quay.io/biocontainers/porechop:0.2.4--py39h7cff6ad_2' }"
    input:
    tuple val(meta), path(reads)
    output:
    tuple val(meta), path("*.fastq.gz"), emit: reads
    tuple val(meta), path("*.log"), emit: log
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    // Extra options and the output prefix come from the module configuration;
    // the prefix falls back to the sample id.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def trimmed_reads = "${prefix}.fastq.gz"
    def run_log = "${prefix}.log"
    """
    porechop \\
        -i $reads \\
        -t $task.cpus \\
        $args \\
        -o ${trimmed_reads} \\
        > ${run_log}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        porechop: \$( porechop --version )
    END_VERSIONS
    """
 }
--- a/modules/nf-core/porechop/porechop/meta.yml
+++ b/modules/nf-core/porechop/porechop/meta.yml
@ -0,0 +1,55 @@
 # nf-core module metadata for porechop/porechop.
 name: porechop_porechop
 description: "Adapter removal and demultiplexing of Oxford Nanopore reads"
 # Search terms used to discover the module.
 keywords:
  - adapter
  - nanopore
  - demultiplexing
 # Software wrapped by the module.
 tools:
  - porechop:
      description: "Adapter removal and demultiplexing of Oxford Nanopore reads"
      homepage: 'https://github.com/rrwick/Porechop'
      documentation: 'https://github.com/rrwick/Porechop'
      tool_dev_url: 'https://github.com/rrwick/Porechop'
      doi: '10.1099/mgen.0.000132'
      licence:
        - 'GPL v3'
 # Input channel elements.
 input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: "fastq/fastq.gz file"
      pattern: '*.{fastq,fastq.gz,fq,fq.gz}'
 # Output channel elements.
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: "File containing software versions"
      pattern: 'versions.yml'
  - reads:
      type: file
      description: "Demultiplexed and/or adapter-trimmed fastq.gz file"
      pattern: '*.{fastq.gz}'
  - log:
      type: file
      description: "Log file containing stdout information"
      pattern: '*.log'
 authors:
  - '@ggabernet'
  - '@jasmezz'
  - '@d4straub'
  - '@LaurenceKuhl'
  - '@SusiJo'
  - '@jonasscheid'
  - '@jonoave'
  - '@GokceOGUZ'
  - '@jfy133'
--- a/modules/nf-core/prinseqplusplus/main.nf
+++ b/modules/nf-core/prinseqplusplus/main.nf
@ -0,0 +1,61 @@
 // Filter reads with prinseq++, writing gzipped `<prefix>_*_out*` FASTQ files and
 // copying the tool's stdout into `<prefix>.log`.
 process PRINSEQPLUSPLUS {
    tag "$meta.id"
    label 'process_low'
    conda (params.enable_conda ? "bioconda::prinseq-plus-plus=1.2.3" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/prinseq-plus-plus:1.2.3--hc90279e_1':
        'quay.io/biocontainers/prinseq-plus-plus:1.2.3--hc90279e_1' }"
    input:
    tuple val(meta), path(reads)
    output:
    tuple val(meta), path("*_good_out*.fastq.gz"), emit: good_reads
    tuple val(meta), path("*_single_out*.fastq.gz"), optional: true, emit: single_reads
    tuple val(meta), path("*_bad_out*.fastq.gz"), optional: true, emit: bad_reads
    tuple val(meta), path("*.log"), emit: log
    path "versions.yml", emit: versions
    when:
    task.ext.when == null || task.ext.when
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end data uses -fastq only; paired-end data adds the mate via -fastq2.
    // Kept on one line so the script template below serves both layouts.
    def fastq_inputs = meta.single_end ? "-fastq ${reads}" : "-fastq ${reads[0]} -fastq2 ${reads[1]}"
    """
    prinseq++ \\
        -threads $task.cpus \\
        $fastq_inputs \\
        -out_name ${prefix} \\
        -out_gz \\
        -VERBOSE 1 \\
        $args \\
        | tee ${prefix}.log
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        prinseqplusplus: \$(echo \$(prinseq++ --version | cut -f 2 -d ' ' ))
    END_VERSIONS
    """
 }
--- a/Show more
+++ b/Show more