Merge branch 'dev' into motus

2024-12-22 14:58:17 +00:00 · 2022-07-04 13:14:42 +02:00 · 2022-07-04 13:14:42 +02:00 · 26e31c782d
commit 26e31c782d
parent dc48935651 913080a5b1
25 changed files with 592 additions and 75 deletions
--- a/CITATIONS.md
+++ b/CITATIONS.md
@ -36,6 +36,10 @@

  > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.

+- [Krona](https://doi.org/10.1186/1471-2105-12-385)
+
+  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
+
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)

  > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@ -48,7 +48,7 @@ def check_samplesheet(file_in, file_out):
    2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
    """

-    FQ_EXTENSIONS = (".fq", ".fq.gz", ".fastq", ".fastq.gz")
+    FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
    FA_EXTENSIONS = (
        ".fa",
        ".fa.gz",
--- a/conf/modules.config
+++ b/conf/modules.config
@ -289,7 +289,15 @@ process {
        publishDir = [
            path: { "${params.outdir}/kraken2/${meta.db_name}" },
            mode: params.publish_dir_mode,
-            pattern: '*.{txt}'
+            pattern: '*.{txt,report,fastq.gz}'
+        ]
+    }
+
+    withName: KRONA_KTIMPORTTEXT {
+        publishDir = [
+            path: { "${params.outdir}/krona" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{html}'
        ]
    }

@ -307,7 +315,7 @@ process {
        publishDir = [
            path: { "${params.outdir}/centrifuge/${meta.db_name}" },
            mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: '*.{txt,sam,gz}'
        ]
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
@ -343,6 +351,10 @@ process {
        ]
    }

+    withName: KAIJU_KAIJU2KRONA {
+        ext.args = '-v -u'
+    }
+
    withName: DIAMOND_BLASTX {
        ext.args = { "${meta.db_params}" }
        ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
--- a/conf/test.config
+++ b/conf/test.config
@ -38,10 +38,18 @@ params {
    run_centrifuge                        = true
    run_diamond                           = true
    run_motus                             = false
+    run_krona                             = true
+    malt_save_reads                       = true
+    kraken2_save_reads                    = true
+    centrifuge_save_reads                 = true
+    diamond_save_reads                    = true
 }

 process {
    withName: MALT_RUN {
        maxForks = 1
    }
+    withName: MEGAN_RMA2INFO {
+        maxForks = 1
+    }
 }
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@ -38,6 +38,7 @@ params {
    run_centrifuge                        = true
    run_diamond                           = true
    run_motus                             = false
+    run_krona                             = true
 }

 process {
--- a/docs/usage.md
+++ b/docs/usage.md
@ -12,6 +12,8 @@

 nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).

+> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
+
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.

 This samplesheet is then specified on the command line as follows:
@ -219,6 +221,12 @@ Activating this functionality will concatenate the FASTQ files with the same sam

 You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.

+##### Profiling
+
+###### MALT
+
+nf-core/taxprofiler uses MALT 0.4.1, which is a comparatively old version. However, it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should still be compatible with `malt-run` of 0.4.1.
+
 ### Updating the pipeline

 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
--- a/modules.json
+++ b/modules.json
@ -28,7 +28,7 @@
                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
            },
            "diamond/blastx": {
-                "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de"
+                "git_sha": "bd3bfe0817246082525ab93707976676b1fe208b"
            },
            "fastp": {
                "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
@ -42,14 +42,23 @@
            "kaiju/kaiju": {
                "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
            },
+            "kaiju/kaiju2krona": {
+                "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f"
+            },
            "kaiju/kaiju2table": {
                "git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
            },
            "kraken2/kraken2": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                "git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
+            },
+            "krakentools/kreport2krona": {
+                "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
+            },
+            "krona/ktimporttext": {
+                "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
            },
            "malt/run": {
-                "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
+                "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
            },
            "megan/rma2info": {
                "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
--- a/modules/local/krona_cleanup.nf
+++ b/modules/local/krona_cleanup.nf
@ -0,0 +1,40 @@
+process KRONA_CLEANUP {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
+        'biocontainers/biocontainers:v1.2.0_cv1' }"
+
+    input:
+    tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Copy the file to a new name
+    cp ${krona} ${prefix}.txt
+
+    # Remove ugly 'x__' prefixes for each of the taxonomic levels
+    LEVELS=(d k p c o f g s)
+    for L in "\${LEVELS[@]}"; do
+        sed -i "s/\${L}__//g" ${prefix}.txt
+    done
+
+    # Remove underscores that are standing in place of spaces
+    sed -i "s/_/ /g" ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/diamond/blastx/main.nf
+++ b/modules/nf-core/modules/diamond/blastx/main.nf
@ -2,21 +2,26 @@ process DIAMOND_BLASTX {
    tag "$meta.id"
    label 'process_medium'

-    // Dimaond is limited to v2.0.9 because there is not a
-    // singularity version higher than this at the current time.
-    conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
+    conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
-        'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
+        'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
+        'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"

    input:
    tuple val(meta), path(fasta)
    path db
-    val outext
+    val out_ext
+    val blast_columns

    output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"           , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -24,7 +29,8 @@ process DIAMOND_BLASTX {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
        case "blast": outfmt = 0; break
        case "xml": outfmt = 5; break
        case "txt": outfmt = 6; break
@ -32,6 +38,11 @@ process DIAMOND_BLASTX {
        case "sam": outfmt = 101; break
        case "tsv": outfmt = 102; break
        case "paf": outfmt = 103; break
+        default:
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)"); // warn before out_ext is overwritten so the rejected value is reported
+            outfmt = '6';
+            out_ext = 'txt';
+            break
    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -41,9 +52,9 @@ process DIAMOND_BLASTX {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
        $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/nf-core/modules/diamond/blastx/meta.yml
+++ b/modules/nf-core/modules/diamond/blastx/meta.yml
@ -28,7 +28,7 @@ input:
      type: directory
      description: Directory containing the nucleotide blast database
      pattern: "*"
-  - outext:
+  - out_ext:
      type: string
      description: |
        Specify the type of output file to be generated. `blast` corresponds to
@ -38,10 +38,34 @@ input:
      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
  - txt:
      type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format
+      pattern: "*.{paf}"
  - versions:
      type: file
      description: File containing software versions
--- a/modules/nf-core/modules/kaiju/kaiju2krona/main.nf
+++ b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf
@ -0,0 +1,39 @@
+process KAIJU_KAIJU2KRONA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
+        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
+
+    input:
+    tuple val(meta), path(tsv)
+    path(db)
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbnames=`find -L ${db} -name "*names.dmp"`
+    kaiju2krona \\
+        $args \\
+        -t \$dbnodes \\
+        -n \$dbnames \\
+        -i ${tsv} \\
+        -o ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml
+++ b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml
@ -0,0 +1,44 @@
+name: kaiju_kaiju2krona
+description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
+keywords:
+  - taxonomy
+  - visualisation
+  - krona chart
+  - metagenomics
+tools:
+  - "kaiju":
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - tsv:
+      type: file
+      description: Kaiju tab-separated output file
+      pattern: "*.{tsv,txt}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt:
+      type: file
+      description: Krona text-based input file converted from Kaiju report
+      pattern: "*.{txt,krona}"
+
+authors:
+  - "@MillironX"
--- a/modules/nf-core/modules/kraken2/kraken2/main.nf
+++ b/modules/nf-core/modules/kraken2/kraken2/main.nf
@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
    input:
    tuple val(meta), path(reads)
    path  db
+    val save_output_fastqs
+    val save_reads_assignment

    output:
-    tuple val(meta), path('*classified*')  , emit: classified
-    tuple val(meta), path('*unclassified*'), emit: unclassified
-    tuple val(meta), path('*report.txt')   , emit: txt
-    path "versions.yml"                    , emit: versions
+    tuple val(meta), path('*.classified{.,_}*')  , optional:true, emit: classified_reads_fastq      // '.classified.' (single-end) or '.classified_N.' (paired); excludes unclassified and classifiedreads files
+    tuple val(meta), path('*.unclassified{.,_}*'), optional:true, emit: unclassified_reads_fastq    // '.unclassified.' or '.unclassified_N.' FASTQ files only
+    tuple val(meta), path('*classifiedreads*')   , optional:true, emit: classified_reads_assignment // per-read assignment file written via --output
+    tuple val(meta), path('*report.txt')                        , emit: report
+    path "versions.yml"                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when
@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
    def paired       = meta.single_end ? "" : "--paired"
    def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
    def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
+
    """
    kraken2 \\
        --db $db \\
        --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
        --report ${prefix}.kraken2.report.txt \\
        --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
        $paired \\
        $args \\
        $reads

-    pigz -p $task.cpus *.fastq
+    $compress_reads_command

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/nf-core/modules/kraken2/kraken2/meta.yml
+++ b/modules/nf-core/modules/kraken2/kraken2/meta.yml
@ -27,25 +27,40 @@ input:
  - db:
      type: directory
      description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read
 output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
      type: file
      description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
        on the Kraken2 database.
      pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
      type: file
      description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
        on the Kraken2 database.
      pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
      type: file
      description: |
        Kraken2 report containing stats about classified
--- a/modules/nf-core/modules/krakentools/kreport2krona/main.nf
+++ b/modules/nf-core/modules/krakentools/kreport2krona/main.nf
@ -0,0 +1,36 @@
+def VERSION = '1.2' // Version information not provided by tool on CLI
+
+process KRAKENTOOLS_KREPORT2KRONA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
+        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
+
+    input:
+    tuple val(meta), path(kreport)
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    kreport2krona.py \\
+        -r ${kreport} \\
+        -o ${prefix}.txt \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kreport2krona.py: ${VERSION}
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/krakentools/kreport2krona/meta.yml
+++ b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml
@ -0,0 +1,41 @@
+name: krakentools_kreport2krona
+description: Takes a Kraken report file and prints out a krona-compatible TEXT file
+keywords:
+  - kraken
+  - krona
+  - metagenomics
+  - visualization
+tools:
+  - krakentools:
+      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
+      homepage: https://github.com/jenniferlu717/KrakenTools
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - kreport:
+      type: file
+      description: Kraken report
+      pattern: "*.{txt,kreport}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt:
+      type: file
+      description: Krona text-based input file converted from Kraken report
+      pattern: "*.{txt,krona}"
+
+authors:
+  - "@MillironX"
--- a/modules/nf-core/modules/krona/ktimporttext/main.nf
+++ b/modules/nf-core/modules/krona/ktimporttext/main.nf
@ -0,0 +1,34 @@
+process KRONA_KTIMPORTTEXT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
+        'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(report)
+
+    output:
+    tuple val(meta), path ('*.html'), emit: html
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    ktImportText  \\
+        $args \\
+        -o ${prefix}.html \\
+        $report
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
+    END_VERSIONS
+    """
+}
--- a/modules/nf-core/modules/krona/ktimporttext/meta.yml
+++ b/modules/nf-core/modules/krona/ktimporttext/meta.yml
@ -0,0 +1,47 @@
+name: "krona_ktimporttext"
+description: Creates a Krona chart from text files listing quantities and lineages.
+keywords:
+  - plot
+  - taxonomy
+  - interactive
+  - html
+  - visualisation
+  - krona chart
+  - metagenomics
+tools:
+  - krona:
+      description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
+      homepage: https://github.com/marbl/Krona/wiki/KronaTools
+      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
+      tool_dev_url: https://github.com/marbl/Krona
+      doi: 10.1186/1471-2105-12-385
+      licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+  - report:
+      type: file
+      description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
+      pattern: "*.{txt}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - html:
+      type: file
+      description: A html file containing an interactive krona plot.
+      pattern: "*.{html}"
+
+authors:
+  - "@jianhong"
--- a/modules/nf-core/modules/malt/run/main.nf
+++ b/modules/nf-core/modules/malt/run/main.nf
@ -2,10 +2,10 @@ process MALT_RUN {
    tag "$meta.id"
    label 'process_high'

-    conda (params.enable_conda ? "bioconda::malt=0.53" : null)
+    conda (params.enable_conda ? "bioconda::malt=0.41" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
-        'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
+        'quay.io/biocontainers/malt:0.41--1' }"

    input:
    tuple val(meta), path(fastqs)
@ -33,7 +33,6 @@ process MALT_RUN {

    """
    malt-run \\
-        -J-Xmx${avail_mem}g \\
        -t $task.cpus \\
        -v \\
        -o . \\
--- a/nextflow.config
+++ b/nextflow.config
@ -102,16 +102,17 @@ params {
    // MALT
    run_malt                   = false
    malt_mode                  = 'BlastN'
-    malt_generatemegansummary = false
+    malt_generate_megansummary = false
+    malt_save_reads            = false

    // kraken2
-    run_kraken2                = false
+    run_kraken2                     = false
+    kraken2_save_reads              = false
+    kraken2_save_readclassification = false

    // centrifuge
    run_centrifuge             = false
-    centrifuge_save_unaligned  = false
-    centrifuge_save_aligned    = false
-    centrifuge_sam_format      = false
+    centrifuge_save_reads      = false

    // metaphlan3
    run_metaphlan3             = false
@ -122,10 +123,14 @@ params {

    // diamond
    run_diamond                = false
-    diamond_output_format      = 'txt'
+    diamond_output_format      = 'tsv'  // TSV is only format with taxonomic information apparently
+    diamond_save_reads         = false // this will override default diamond output format so no taxonomic profile is generated!

    // mOTUs
    run_motus                  = false
+
+    // krona
+    run_krona                  = false
 }

 // Load base.config by default for all pipelines
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@ -260,6 +260,8 @@
    "properties": {
        "databases": {
            "type": "string",
+            "mimetype": "text/csv",
+            "format": "file-path",
            "default": "None"
        },
        "shortread_qc_excludeunmerged": {
@ -278,15 +280,6 @@
        "run_centrifuge": {
            "type": "boolean"
        },
-        "centrifuge_save_unaligned": {
-            "type": "boolean"
-        },
-        "centrifuge_save_aligned": {
-            "type": "boolean"
-        },
-        "centrifuge_sam_format": {
-            "type": "boolean"
-        },
        "run_metaphlan3": {
            "type": "boolean",
            "description": "Enable MetaPhlAn for taxonomic profiling"
@ -386,7 +379,7 @@
        "run_kaiju": {
            "type": "boolean"
        },
-        "malt_generatemegansummary": {
+        "malt_generate_megansummary": {
            "type": "boolean"
        },
        "kaiju_taxon_name": {
@ -430,6 +423,24 @@
        },
        "run_motus": {
            "type": "boolean"
+        },
+        "malt_save_reads": {
+            "type": "boolean"
+        },
+        "kraken2_save_reads": {
+            "type": "boolean"
+        },
+        "kraken2_save_readclassification": {
+            "type": "boolean"
+        },
+        "centrifuge_save_reads": {
+            "type": "boolean"
+        },
+        "diamond_save_reads": {
+            "type": "boolean"
+        },
+        "run_krona": {
+            "type": "boolean"
        }
    }
 }
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@ -48,7 +48,7 @@ workflow LONGREAD_PREPROCESSING {

    }

-    FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
+    FASTQC_PROCESSED ( ch_processed_reads )
    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

    emit:
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -21,7 +21,8 @@ workflow PROFILING {
    main:
    ch_versions             = Channel.empty()
    ch_multiqc_files        = Channel.empty()
-    ch_raw_profiles    = Channel.empty()
+    ch_raw_classifications  = Channel.empty()
+    ch_raw_profiles         = Channel.empty()

 /*
        COMBINE READS WITH POSSIBLE DATABASES
@ -62,14 +63,29 @@ workflow PROFILING {

        // MALT: We groupTuple to have all samples in one channel for MALT as database
        // loading takes a long time, so we only want to run it once per database
-        // TODO document somewhere we only accept illumina short reads for MALT?
        ch_input_for_malt =  ch_input_for_profiling.malt
                                .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
                                .map {
-                                    it ->
-                                        def temp_meta =  [ id: it[2]['db_name']]  + it[2]
-                                        def db = it[3]
-                                        [ temp_meta, it[1], db ]
+                                    meta, reads, db_meta, db ->
+
+                                        // Reset entire input meta for MALT to just database name,
+                                        // as we don't run on a per-sample basis due to huge databases,
+                                        // so all samples are in one run and sample-specific metadata is
+                                        // unnecessary. Set as database name to prevent `null` job ID and prefix.
+                                        def temp_meta = [ id: meta['db_name'] ]
+
+                                        // Extend database parameters to specify whether to save alignments or not
+                                        def new_db_meta = db_meta.clone()
+                                        def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
+                                        new_db_meta['db_params'] = db_meta['db_params'] + sam_format
+
+                                        // Combine reduced sample metadata with updated database parameters metadata,
+                                        // make sure id is db_name for publishing purposes.
+                                        def new_meta = temp_meta + new_db_meta
+                                        new_meta['id'] = new_meta['db_name']
+
+                                        [ new_meta, reads, db ]
+
                                }
                                .groupTuple(by: [0,2])
                                .multiMap {
@ -93,10 +109,11 @@ workflow PROFILING {
                                        [ meta_new, rma ]
                                }

-        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
-        ch_multiqc_files   = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([])  )
-        ch_versions        = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
-        ch_raw_profiles    = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
+        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
+        ch_multiqc_files       = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([])  )
+        ch_versions            = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
+        ch_raw_profiles        = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )

    }

@ -109,10 +126,11 @@ workflow PROFILING {
                                        db: it[3]
                                }

-        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db  )
-        ch_multiqc_files   = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([])  )
-        ch_versions        = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
-        ch_raw_profiles    = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
+        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
+        ch_multiqc_files       = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([])  )
+        ch_versions            = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
+        ch_raw_profiles        = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )

    }

@ -129,10 +147,11 @@ workflow PROFILING {
                                        db: it[3]
                                }

-        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format  )
+        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads  )
        CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
-        ch_versions        = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
-        ch_raw_profiles    = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
+        ch_versions            = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
+        ch_raw_profiles        = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )

    }

@ -168,6 +187,7 @@ workflow PROFILING {
        KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
        ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([])  )
        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
        ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )

    }
@ -181,9 +201,13 @@ workflow PROFILING {
                                        db: it[3]
                                }

-        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
+        // DIAMOND only accepts a single output format specification, therefore
+        // requesting saved reads replaces the user-specified output format with SAM!
+        ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
+
+        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format , [] )
        ch_versions        = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
-        ch_raw_profiles    = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
+        ch_raw_profiles    = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )

    }

@ -207,7 +231,8 @@ workflow PROFILING {
    }

    emit:
-    profiles = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom
-    versions = ch_versions          // channel: [ versions.yml ]
-    mqc      = ch_multiqc_files
+    classifications = ch_raw_classifications
+    profiles        = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom
+    versions        = ch_versions          // channel: [ versions.yml ]
+    mqc             = ch_multiqc_files
 }
--- a/subworkflows/local/visualization_krona.nf
+++ b/subworkflows/local/visualization_krona.nf
@ -0,0 +1,84 @@
+//
+// Create Krona visualizations
+//
+
+include { KAIJU_KAIJU2KRONA         } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main'
+include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
+include { KRONA_CLEANUP             } from '../../modules/local/krona_cleanup'
+include { KRONA_KTIMPORTTEXT        } from '../../modules/nf-core/modules/krona/ktimporttext/main'
+
+workflow VISUALIZATION_KRONA {
+    take:
+    classifications // channel: tuples whose element 0 is a meta map read for 'tool' and 'db_name'; only 'kaiju' entries are used here
+    profiles // channel: tuples whose element 0 is a meta map read for 'tool'; only 'kraken2' and 'centrifuge' entries are used here
+    databases // channel: tuples whose element 0 is a meta map with 'db_name'; element 1 is presumably the database itself - TODO confirm
+
+    main:
+    ch_krona_text = Channel.empty()
+    ch_krona_html = Channel.empty()
+    ch_versions = Channel.empty()
+
+    /*
+        Split profiles and classifications into per-tool branches using meta['tool']; entries matching no named branch fall into 'unknown', which this workflow does not consume
+    */
+    ch_input_profiles = profiles
+        .branch {
+            centrifuge: it[0]['tool'] == 'centrifuge'
+            kraken2: it[0]['tool'] == 'kraken2'
+            unknown: true
+        }
+    ch_input_classifications = classifications
+        .branch {
+            kaiju: it[0]['tool'] == 'kaiju'
+            unknown: true
+        }
+
+    /*
+        Convert Kraken2 formatted reports (the kraken2 and centrifuge profile branches, mixed into one channel) into Krona text files
+    */
+    ch_kraken_reports = ch_input_profiles.kraken2
+        .mix( ch_input_profiles.centrifuge )
+    KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports )
+    ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt )
+    ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )
+
+    /*
+        Pair each Kaiju classification with its database by matching on db_name, then split the result into parallel 'profiles' and 'db' channels
+    */
+    ch_input_for_kaiju2krona = ch_input_classifications.kaiju
+        .map{ [it[0]['db_name'], it[0], it[1]] }
+        .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 )
+        .multiMap{
+            it ->
+                profiles: [it[1], it[2]]
+                db: it[3]
+        }
+
+    /*
+        Convert Kaiju formatted results into Krona text files
+    */
+    KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db )
+    ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt )
+    ch_versions = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() )
+
+    /*
+        Remove taxonomy level annotations from the Krona text files
+    */
+    KRONA_CLEANUP( ch_krona_text )
+    ch_cleaned_krona_text = KRONA_CLEANUP.out.txt
+    ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() )
+
+    /*
+        Convert Krona text files into html Krona visualizations; meta is reduced to id = db_name and tuples are grouped on it, so text files sharing a database name are passed to KRONA_KTIMPORTTEXT together
+    */
+    ch_krona_text_for_import = ch_cleaned_krona_text
+        .map{[[id: it[0]['db_name']], it[1]]}
+        .groupTuple()
+    KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
+    ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
+    ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
+
+    emit:
+    html = ch_krona_html // channel: html output of KRONA_KTIMPORTTEXT
+    versions = ch_versions // channel: mixed 'versions' outputs of the four modules run above
+}
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@ -33,6 +33,8 @@ if (params.hostremoval_reference           ) { ch_reference = file(params.hostre
 if (params.shortread_hostremoval_index     ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index    ) } else { ch_shortread_reference_index = [] }
 if (params.longread_hostremoval_index      ) { ch_longread_reference_index  = file(params.longread_hostremoval_index     ) } else { ch_longread_reference_index  = [] }

+if (params.diamond_save_reads              ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    CONFIG FILES
@ -60,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL         } from '../subworkflows/local/shortread_
 include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_hostremoval'
 include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
 include { PROFILING                     } from '../subworkflows/local/profiling'
+include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -207,6 +210,14 @@ workflow TAXPROFILER {
    PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
    ch_versions = ch_versions.mix( PROFILING.out.versions )

+    /*
+        SUBWORKFLOW: VISUALIZATION_KRONA
+    */
+    if ( params.run_krona ) {
+        VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
+        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
+    }
+
    /*
        MODULE: MultiQC
    */