diff --git a/CITATIONS.md b/CITATIONS.md index 8044658..1ce4ec2 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -36,6 +36,10 @@ > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0. +- [Krona](https://doi.org/10.1186/1471-2105-12-385) + + > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385. + - [MALT](https://doi.org/10.1038/s41559-017-0446-6) > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. diff --git a/conf/modules.config b/conf/modules.config index 46320cf..f9d0329 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -293,6 +293,14 @@ process { ] } + withName: KRONA_KTIMPORTTEXT { + publishDir = [ + path: { "${params.outdir}/krona" }, + mode: params.publish_dir_mode, + pattern: '*.{html}' + ] + } + withName: METAPHLAN3 { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } diff --git a/conf/test.config b/conf/test.config index e9fa62e..04dfb4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krona = true malt_save_reads = true kraken2_save_reads = true centrifuge_save_reads = true diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config index 60cdde8..92fd29d 100644 --- a/conf/test_nopreprocessing.config +++ b/conf/test_nopreprocessing.config @@ -37,6 +37,7 @@ params { run_metaphlan3 = true run_centrifuge = true run_diamond = true + run_krona = true } process { diff --git a/modules.json b/modules.json index 9561b0f..d93eb1b 100644 --- a/modules.json +++ b/modules.json @@ -48,6 +48,12 @@ "kraken2/kraken2": { "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, + "krakentools/kreport2krona": { + "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422" + }, + "krona/ktimporttext": { + "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2" + }, "malt/run": { "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5" }, diff --git a/modules/local/krona_cleanup.nf b/modules/local/krona_cleanup.nf new file mode 100644 index 0000000..804cb69 --- /dev/null +++ b/modules/local/krona_cleanup.nf @@ -0,0 +1,40 @@ +process KRONA_CLEANUP { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt') + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + # Copy the file to a new name + cp ${krona} ${prefix}.txt + + # Remove ugly 'x__' prefixes for each of the taxonomic levels + LEVELS=(d k p c o f g s) + for L in "\${LEVELS[@]}"; do + sed -i "s/\${L}__//g" ${prefix}.txt + done + + # Remove underscores that are standing in place of spaces + sed -i "s/_/ /g" ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krakentools/kreport2krona/main.nf b/modules/nf-core/modules/krakentools/kreport2krona/main.nf new file mode 100644 index 0000000..3bf46ee --- /dev/null +++ b/modules/nf-core/modules/krakentools/kreport2krona/main.nf @@ -0,0 +1,36 @@ +def VERSION = '1.2' // Version information not provided by tool on CLI + +process KRAKENTOOLS_KREPORT2KRONA { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::krakentools=1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': + 'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(kreport) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kreport2krona.py \\ + -r ${kreport} \\ + -o ${prefix}.txt \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kreport2krona.py: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krakentools/kreport2krona/meta.yml b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml new file mode 100644 index 0000000..2f8a163 --- /dev/null +++ b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml @@ -0,0 +1,41 @@ +name: krakentools_kreport2krona +description: Takes a Kraken report file and prints out a krona-compatible TEXT file +keywords: + - kraken + - krona + - metagenomics + - visualization +tools: + - krakentools: + description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - kreport: + type: file + description: Kraken report + pattern: "*.{txt,kreport}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - krona: + type: file + description: Krona text-based input file converted from Kraken report + pattern: "*.{txt,krona}" + +authors: + - "@MillironX" diff --git a/modules/nf-core/modules/krona/ktimporttext/main.nf b/modules/nf-core/modules/krona/ktimporttext/main.nf new file mode 100644 index 0000000..de0cfc2 --- /dev/null +++ b/modules/nf-core/modules/krona/ktimporttext/main.nf @@ -0,0 +1,34 @@ +process KRONA_KTIMPORTTEXT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::krona=2.8.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1': + 'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }" + + input: + tuple val(meta), path(report) + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + ktImportText \\ + $args \\ + -o ${prefix}.html \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/krona/ktimporttext/meta.yml b/modules/nf-core/modules/krona/ktimporttext/meta.yml new file mode 100644 index 0000000..a7108e0 --- /dev/null +++ b/modules/nf-core/modules/krona/ktimporttext/meta.yml @@ -0,0 +1,47 @@ +name: "krona_ktimporttext" +description: Creates a Krona chart from text files listing quantities and lineages. +keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart + - metagenomics +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + tool_dev_url: https://github.com/marbl/Krona + doi: 10.1186/1471-2105-12-385 + licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - report: + type: file + description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored." + pattern: "*.{txt}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - html: + type: file + description: A html file containing an interactive krona plot. + pattern: "*.{html}" + +authors: + - "@jianhong" diff --git a/nextflow.config b/nextflow.config index 73fd0b3..29c87cd 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,9 @@ params { run_diamond = false diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated! + + // krona + run_krona = false } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 2c00348..682bf2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -433,6 +433,9 @@ }, "diamond_save_reads": { "type": "boolean" + }, + "run_krona": { + "type": "boolean" } } } diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf new file mode 100644 index 0000000..87db112 --- /dev/null +++ b/subworkflows/local/visualization_krona.nf @@ -0,0 +1,55 @@ +// +// Create Krona visualizations +// + +include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main' +include { KRONA_CLEANUP } from '../../modules/local/krona_cleanup' +include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ktimporttext/main' + +workflow VISUALIZATION_KRONA { + take: + profiles + + main: + ch_krona_text = Channel.empty() + ch_krona_html = Channel.empty() + ch_versions = Channel.empty() + + /* + Split profile results based on tool they come from + */ + ch_input_profiles = profiles + .branch { + kraken2: it[0]['tool'] == 'kraken2' + unknown: true + } + + /* + Convert Kraken2 formatted reports into Krona text files + */ + ch_kraken_reports = ch_input_profiles.kraken2 + KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports ) + ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt ) + ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() ) + + /* + Remove taxonomy level annotations from the Krona text files + */ + KRONA_CLEANUP( ch_krona_text ) + ch_cleaned_krona_text = KRONA_CLEANUP.out.txt + ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() ) + + /* + Convert Krona text files into html Krona visualizations + */ + ch_krona_text_for_import = ch_cleaned_krona_text + .map{[[id: it[0]['db_name']], it[1]]} + .groupTuple() + KRONA_KTIMPORTTEXT( ch_krona_text_for_import ) + ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html ) + ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() ) + + emit: + html = ch_krona_html + versions = ch_versions +} diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index f29a366..f7ef07a 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -62,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_ include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval' include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering' include { PROFILING } from '../subworkflows/local/profiling' +include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -209,6 +210,14 @@ workflow TAXPROFILER { PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs ) ch_versions = ch_versions.mix( PROFILING.out.versions ) + /* + SUBWORKFLOW: VISUALIZATION_KRONA + */ + if ( params.run_krona ) { + VISUALIZATION_KRONA ( PROFILING.out.profiles ) + ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions ) + } + /* MODULE: MultiQC */