1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-12-22 15:28:16 +00:00

Merge pull request #91 from MillironX/feature/kreport2krona

Add Krona output for Kraken classification
This commit is contained in:
James A. Fellows Yates 2022-06-27 16:55:55 +02:00 committed by GitHub
commit 5e554a3a0b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 288 additions and 0 deletions

View file

@ -36,6 +36,10 @@
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.

View file

@ -293,6 +293,14 @@ process {
]
}
withName: KRONA_KTIMPORTTEXT {
    // Publish only the rendered Krona HTML charts, under <outdir>/krona,
    // using the pipeline-wide publish mode.
    publishDir = [
        pattern: '*.{html}',
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/krona" }
    ]
}
withName: METAPHLAN3 {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }

View file

@ -37,6 +37,7 @@ params {
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_krona = true
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true

View file

@ -37,6 +37,7 @@ params {
run_metaphlan3 = true
run_centrifuge = true
run_diamond = true
run_krona = true
}
process {

View file

@ -48,6 +48,12 @@
"kraken2/kraken2": {
"git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
},
"krakentools/kreport2krona": {
"git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
},
"krona/ktimporttext": {
"git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
},
"malt/run": {
"git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
},

View file

@ -0,0 +1,40 @@
//
// Tidy a Krona text file: strip the single-letter rank tags (e.g. 'k__')
// and turn underscores into spaces, writing the result to <prefix>.txt.
//
process KRONA_CLEANUP {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
        'biocontainers/biocontainers:v1.2.0_cv1' }"

    input:
    // The input is staged under a fixed name so it never clashes with <prefix>.txt
    tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')

    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def cleaned         = "${task.ext.prefix ?: meta.id}.txt"
    // One sed expression per rank tag, applied in the order d k p c o f g s
    def strip_rank_tags = ['d', 'k', 'p', 'c', 'o', 'f', 'g', 's']
        .collect { rank -> "-e 's/${rank}__//g'" }
        .join(' ')
    """
    # Work on a copy of the staged input
    cp ${krona} ${cleaned}

    # Drop the 'x__' rank tags first, then replace the underscores
    # that stand in for spaces
    sed -i ${strip_rank_tags} -e 's/_/ /g' ${cleaned}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,36 @@
//
// Convert a Kraken-style report into a Krona-compatible text file
// using KrakenTools' kreport2krona.py.
//
// Inputs : tuple(meta, kreport) - sample metadata map and the Kraken report
// Outputs: txt      - tuple(meta, <prefix>.txt) Krona text file
//          versions - versions.yml recording the tool version
//
process KRAKENTOOLS_KREPORT2KRONA {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"

    input:
    tuple val(meta), path(kreport)

    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args    = task.ext.args ?: ''
    def prefix  = task.ext.prefix ?: "${meta.id}"
    // Version information is not provided by the tool on the CLI, so it is
    // hard-coded here. It is declared inside the script block (not at file
    // scope) so the module file contains only declarations.
    // Keep this string in sync with the conda/container versions above.
    def VERSION = '1.2'
    """
    kreport2krona.py \\
        -r ${kreport} \\
        -o ${prefix}.txt \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kreport2krona.py: ${VERSION}
    END_VERSIONS
    """
}

View file

@ -0,0 +1,41 @@
# Module metadata for KRAKENTOOLS_KREPORT2KRONA.
# Output entries are named after the process `emit:` channels (txt, versions).
name: krakentools_kreport2krona
description: Takes a Kraken report file and prints out a krona-compatible TEXT file
keywords:
  - kraken
  - krona
  - metagenomics
  - visualization
tools:
  - krakentools:
      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
      homepage: https://github.com/jenniferlu717/KrakenTools
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - kreport:
      type: file
      description: Kraken report
      pattern: "*.{txt,kreport}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The process emits this channel as `txt` and always writes <prefix>.txt
  - txt:
      type: file
      description: Krona text-based input file converted from Kraken report
      pattern: "*.txt"

authors:
  - "@MillironX"

View file

@ -0,0 +1,34 @@
//
// Render one interactive Krona HTML chart from the given Krona text
// file(s) with ktImportText.
//
process KRONA_KTIMPORTTEXT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
        'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"

    input:
    tuple val(meta), path(report)

    output:
    tuple val(meta), path ('*.html'), emit: html
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def cli_opts   = task.ext.args ?: ''
    def chart_name = task.ext.prefix ?: "${meta.id}"
    """
    ktImportText \\
        ${cli_opts} \\
        -o ${chart_name}.html \\
        ${report}

    # The version is parsed from the banner ktImportText prints when run without arguments
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,47 @@
# Module metadata for KRONA_KTIMPORTTEXT.
name: krona_ktimporttext
description: Creates a Krona chart from text files listing quantities and lineages.
keywords:
  - plot
  - taxonomy
  - interactive
  - html
  - visualisation
  - krona chart
  - metagenomics
tools:
  - krona:
      description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
      homepage: https://github.com/marbl/Krona/wiki/KronaTools
      # NOTE(review): this URL names the ktImportTaxonomy man page, while the
      # module runs ktImportText - confirm whether that is intended.
      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
      tool_dev_url: https://github.com/marbl/Krona
      doi: 10.1186/1471-2105-12-385
      licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - report:
      type: file
      description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
      pattern: "*.{txt}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - html:
      type: file
      description: A html file containing an interactive krona plot.
      pattern: "*.{html}"

authors:
  - "@jianhong"

View file

@ -125,6 +125,9 @@ params {
run_diamond = false
diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently
diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated!
// krona
run_krona = false
}
// Load base.config by default for all pipelines

View file

@ -433,6 +433,9 @@
},
"diamond_save_reads": {
"type": "boolean"
},
"run_krona": {
"type": "boolean"
}
}
}

View file

@ -0,0 +1,55 @@
//
// Create Krona visualizations
//
include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
include { KRONA_CLEANUP } from '../../modules/local/krona_cleanup'
include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ktimporttext/main'
//
// Build Krona HTML charts from profiler output.
//
// take:
//   profiles - channel of lists whose first element is read as a map with
//              'tool' and 'db_name' keys and whose second element is the
//              profile file
// emit:
//   html     - Krona HTML charts, one per database name
//   versions - versions.yml files of the modules that ran
//
workflow VISUALIZATION_KRONA {
    take:
    profiles

    main:
    ch_versions = Channel.empty()

    // Route profiles by the tool that produced them. Only the kraken2
    // branch is consumed below; everything else lands in 'unknown'.
    ch_profiles_by_tool = profiles
        .branch { entry ->
            kraken2: entry[0]['tool'] == 'kraken2'
            unknown: true
        }

    // Kraken2 report -> Krona text
    KRAKENTOOLS_KREPORT2KRONA ( ch_profiles_by_tool.kraken2 )
    ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )

    // Strip the taxonomy level annotations from the Krona text
    KRONA_CLEANUP ( KRAKENTOOLS_KREPORT2KRONA.out.txt )
    ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() )

    // Collect the cleaned text files per database so that each database
    // yields a single chart; the database name becomes the new meta id.
    // NOTE(review): grouped files are staged together, so their names must
    // be unique within a database - confirm the configured ext.prefix of the
    // upstream modules guarantees this.
    ch_krona_text_per_db = KRONA_CLEANUP.out.txt
        .map { entry -> [ [ id: entry[0]['db_name'] ], entry[1] ] }
        .groupTuple()

    // Krona text -> interactive HTML
    KRONA_KTIMPORTTEXT ( ch_krona_text_per_db )
    ch_krona_html = KRONA_KTIMPORTTEXT.out.html
    ch_versions   = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )

    emit:
    html     = ch_krona_html
    versions = ch_versions
}

View file

@ -62,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling'
include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -209,6 +210,14 @@ workflow TAXPROFILER {
PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
ch_versions = ch_versions.mix( PROFILING.out.versions )
/*
SUBWORKFLOW: VISUALIZATION_KRONA
*/
if ( params.run_krona ) {
VISUALIZATION_KRONA ( PROFILING.out.profiles )
ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
}
/*
MODULE: MultiQC
*/