diff --git a/CITATIONS.md b/CITATIONS.md
index 8044658..1ce4ec2 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -36,6 +36,10 @@
 
   > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
 
+- [Krona](https://doi.org/10.1186/1471-2105-12-385)
+
+  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
+
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)
 
   > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index d10ee90..47c6452 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -48,7 +48,7 @@ def check_samplesheet(file_in, file_out):
     2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
     """
 
-    FQ_EXTENSIONS = (".fq", ".fq.gz", ".fastq", ".fastq.gz")
+    FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
     FA_EXTENSIONS = (
         ".fa",
         ".fa.gz",
diff --git a/conf/modules.config b/conf/modules.config
index 66967e2..3e96760 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -289,7 +289,15 @@ process {
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: params.publish_dir_mode,
-            pattern: '*.{txt}'
+            pattern: '*.{txt,report,fastq.gz}'
+        ]
+    }
+
+    withName: KRONA_KTIMPORTTEXT {
+        publishDir = [
+            path: { "${params.outdir}/krona" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{html}'
         ]
     }
 
@@ -307,7 +315,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/centrifuge/${meta.db_name}" },
            mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: '*.{txt,sam,gz}'
         ]
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
@@ -343,6 +351,10 @@ process {
         ]
     }
 
+    withName: KAIJU_KAIJU2KRONA {
+        ext.args = '-v -u'
+    }
+
     withName: DIAMOND_BLASTX {
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
diff --git a/conf/test.config b/conf/test.config
index 3d7d51b..564c0e8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -38,10 +38,18 @@ params {
     run_centrifuge        = true
     run_diamond           = true
     run_motus             = false
+    run_krona             = true
+    malt_save_reads       = true
+    kraken2_save_reads    = true
+    centrifuge_save_reads = true
+    diamond_save_reads    = true
 }
 
 process {
     withName: MALT_RUN {
         maxForks = 1
     }
+    withName: MEGAN_RMA2INFO {
+        maxForks = 1
+    }
 }
diff --git a/conf/test_nopreprocessing.config b/conf/test_nopreprocessing.config
index d93cf77..3908b56 100644
--- a/conf/test_nopreprocessing.config
+++ b/conf/test_nopreprocessing.config
@@ -38,6 +38,7 @@ params {
     run_centrifuge = true
     run_diamond    = true
     run_motus      = false
+    run_krona      = true
 }
 
 process {
diff --git a/docs/usage.md b/docs/usage.md
index 4090cf2..1bcd235 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -12,6 +12,8 @@
 
 nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
 
+> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended).
+
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.
 
 This samplesheet is then specified on the command line as follows:
@@ -219,6 +221,12 @@ Activating this functionality will concatenate the FASTQ files with the same sam
 You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
 
+##### Profiling
+
+###### MALT
+
+nf-core/taxprofiler uses MALT 0.4.1, which is a comparatively old version. At the time of writing, however, the most recent version of MALT (0.5.\*) has been found to be broken: [the LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should still be compatible with `malt-run` of 0.4.1.
+
 ### Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
diff --git a/modules.json b/modules.json
index 758ac22..2524d25 100644
--- a/modules.json
+++ b/modules.json
@@ -28,7 +28,7 @@
             "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
         },
         "diamond/blastx": {
-            "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de"
+            "git_sha": "bd3bfe0817246082525ab93707976676b1fe208b"
         },
         "fastp": {
             "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
@@ -42,14 +42,23 @@
         "kaiju/kaiju": {
             "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
         },
+        "kaiju/kaiju2krona": {
+            "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f"
+        },
         "kaiju/kaiju2table": {
             "git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
         },
         "kraken2/kraken2": {
-            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            "git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
+        },
+        "krakentools/kreport2krona": {
+            "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
+        },
+        "krona/ktimporttext": {
+            "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
         },
         "malt/run": {
-            "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
+            "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
         },
         "megan/rma2info": {
             "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
diff --git a/modules/local/krona_cleanup.nf b/modules/local/krona_cleanup.nf
new file mode 100644
index 0000000..804cb69
--- /dev/null
+++ b/modules/local/krona_cleanup.nf
@@ -0,0 +1,40 @@
+process KRONA_CLEANUP {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
+        'biocontainers/biocontainers:v1.2.0_cv1' }"
+
+    input:
+    tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # Copy the file to a new name
+    cp ${krona} ${prefix}.txt
+
+    # Remove ugly 'x__' prefixes for each of the taxonomic levels
+    LEVELS=(d k p c o f g s)
+    for L in "\${LEVELS[@]}"; do
+        sed -i "s/\${L}__//g" ${prefix}.txt
+    done
+
+    # Remove underscores that are standing in place of spaces
+    sed -i "s/_/ /g" ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/diamond/blastx/main.nf b/modules/nf-core/modules/diamond/blastx/main.nf
index 6703c1e..d327227 100644
--- a/modules/nf-core/modules/diamond/blastx/main.nf
+++ b/modules/nf-core/modules/diamond/blastx/main.nf
@@ -2,21 +2,26 @@ process DIAMOND_BLASTX {
     tag "$meta.id"
     label 'process_medium'
 
-    // Dimaond is limited to v2.0.9 because there is not a
-    // singularity version higher than this at the current time.
-    conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
+    conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
-        'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
+        'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
+        'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
 
     input:
     tuple val(meta), path(fasta)
     path db
-    val outext
+    val out_ext
+    val blast_columns
 
     output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml"                                       , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"             , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -24,7 +29,8 @@ process DIAMOND_BLASTX {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
         case "blast": outfmt = 0; break
         case "xml": outfmt = 5; break
         case "txt": outfmt = 6; break
@@ -32,6 +38,11 @@ process DIAMOND_BLASTX {
         case "sam": outfmt = 101; break
         case "tsv": outfmt = 102; break
         case "paf": outfmt = 103; break
+        default:
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            outfmt = 6;
+            out_ext = 'txt';
+            break
     }
     """
     DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
 
     diamond \\
         blastx \\
         --threads $task.cpus \\
         --db \$DB \\
         --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
         $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/modules/diamond/blastx/meta.yml b/modules/nf-core/modules/diamond/blastx/meta.yml
index 5ee2d55..2dcd7bc 100644
--- a/modules/nf-core/modules/diamond/blastx/meta.yml
+++ b/modules/nf-core/modules/diamond/blastx/meta.yml
@@ -28,7 +28,7 @@ input:
       type: directory
       description: Directory containing the nucleotide blast database
       pattern: "*"
-  - outext:
+  - out_ext:
       type: string
       description: |
         Specify the type of output file to be generated. `blast` corresponds to
@@ -38,10 +38,34 @@ input:
       pattern: "blast|xml|txt|daa|sam|tsv|paf"
 
 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
   - txt:
       type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format (PAF)
+      pattern: "*.{paf}"
   - versions:
       type: file
       description: File containing software versions
diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/main.nf b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf
new file mode 100644
index 0000000..c95d5a7
--- /dev/null
+++ b/modules/nf-core/modules/kaiju/kaiju2krona/main.nf
@@ -0,0 +1,39 @@
+process KAIJU_KAIJU2KRONA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
+        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
+
+    input:
+    tuple val(meta), path(tsv)
+    path(db)
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbnames=`find -L ${db} -name "*names.dmp"`
+    kaiju2krona \\
+        $args \\
+        -t \$dbnodes \\
+        -n \$dbnames \\
+        -i ${tsv} \\
+        -o ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml
new file mode 100644
index 0000000..a0dc2fd
--- /dev/null
+++ b/modules/nf-core/modules/kaiju/kaiju2krona/meta.yml
@@ -0,0 +1,44 @@
+name: kaiju_kaiju2krona
+description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
+keywords:
+  - taxonomy
+  - visualisation
+  - krona chart
+  - metagenomics
+tools:
+  - "kaiju":
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - tsv:
+      type: file
+      description: Kaiju tab-separated output file
+      pattern: "*.{tsv,txt}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt:
+      type: file
+      description: Krona text-based input file converted from Kaiju report
+      pattern: "*.{txt,krona}"
+
+authors:
+  - "@MillironX"
diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf
index 3ec5df5..d400023 100644
--- a/modules/nf-core/modules/kraken2/kraken2/main.nf
+++ b/modules/nf-core/modules/kraken2/kraken2/main.nf
@@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
     input:
     tuple val(meta), path(reads)
     path db
+    val save_output_fastqs
+    val save_reads_assignment
 
     output:
-    tuple val(meta), path('*classified*')  , emit: classified
-    tuple val(meta), path('*unclassified*'), emit: unclassified
-    tuple val(meta), path('*report.txt')   , emit: txt
-    path "versions.yml"                    , emit: versions
+    tuple val(meta), path('*classified*')     , optional:true, emit: classified_reads_fastq
+    tuple val(meta), path('*unclassified*')   , optional:true, emit: unclassified_reads_fastq
+    tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
+    tuple val(meta), path('*report.txt')      , emit: report
+    path "versions.yml"                       , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
     def paired       = meta.single_end ? "" : "--paired"
     def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
     def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
+
     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
         $paired \\
         $args \\
         $reads
 
-    pigz -p $task.cpus *.fastq
+    $compress_reads_command
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/modules/kraken2/kraken2/meta.yml b/modules/nf-core/modules/kraken2/kraken2/meta.yml
index 9d6a385..7129fe3 100644
--- a/modules/nf-core/modules/kraken2/kraken2/meta.yml
+++ b/modules/nf-core/modules/kraken2/kraken2/meta.yml
@@ -27,25 +27,40 @@ input:
   - db:
       type: directory
      description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read
 output:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
       type: file
       description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
       type: file
       description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
       type: file
       description: |
         Kraken2 report containing stats about classified
diff --git a/modules/nf-core/modules/krakentools/kreport2krona/main.nf b/modules/nf-core/modules/krakentools/kreport2krona/main.nf
new file mode 100644
index 0000000..3bf46ee
--- /dev/null
+++ b/modules/nf-core/modules/krakentools/kreport2krona/main.nf
@@ -0,0 +1,36 @@
+def VERSION = '1.2' // Version information not provided by tool on CLI
+
+process KRAKENTOOLS_KREPORT2KRONA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
+        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
+
+    input:
+    tuple val(meta), path(kreport)
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    kreport2krona.py \\
+        -r ${kreport} \\
+        -o ${prefix}.txt \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kreport2krona.py: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/krakentools/kreport2krona/meta.yml b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml
new file mode 100644
index 0000000..2f8a163
--- /dev/null
+++ b/modules/nf-core/modules/krakentools/kreport2krona/meta.yml
@@ -0,0 +1,41 @@
+name: krakentools_kreport2krona
+description: Takes a Kraken report file and prints out a krona-compatible TEXT file
+keywords:
+  - kraken
+  - krona
+  - metagenomics
+  - visualization
+tools:
+  - krakentools:
+      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
+      homepage: https://github.com/jenniferlu717/KrakenTools
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - kreport:
+      type: file
+      description: Kraken report
+      pattern: "*.{txt,kreport}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - krona:
+      type: file
+      description: Krona text-based input file converted from Kraken report
+      pattern: "*.{txt,krona}"
+
+authors:
+  - "@MillironX"
diff --git a/modules/nf-core/modules/krona/ktimporttext/main.nf b/modules/nf-core/modules/krona/ktimporttext/main.nf
new file mode 100644
index 0000000..de0cfc2
--- /dev/null
+++ b/modules/nf-core/modules/krona/ktimporttext/main.nf
@@ -0,0 +1,34 @@
+process KRONA_KTIMPORTTEXT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
+        'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(report)
+
+    output:
+    tuple val(meta), path ('*.html'), emit: html
+    path "versions.yml"             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    ktImportText \\
+        $args \\
+        -o ${prefix}.html \\
+        $report
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/krona/ktimporttext/meta.yml b/modules/nf-core/modules/krona/ktimporttext/meta.yml
new file mode 100644
index 0000000..a7108e0
--- /dev/null
+++ b/modules/nf-core/modules/krona/ktimporttext/meta.yml
@@ -0,0 +1,47 @@
+name: "krona_ktimporttext"
+description: Creates a Krona chart from text files listing quantities and lineages.
+keywords:
+  - plot
+  - taxonomy
+  - interactive
+  - html
+  - visualisation
+  - krona chart
+  - metagenomics
+tools:
+  - krona:
+      description: Krona Tools is a set of scripts to create Krona charts from several bioinformatics tools as well as from text and XML files.
+      homepage: https://github.com/marbl/Krona/wiki/KronaTools
+      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
+      tool_dev_url: https://github.com/marbl/Krona
+      doi: 10.1186/1471-2105-12-385
+      licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - report:
+      type: file
+      description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
+      pattern: "*.{txt}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - html:
+      type: file
+      description: An HTML file containing an interactive Krona plot.
+      pattern: "*.{html}"
+
+authors:
+  - "@jianhong"
diff --git a/modules/nf-core/modules/malt/run/main.nf b/modules/nf-core/modules/malt/run/main.nf
index 4e2e50c..2b91d90 100644
--- a/modules/nf-core/modules/malt/run/main.nf
+++ b/modules/nf-core/modules/malt/run/main.nf
@@ -2,10 +2,10 @@ process MALT_RUN {
     tag "$meta.id"
     label 'process_high'
 
-    conda (params.enable_conda ? "bioconda::malt=0.53" : null)
+    conda (params.enable_conda ? "bioconda::malt=0.41" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
-        'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
+        'quay.io/biocontainers/malt:0.41--1' }"
 
     input:
     tuple val(meta), path(fastqs)
@@ -33,7 +33,6 @@ process MALT_RUN {
 
     """
     malt-run \\
-        -J-Xmx${avail_mem}g \\
         -t $task.cpus \\
         -v \\
         -o . \\
diff --git a/nextflow.config b/nextflow.config
index 7f9f4c1..a5616e7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -102,16 +102,17 @@ params {
     // MALT
     run_malt                     = false
     malt_mode                    = 'BlastN'
-    malt_generatemegansummary    = false
+    malt_generate_megansummary   = false
+    malt_save_reads              = false
 
     // kraken2
-    run_kraken2                  = false
+    run_kraken2                      = false
+    kraken2_save_reads               = false
+    kraken2_save_readclassification  = false
 
     // centrifuge
     run_centrifuge               = false
-    centrifuge_save_unaligned    = false
-    centrifuge_save_aligned      = false
-    centrifuge_sam_format        = false
+    centrifuge_save_reads        = false
 
     // metaphlan3
     run_metaphlan3               = false
@@ -122,10 +123,14 @@ params {
 
     // diamond
     run_diamond                  = false
-    diamond_output_format        = 'txt'
+    diamond_output_format        = 'tsv'  // TSV is apparently the only format with taxonomic information
+    diamond_save_reads           = false  // this will override the default DIAMOND output format, so no taxonomic profile is generated!
 
     // mOTUs
     run_motus                    = false
+
+    // krona
+    run_krona                    = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b380b84..e857ec8 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -260,6 +260,8 @@
         "properties": {
             "databases": {
                 "type": "string",
+                "mimetype": "text/csv",
+                "format": "file-path",
                 "default": "None"
             },
             "shortread_qc_excludeunmerged": {
@@ -278,15 +280,6 @@
             "run_centrifuge": {
                 "type": "boolean"
             },
-            "centrifuge_save_unaligned": {
-                "type": "boolean"
-            },
-            "centrifuge_save_aligned": {
-                "type": "boolean"
-            },
-            "centrifuge_sam_format": {
-                "type": "boolean"
-            },
             "run_metaphlan3": {
                 "type": "boolean",
                 "description": "Enable MetaPhlAn for taxonomic profiling"
@@ -386,7 +379,7 @@
             "run_kaiju": {
                 "type": "boolean"
             },
-            "malt_generatemegansummary": {
+            "malt_generate_megansummary": {
                "type": "boolean"
            },
            "kaiju_taxon_name": {
@@ -430,6 +423,24 @@
            },
            "run_motus": {
                "type": "boolean"
+            },
+            "malt_save_reads": {
+                "type": "boolean"
+            },
+            "kraken2_save_reads": {
+                "type": "boolean"
+            },
+            "kraken2_save_readclassification": {
+                "type": "boolean"
+            },
+            "centrifuge_save_reads": {
+                "type": "boolean"
+            },
+            "diamond_save_reads": {
+                "type": "boolean"
+            },
+            "run_krona": {
+                "type": "boolean"
            }
        }
    }
diff --git a/subworkflows/local/longread_preprocessing.nf b/subworkflows/local/longread_preprocessing.nf
index 5ae5417..6a23b0e 100644
--- a/subworkflows/local/longread_preprocessing.nf
+++ b/subworkflows/local/longread_preprocessing.nf
@@ -48,7 +48,7 @@ workflow LONGREAD_PREPROCESSING {
 
     }
 
-    FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
+    FASTQC_PROCESSED ( ch_processed_reads )
     ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
 
     emit:
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index 2813bc7..60963c9 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -21,7 +21,8 @@ workflow PROFILING {
     main:
     ch_versions             = Channel.empty()
     ch_multiqc_files        = Channel.empty()
-    ch_raw_profiles         = Channel.empty()
+    ch_raw_classifications  = Channel.empty()
+    ch_raw_profiles         = Channel.empty()
 
     /*
         COMBINE READS WITH POSSIBLE DATABASES
    */
@@ -62,14 +63,29 @@ workflow PROFILING {
 
         // MALT: We groupTuple to have all samples in one channel for MALT as database
         // loading takes a long time, so we only want to run it once per database
-        // TODO document somewhere we only accept illumina short reads for MALT?
         ch_input_for_malt =  ch_input_for_profiling.malt
                                 .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
                                 .map {
-                                    it ->
-                                        def temp_meta = [ id: it[2]['db_name']] + it[2]
-                                        def db = it[3]
-                                        [ temp_meta, it[1], db ]
+                                    meta, reads, db_meta, db ->
+
+                                        // Reset entire input meta for MALT to just database name,
+                                        // as we don't run on a per-sample basis due to huge databases,
+                                        // so all samples are in one run and sample-specific metadata
+                                        // is unnecessary. Set as database name to prevent `null` job ID and prefix.
+                                        def temp_meta = [ id: meta['db_name'] ]
+
+                                        // Extend database parameters to specify whether to save alignments or not
+                                        def new_db_meta = db_meta.clone()
+                                        def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
+                                        new_db_meta['db_params'] = db_meta['db_params'] + sam_format
+
+                                        // Combine reduced sample metadata with the updated database metadata,
+                                        // making sure id is db_name for publishing purposes.
+                                        def new_meta = temp_meta + new_db_meta
+                                        new_meta['id'] = new_meta['db_name']
+
+                                        [ new_meta, reads, db ]
+
                                 }
                                 .groupTuple(by: [0,2])
                                 .multiMap {
                                     it ->
@@ -93,10 +109,11 @@ workflow PROFILING {
                 [ meta_new, rma ]
             }
 
-        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
-        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
+        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
+        ch_multiqc_files       = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
+        ch_versions            = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
+        ch_raw_profiles        = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
 
     }
 
@@ -109,10 +126,11 @@ workflow PROFILING {
                                     db: it[3]
                                 }
 
-        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
-        ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
+        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
+        ch_multiqc_files       = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
+        ch_versions            = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
+        ch_raw_profiles        = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
 
     }
 
@@ -129,10 +147,11 @@ workflow PROFILING {
                                     db: it[3]
                                 }
 
-        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
+        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
         CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
-        ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
+        ch_versions            = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
+        ch_raw_profiles        = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
 
     }
 
@@ -168,6 +187,7 @@ workflow PROFILING {
         KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
         ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
         ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
         ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
 
     }
@@ -181,9 +201,13 @@ workflow PROFILING {
                                     db: it[3]
                                 }
 
-        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
+        // DIAMOND only accepts a single output file specification, therefore
+        // this will replace the output file!
+        ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
+
+        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format , [] )
         ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
-        ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
+        ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )
 
     }
 
@@ -207,7 +231,8 @@
     }
 
     emit:
-    profiles    = ch_raw_profiles    // channel: [ val(meta), [ reads ] ] - should be text files or biom
-    versions    = ch_versions        // channel: [ versions.yml ]
-    mqc         = ch_multiqc_files
+    classifications = ch_raw_classifications
+    profiles        = ch_raw_profiles        // channel: [ val(meta), [ reads ] ] - should be text files or biom
+    versions        = ch_versions            // channel: [ versions.yml ]
+    mqc             = ch_multiqc_files
 }
diff --git a/subworkflows/local/visualization_krona.nf b/subworkflows/local/visualization_krona.nf
new file mode 100644
index 0000000..c5ca97a
--- /dev/null
+++ b/subworkflows/local/visualization_krona.nf
@@ -0,0 +1,84 @@
+//
+// Create Krona visualizations
+//
+
+include { KAIJU_KAIJU2KRONA         } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main'
+include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
+include { KRONA_CLEANUP             } from '../../modules/local/krona_cleanup'
+include { KRONA_KTIMPORTTEXT        } from '../../modules/nf-core/modules/krona/ktimporttext/main'
+
+workflow VISUALIZATION_KRONA {
+    take:
+    classifications
+    profiles
+    databases
+
+    main:
+    ch_krona_text = Channel.empty()
+    ch_krona_html = Channel.empty()
+    ch_versions   = Channel.empty()
+
+    /*
+        Split profile results based on tool they come from
+    */
+    ch_input_profiles = profiles
+        .branch {
+            centrifuge: it[0]['tool'] == 'centrifuge'
+            kraken2: it[0]['tool'] == 'kraken2'
+            unknown: true
+        }
+    ch_input_classifications = classifications
+        .branch {
+            kaiju: it[0]['tool'] == 'kaiju'
+            unknown: true
+        }
+
+    /*
+        Convert Kraken2 formatted reports into Krona text files
+    */
+    ch_kraken_reports = ch_input_profiles.kraken2
+        .mix( ch_input_profiles.centrifuge )
+    KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports )
+    ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt )
+    ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )
+
+    /*
+        Combine Kaiju profiles with their databases
+    */
+    ch_input_for_kaiju2krona = ch_input_classifications.kaiju
+        .map{ [it[0]['db_name'], it[0], it[1]] }
+        .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 )
+        .multiMap{
+            it ->
+                profiles: [it[1], it[2]]
+                db: it[3]
+        }
+
+    /*
+        Convert Kaiju formatted reports into Krona text files
+    */
+    KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db )
+    ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt )
+    ch_versions = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() )
+
+    /*
+        Remove taxonomy level annotations from the Krona text files
+    */
+    KRONA_CLEANUP( ch_krona_text )
+    ch_cleaned_krona_text = KRONA_CLEANUP.out.txt
+    ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() )
+
+    /*
+        Convert Krona text files into html Krona visualizations
+    */
+    ch_krona_text_for_import = ch_cleaned_krona_text
+        .map{[[id: it[0]['db_name']], it[1]]}
+        .groupTuple()
+    KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
+    ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
+    ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
+
+    emit:
+    html     = ch_krona_html
+    versions = ch_versions
+}
diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf
index 4b13ca8..7eec13d 100644
--- a/workflows/taxprofiler.nf
+++ b/workflows/taxprofiler.nf
@@ -33,6 +33,8 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre
 if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
 if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] }
 
+if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads was supplied, only aligned reads in SAM format will be produced; no taxonomic profiles will be available."
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     CONFIG FILES
@@ -60,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_
 include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_hostremoval'
 include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
 include { PROFILING                     } from '../subworkflows/local/profiling'
+include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -207,6 +210,14 @@ workflow TAXPROFILER {
     PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
     ch_versions = ch_versions.mix( PROFILING.out.versions )
 
+    /*
+        SUBWORKFLOW: VISUALIZATION_KRONA
+    */
+    if ( params.run_krona ) {
+        VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
+        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
+    }
+
     /*
         MODULE: MultiQC
    */
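
For reference, the new options introduced by this diff can be exercised together at runtime. The sketch below is illustrative only: the samplesheet/database CSV paths, output directory, and `-profile docker` are placeholders, not files provided by this changeset.

```bash
# Sketch: toggles the new per-profiler save-reads flags and the Krona
# visualisation subworkflow added above. Paths and profile are placeholders.
nextflow run nf-core/taxprofiler \
    -profile docker \
    --input samplesheet.csv \
    --databases databases.csv \
    --outdir ./results \
    --run_kraken2 --kraken2_save_reads --kraken2_save_readclassification \
    --run_centrifuge --centrifuge_save_reads \
    --run_kaiju \
    --run_malt --malt_save_reads \
    --run_krona
```

Note that VISUALIZATION_KRONA only branches on Kraken2, Centrifuge, and Kaiju results, so Krona HTML output is expected for those profilers only. Combining `--run_diamond` with `--diamond_save_reads` triggers the warning added in workflows/taxprofiler.nf above: DIAMOND output switches to SAM and no taxonomic profile is produced.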