
Merge branch 'dev' into motus

James A. Fellows Yates, 2022-07-04 13:14:42 +02:00, committed by GitHub
commit 26e31c782d
25 changed files with 592 additions and 75 deletions

CITATIONS.md

@@ -36,6 +36,10 @@
   > Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.

+- [Krona](https://doi.org/10.1186/1471-2105-12-385)
+
+  > Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
+
 - [MALT](https://doi.org/10.1038/s41559-017-0446-6)

   > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.

bin/check_samplesheet.py

@@ -48,7 +48,7 @@ def check_samplesheet(file_in, file_out):
     2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
     """

-    FQ_EXTENSIONS = (".fq", ".fq.gz", ".fastq", ".fastq.gz")
+    FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
     FA_EXTENSIONS = (
         ".fa",
         ".fa.gz",

conf/modules.config

@@ -289,7 +289,15 @@ process {
         publishDir = [
             path: { "${params.outdir}/kraken2/${meta.db_name}" },
             mode: params.publish_dir_mode,
-            pattern: '*.{txt}'
+            pattern: '*.{txt,report,fastq.gz}'
+        ]
+    }
+    withName: KRONA_KTIMPORTTEXT {
+        publishDir = [
+            path: { "${params.outdir}/krona" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{html}'
         ]
     }
@@ -307,7 +315,7 @@ process {
         publishDir = [
             path: { "${params.outdir}/centrifuge/${meta.db_name}" },
             mode: params.publish_dir_mode,
-            pattern: '*.txt'
+            pattern: '*.{txt,sam,gz}'
         ]
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
@@ -343,6 +351,10 @@ process {
         ]
     }

+    withName: KAIJU_KAIJU2KRONA {
+        ext.args = '-v -u'
+    }
+
     withName: DIAMOND_BLASTX {
         ext.args = { "${meta.db_params}" }
         ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }

conf/test.config

@@ -38,10 +38,18 @@ params {
     run_centrifuge         = true
     run_diamond            = true
     run_motus              = false
+    run_krona              = true
+    malt_save_reads        = true
+    kraken2_save_reads     = true
+    centrifuge_save_reads  = true
+    diamond_save_reads     = true
 }

 process {
     withName: MALT_RUN {
         maxForks = 1
     }
+    withName: MEGAN_RMA2INFO {
+        maxForks = 1
+    }
 }


@@ -38,6 +38,7 @@ params {
     run_centrifuge = true
     run_diamond    = true
     run_motus      = false
+    run_krona      = true
 }

 process {

docs/usage.md

@@ -12,6 +12,8 @@
 nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).

+> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended).
+
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns and a header row, as shown in the examples below. Furthermore, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row, as in the examples below.

 This samplesheet is then specified on the command line as follows:
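(The command itself falls outside this hunk.) For reference, a minimal samplesheet sketch consistent with the example row in bin/check_samplesheet.py above; the header names are assumptions inferred from that 6-column layout rather than taken from this diff:

    sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
    2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,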
@@ -219,6 +221,12 @@ Activating this functionality will concatenate the FASTQ files with the same sam
 You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.

+##### Profiling
+
+###### MALT
+
+nf-core/taxprofiler uses MALT 0.4.1, which is a comparatively old version. However, the most recent version of MALT (0.5.*) has been found to be broken at the time of writing. [The LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should still be compatible with `malt-run` of 0.4.1.
+
 ### Updating the pipeline

 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

modules.json

@@ -28,7 +28,7 @@
             "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
         },
         "diamond/blastx": {
-            "git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de"
+            "git_sha": "bd3bfe0817246082525ab93707976676b1fe208b"
         },
         "fastp": {
             "git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
@@ -42,14 +42,23 @@
         "kaiju/kaiju": {
             "git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
         },
+        "kaiju/kaiju2krona": {
+            "git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f"
+        },
         "kaiju/kaiju2table": {
             "git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
         },
         "kraken2/kraken2": {
-            "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            "git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
+        },
+        "krakentools/kreport2krona": {
+            "git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
+        },
+        "krona/ktimporttext": {
+            "git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
         },
         "malt/run": {
-            "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
+            "git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
         },
         "megan/rma2info": {
             "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"

modules/local/krona_cleanup.nf (new file)

@@ -0,0 +1,40 @@
process KRONA_CLEANUP {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
        'biocontainers/biocontainers:v1.2.0_cv1' }"

    input:
    tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')

    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Copy the file to a new name
    cp ${krona} ${prefix}.txt

    # Remove ugly 'x__' prefixes for each of the taxonomic levels
    LEVELS=(d k p c o f g s)
    for L in "\${LEVELS[@]}"; do
        sed -i "s/\${L}__//g" ${prefix}.txt
    done

    # Remove underscores that are standing in place of spaces
    sed -i "s/_/ /g" ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
    END_VERSIONS
    """
}
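As a sketch of what the sed calls above achieve, a hypothetical Krona text line (not from this diff) before and after cleanup:

    2	d__Bacteria	p__Firmicutes	s__Bacillus_subtilis      # before
    2	Bacteria	Firmicutes	Bacillus subtilis             # after: level prefixes stripped, underscores become spaces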

modules/nf-core/modules/diamond/blastx/main.nf

@@ -2,21 +2,26 @@ process DIAMOND_BLASTX {
     tag "$meta.id"
     label 'process_medium'

-    // Diamond is limited to v2.0.9 because there is not a
-    // singularity version higher than this at the current time.
-    conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
+    conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
-        'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
+        'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
+        'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"

     input:
     tuple val(meta), path(fasta)
     path db
-    val outext
+    val out_ext
+    val blast_columns

     output:
-    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
-    path "versions.yml" , emit: versions
+    tuple val(meta), path('*.blast'), optional: true, emit: blast
+    tuple val(meta), path('*.xml')  , optional: true, emit: xml
+    tuple val(meta), path('*.txt')  , optional: true, emit: txt
+    tuple val(meta), path('*.daa')  , optional: true, emit: daa
+    tuple val(meta), path('*.sam')  , optional: true, emit: sam
+    tuple val(meta), path('*.tsv')  , optional: true, emit: tsv
+    tuple val(meta), path('*.paf')  , optional: true, emit: paf
+    path "versions.yml"             , emit: versions

     when:
     task.ext.when == null || task.ext.when
@@ -24,7 +29,8 @@ process DIAMOND_BLASTX {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    switch ( outext ) {
+    def columns = blast_columns ? "${blast_columns}" : ''
+    switch ( out_ext ) {
         case "blast": outfmt = 0; break
         case "xml": outfmt = 5; break
         case "txt": outfmt = 6; break
@@ -32,6 +38,11 @@ process DIAMOND_BLASTX {
         case "sam": outfmt = 101; break
         case "tsv": outfmt = 102; break
         case "paf": outfmt = 103; break
+        default:
+            outfmt = '6';
+            out_ext = 'txt';
+            log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
+            break
     }
     """
     DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@@ -41,9 +52,9 @@ process DIAMOND_BLASTX {
         --threads $task.cpus \\
         --db \$DB \\
         --query $fasta \\
-        --outfmt ${outfmt} \\
+        --outfmt ${outfmt} ${columns} \\
         $args \\
-        --out ${prefix}.${outext}
+        --out ${prefix}.${out_ext}

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

modules/nf-core/modules/diamond/blastx/meta.yml

@@ -28,7 +28,7 @@ input:
       type: directory
       description: Directory containing the nucleotide blast database
       pattern: "*"
-  - outext:
+  - out_ext:
       type: string
       description: |
         Specify the type of output file to be generated. `blast` corresponds to
@@ -38,10 +38,34 @@ input:
       pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
+  - blast:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{blast}"
+  - xml:
+      type: file
+      description: File containing blastx hits
+      pattern: "*.{xml}"
   - txt:
       type: file
-      description: File containing blastx hits
-      pattern: "*.{blastx.txt}"
+      description: File containing hits in tabular BLAST format.
+      pattern: "*.{txt}"
+  - daa:
+      type: file
+      description: File containing hits in DAA format
+      pattern: "*.{daa}"
+  - sam:
+      type: file
+      description: File containing aligned reads in SAM format
+      pattern: "*.{sam}"
+  - tsv:
+      type: file
+      description: Tab-separated file containing taxonomic classification of hits
+      pattern: "*.{tsv}"
+  - paf:
+      type: file
+      description: File containing aligned reads in pairwise mapping format (PAF)
+      pattern: "*.{paf}"
   - versions:
       type: file
       description: File containing software versions

modules/nf-core/modules/kaiju/kaiju2krona/main.nf (new file)

@@ -0,0 +1,39 @@
process KAIJU_KAIJU2KRONA {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
        'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"

    input:
    tuple val(meta), path(tsv)
    path(db)

    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    dbnodes=`find -L ${db} -name "*nodes.dmp"`
    dbnames=`find -L ${db} -name "*names.dmp"`
    kaiju2krona \\
        $args \\
        -t \$dbnodes \\
        -n \$dbnames \\
        -i ${tsv} \\
        -o ${prefix}.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
    END_VERSIONS
    """
}
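A minimal usage sketch, assuming a channel of [ meta, kaiju.tsv ] tuples and a database directory containing nodes.dmp/names.dmp; the visualisation subworkflow below calls the module the same way. Channel names are illustrative:

    KAIJU_KAIJU2KRONA ( ch_kaiju_results, ch_kaiju_db )
    ch_krona_text = KAIJU_KAIJU2KRONA.out.txt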

modules/nf-core/modules/kaiju/kaiju2krona/meta.yml (new file)

@@ -0,0 +1,44 @@
name: kaiju_kaiju2krona
description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
keywords:
  - taxonomy
  - visualisation
  - krona chart
  - metagenomics
tools:
  - "kaiju":
      description: Fast and sensitive taxonomic classification for metagenomics
      homepage: https://kaiju.binf.ku.dk/
      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
      doi: "10.1038/ncomms11257"
      licence: ["GNU GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - tsv:
      type: file
      description: Kaiju tab-separated output file
      pattern: "*.{tsv,txt}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - txt:
      type: file
      description: Krona text-based input file converted from Kaiju report
      pattern: "*.{txt,krona}"

authors:
  - "@MillironX"

modules/nf-core/modules/kraken2/kraken2/main.nf

@@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
     input:
     tuple val(meta), path(reads)
     path db
+    val save_output_fastqs
+    val save_reads_assignment

     output:
-    tuple val(meta), path('*classified*')   , emit: classified
-    tuple val(meta), path('*unclassified*') , emit: unclassified
-    tuple val(meta), path('*report.txt')    , emit: txt
-    path "versions.yml"                     , emit: versions
+    tuple val(meta), path('*classified*')     , optional:true, emit: classified_reads_fastq
+    tuple val(meta), path('*unclassified*')   , optional:true, emit: unclassified_reads_fastq
+    tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
+    tuple val(meta), path('*report.txt')      , emit: report
+    path "versions.yml"                       , emit: versions

     when:
     task.ext.when == null || task.ext.when
@@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
     def paired       = meta.single_end ? "" : "--paired"
     def classified   = meta.single_end ? "${prefix}.classified.fastq"   : "${prefix}.classified#.fastq"
     def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
+    def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
+    def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
+    def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
+    def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
     """
     kraken2 \\
         --db $db \\
         --threads $task.cpus \\
-        --unclassified-out $unclassified \\
-        --classified-out $classified \\
         --report ${prefix}.kraken2.report.txt \\
         --gzip-compressed \\
+        $unclassified_command \\
+        $classified_command \\
+        $readclassification_command \\
         $paired \\
         $args \\
         $reads

-    pigz -p $task.cpus *.fastq
+    $compress_reads_command

     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

modules/nf-core/modules/kraken2/kraken2/meta.yml

@@ -27,25 +27,40 @@ input:
   - db:
       type: directory
       description: Kraken2 database
+  - save_output_fastqs:
+      type: boolean
+      description: |
+        If true, optional commands are added to save classified and unclassified reads
+        as fastq files
+  - save_reads_assignment:
+      type: boolean
+      description: |
+        If true, an optional command is added to save a file reporting the taxonomic
+        classification of each input read

 output:
   - meta:
       type: map
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - classified:
+  - classified_reads_fastq:
       type: file
       description: |
-        Reads classified to belong to any of the taxa
+        Reads classified as belonging to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - unclassified:
+  - unclassified_reads_fastq:
       type: file
       description: |
-        Reads not classified to belong to any of the taxa
+        Reads not classified to any of the taxa
         on the Kraken2 database.
       pattern: "*{fastq.gz}"
-  - txt:
+  - classified_reads_assignment:
+      type: file
+      description: |
+        Kraken2 output file indicating the taxonomic assignment of
+        each input read
+  - report:
       type: file
       description: |
         Kraken2 report containing stats about classified

modules/nf-core/modules/krakentools/kreport2krona/main.nf (new file)

@@ -0,0 +1,36 @@
def VERSION = '1.2' // Version information not provided by tool on CLI

process KRAKENTOOLS_KREPORT2KRONA {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
        'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"

    input:
    tuple val(meta), path(kreport)

    output:
    tuple val(meta), path("*.txt"), emit: txt
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    kreport2krona.py \\
        -r ${kreport} \\
        -o ${prefix}.txt \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        kreport2krona.py: ${VERSION}
    END_VERSIONS
    """
}
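Because CENTRIFUGE_KREPORT emits Kraken-style reports, the visualisation subworkflow below routes both Kraken2 and Centrifuge profiles through this one converter, roughly as follows (channel names illustrative):

    ch_kraken_reports = ch_kraken2_profiles.mix( ch_centrifuge_profiles )
    KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports )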

modules/nf-core/modules/krakentools/kreport2krona/meta.yml (new file)

@@ -0,0 +1,41 @@
name: krakentools_kreport2krona
description: Takes a Kraken report file and prints out a krona-compatible TEXT file
keywords:
  - kraken
  - krona
  - metagenomics
  - visualization
tools:
  - krakentools:
      description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
      homepage: https://github.com/jenniferlu717/KrakenTools
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - kreport:
      type: file
      description: Kraken report
      pattern: "*.{txt,kreport}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - krona:
      type: file
      description: Krona text-based input file converted from Kraken report
      pattern: "*.{txt,krona}"

authors:
  - "@MillironX"

modules/nf-core/modules/krona/ktimporttext/main.nf (new file)

@@ -0,0 +1,34 @@
process KRONA_KTIMPORTTEXT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
        'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"

    input:
    tuple val(meta), path(report)

    output:
    tuple val(meta), path('*.html'), emit: html
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    ktImportText \\
        $args \\
        -o ${prefix}.html \\
        $report

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
    END_VERSIONS
    """
}

modules/nf-core/modules/krona/ktimporttext/meta.yml (new file)

@@ -0,0 +1,47 @@
name: "krona_ktimporttext"
description: Creates a Krona chart from text files listing quantities and lineages.
keywords:
  - plot
  - taxonomy
  - interactive
  - html
  - visualisation
  - krona chart
  - metagenomics
tools:
  - krona:
      description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
      homepage: https://github.com/marbl/Krona/wiki/KronaTools
      documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
      tool_dev_url: https://github.com/marbl/Krona
      doi: 10.1186/1471-2105-12-385
      licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - report:
      type: file
      description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
      pattern: "*.{txt}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - html:
      type: file
      description: A html file containing an interactive krona plot.
      pattern: "*.{html}"

authors:
  - "@jianhong"

modules/nf-core/modules/malt/run/main.nf

@@ -2,10 +2,10 @@ process MALT_RUN {
     tag "$meta.id"
     label 'process_high'

-    conda (params.enable_conda ? "bioconda::malt=0.53" : null)
+    conda (params.enable_conda ? "bioconda::malt=0.41" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
-        'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
+        'quay.io/biocontainers/malt:0.41--1' }"

     input:
     tuple val(meta), path(fastqs)
@@ -33,7 +33,6 @@ process MALT_RUN {
     """
     malt-run \\
-        -J-Xmx${avail_mem}g \\
         -t $task.cpus \\
         -v \\
         -o . \\

nextflow.config

@@ -102,16 +102,17 @@ params {
     // MALT
     run_malt                        = false
     malt_mode                       = 'BlastN'
-    malt_generatemegansummary       = false
+    malt_generate_megansummary      = false
+    malt_save_reads                 = false

     // kraken2
     run_kraken2                     = false
+    kraken2_save_reads              = false
+    kraken2_save_readclassification = false

     // centrifuge
     run_centrifuge                  = false
-    centrifuge_save_unaligned       = false
-    centrifuge_save_aligned         = false
-    centrifuge_sam_format           = false
+    centrifuge_save_reads           = false

     // metaphlan3
     run_metaphlan3                  = false
@@ -122,10 +123,14 @@ params {
     // diamond
     run_diamond                     = false
-    diamond_output_format           = 'txt'
+    diamond_output_format           = 'tsv'   // TSV is apparently the only format with taxonomic information
+    diamond_save_reads              = false   // this overrides the default DIAMOND output format, so no taxonomic profile is generated!

     // mOTUs
     run_motus                       = false
+
+    // krona
+    run_krona                       = false
 }

 // Load base.config by default for all pipelines
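A hedged example of switching the new options on at runtime; the profile and file names are placeholders:

    nextflow run nf-core/taxprofiler -profile docker \
        --input samplesheet.csv --databases databases.csv --outdir results \
        --run_kraken2 --run_krona \
        --kraken2_save_reads --kraken2_save_readclassification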

nextflow_schema.json

@@ -260,6 +260,8 @@
         "properties": {
             "databases": {
                 "type": "string",
+                "mimetype": "text/csv",
+                "format": "file-path",
                 "default": "None"
             },
             "shortread_qc_excludeunmerged": {
@@ -278,15 +280,6 @@
             "run_centrifuge": {
                 "type": "boolean"
             },
-            "centrifuge_save_unaligned": {
-                "type": "boolean"
-            },
-            "centrifuge_save_aligned": {
-                "type": "boolean"
-            },
-            "centrifuge_sam_format": {
-                "type": "boolean"
-            },
             "run_metaphlan3": {
                 "type": "boolean",
                 "description": "Enable MetaPhlAn for taxonomic profiling"
@@ -386,7 +379,7 @@
             "run_kaiju": {
                 "type": "boolean"
             },
-            "malt_generatemegansummary": {
+            "malt_generate_megansummary": {
                 "type": "boolean"
             },
             "kaiju_taxon_name": {
@@ -430,6 +423,24 @@
             },
             "run_motus": {
                 "type": "boolean"
+            },
+            "malt_save_reads": {
+                "type": "boolean"
+            },
+            "kraken2_save_reads": {
+                "type": "boolean"
+            },
+            "kraken2_save_readclassification": {
+                "type": "boolean"
+            },
+            "centrifuge_save_reads": {
+                "type": "boolean"
+            },
+            "diamond_save_reads": {
+                "type": "boolean"
+            },
+            "run_krona": {
+                "type": "boolean"
             }
         }
     }

subworkflows/local/longread_preprocessing.nf

@@ -48,7 +48,7 @@ workflow LONGREAD_PREPROCESSING {
     }

-    FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
+    FASTQC_PROCESSED ( ch_processed_reads )
     ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

     emit:

subworkflows/local/profiling.nf

@@ -21,7 +21,8 @@ workflow PROFILING {
     main:
     ch_versions             = Channel.empty()
     ch_multiqc_files        = Channel.empty()
-    ch_raw_profiles         = Channel.empty()
+    ch_raw_classifications  = Channel.empty()
+    ch_raw_profiles         = Channel.empty()

     /*
         COMBINE READS WITH POSSIBLE DATABASES
@@ -62,14 +63,29 @@ workflow PROFILING {
         // MALT: We groupTuple to have all samples in one channel for MALT as database
         // loading takes a long time, so we only want to run it once per database
+        // TODO document somewhere we only accept illumina short reads for MALT?
         ch_input_for_malt = ch_input_for_profiling.malt
             .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
             .map {
-                it ->
-                    def temp_meta = [ id: it[2]['db_name']] + it[2]
-                    def db = it[3]
-                    [ temp_meta, it[1], db ]
+                meta, reads, db_meta, db ->
+
+                    // Reset the entire input meta for MALT to just the database name:
+                    // we don't run on a per-sample basis (the databases are huge), so
+                    // all samples go into one run and sample-specific metadata is
+                    // unnecessary. Using the database name also prevents a `null`
+                    // job ID and prefix.
+                    def temp_meta = [ id: meta['db_name'] ]
+
+                    // Extend database parameters to specify whether to save alignments or not
+                    def new_db_meta = db_meta.clone()
+                    def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
+                    new_db_meta['db_params'] = db_meta['db_params'] + sam_format
+
+                    // Combine reduced sample metadata with updated database parameters,
+                    // making sure id is db_name for publishing purposes.
+                    def new_meta = temp_meta + new_db_meta
+                    new_meta['id'] = new_meta['db_name']
+
+                    [ new_meta, reads, db ]
+
             }
             .groupTuple(by: [0,2])
             .multiMap {
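To make the remapping concrete, one tuple passing through the `.map` above might change roughly as follows (field values are illustrative; the exact contents of db_meta depend on the database sheet):

    // in : [ [id:'sample1', run_accession:'ERR0000001', instrument_platform:'ILLUMINA'], reads, [tool:'malt', db_name:'maltdb', db_params:''], db ]
    // out: [ [id:'maltdb', tool:'malt', db_name:'maltdb', db_params:' --alignments ./ -za false'], reads, db ]   // with params.malt_save_reads = true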
@@ -93,10 +109,11 @@ workflow PROFILING {
                 [ meta_new, rma ]
             }

-        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
+        MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
         ch_multiqc_files       = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
         ch_versions            = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
         ch_raw_profiles        = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
     }
@@ -109,10 +126,11 @@ workflow PROFILING {
                 db: it[3]
             }

-        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
-        ch_multiqc_files       = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
+        KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
+        ch_multiqc_files       = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
         ch_versions            = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
-        ch_raw_profiles        = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
+        ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
+        ch_raw_profiles        = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
     }
@@ -129,10 +147,11 @@ workflow PROFILING {
                 db: it[3]
             }

-        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
+        CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
         CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
         ch_versions            = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
         ch_raw_profiles        = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
     }
@@ -168,6 +187,7 @@ workflow PROFILING {
         KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
         ch_multiqc_files       = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
         ch_versions            = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
         ch_raw_profiles        = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
     }
@@ -181,9 +201,13 @@ workflow PROFILING {
                 db: it[3]
             }

-        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
+        // DIAMOND only accepts a single output-file specification, so saving
+        // reads will replace the default output file!
+        ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
+
+        DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format, [] )
         ch_versions            = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
-        ch_raw_profiles        = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
+        ch_raw_profiles        = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )
     }
@@ -207,7 +231,8 @@ workflow PROFILING {
     }

     emit:
-    profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
-    versions = ch_versions     // channel: [ versions.yml ]
-    mqc      = ch_multiqc_files
+    classifications = ch_raw_classifications
+    profiles        = ch_raw_profiles   // channel: [ val(meta), [ reads ] ] - should be text files or biom
+    versions        = ch_versions       // channel: [ versions.yml ]
+    mqc             = ch_multiqc_files
 }

subworkflows/local/visualization_krona.nf (new file)

@@ -0,0 +1,84 @@
//
// Create Krona visualizations
//

include { KAIJU_KAIJU2KRONA         } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main'
include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
include { KRONA_CLEANUP             } from '../../modules/local/krona_cleanup'
include { KRONA_KTIMPORTTEXT        } from '../../modules/nf-core/modules/krona/ktimporttext/main'

workflow VISUALIZATION_KRONA {
    take:
    classifications
    profiles
    databases

    main:
    ch_krona_text = Channel.empty()
    ch_krona_html = Channel.empty()
    ch_versions   = Channel.empty()

    /*
        Split profile results based on tool they come from
    */
    ch_input_profiles = profiles
        .branch {
            centrifuge: it[0]['tool'] == 'centrifuge'
            kraken2: it[0]['tool'] == 'kraken2'
            unknown: true
        }

    ch_input_classifications = classifications
        .branch {
            kaiju: it[0]['tool'] == 'kaiju'
            unknown: true
        }

    /*
        Convert Kraken2 formatted reports into Krona text files
    */
    ch_kraken_reports = ch_input_profiles.kraken2
        .mix( ch_input_profiles.centrifuge )
    KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports )
    ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt )
    ch_versions   = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )

    /*
        Combine Kaiju profiles with their databases
    */
    ch_input_for_kaiju2krona = ch_input_classifications.kaiju
        .map{ [it[0]['db_name'], it[0], it[1]] }
        .combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 )
        .multiMap{
            it ->
                profiles: [it[1], it[2]]
                db: it[3]
        }

    /*
        Convert Kaiju formatted reports into Krona text files
    */
    KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db )
    ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt )
    ch_versions   = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() )

    /*
        Remove taxonomy level annotations from the Krona text files
    */
    KRONA_CLEANUP( ch_krona_text )
    ch_cleaned_krona_text = KRONA_CLEANUP.out.txt
    ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() )

    /*
        Convert Krona text files into html Krona visualizations
    */
    ch_krona_text_for_import = ch_cleaned_krona_text
        .map{ [[id: it[0]['db_name']], it[1]] }
        .groupTuple()
    KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
    ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
    ch_versions   = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )

    emit:
    html     = ch_krona_html
    versions = ch_versions
}

workflows/taxprofiler.nf

@@ -33,6 +33,8 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre
 if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
 if (params.longread_hostremoval_index  ) { ch_longread_reference_index  = file(params.longread_hostremoval_index  ) } else { ch_longread_reference_index  = [] }

+if (params.diamond_save_reads          ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads is supplied, only aligned reads in SAM format will be produced; no taxonomic profiles will be available."
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     CONFIG FILES
@@ -60,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_
 include { LONGREAD_HOSTREMOVAL          } from '../subworkflows/local/longread_hostremoval'
 include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
 include { PROFILING                     } from '../subworkflows/local/profiling'
+include { VISUALIZATION_KRONA           } from '../subworkflows/local/visualization_krona'

 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -207,6 +210,14 @@ workflow TAXPROFILER {
     PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
     ch_versions = ch_versions.mix( PROFILING.out.versions )

+    /*
+        SUBWORKFLOW: VISUALIZATION_KRONA
+    */
+    if ( params.run_krona ) {
+        VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
+        ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
+    }
+
     /*
         MODULE: MultiQC
     */