1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 03:06:08 +00:00

Merge branch 'dev' into motus

This commit is contained in:
James A. Fellows Yates 2022-07-04 13:14:42 +02:00 committed by GitHub
commit 26e31c782d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 592 additions and 75 deletions

View file

@ -36,6 +36,10 @@
> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [Krona](https://doi.org/10.1186/1471-2105-12-385)
> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.

View file

@ -48,7 +48,7 @@ def check_samplesheet(file_in, file_out):
2613,ERR5766181,ILLUMINA,ERX5474937_ERR5766181_1.fastq.gz,ERX5474937_ERR5766181_2.fastq.gz,
"""
FQ_EXTENSIONS = (".fq", ".fq.gz", ".fastq", ".fastq.gz")
FQ_EXTENSIONS = (".fq.gz", ".fastq.gz")
FA_EXTENSIONS = (
".fa",
".fa.gz",

View file

@ -289,7 +289,15 @@ process {
publishDir = [
path: { "${params.outdir}/kraken2/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt}'
pattern: '*.{txt,report,fastq.gz}'
]
}
withName: KRONA_KTIMPORTTEXT {
publishDir = [
path: { "${params.outdir}/krona" },
mode: params.publish_dir_mode,
pattern: '*.{html}'
]
}
@ -307,7 +315,7 @@ process {
publishDir = [
path: { "${params.outdir}/centrifuge/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.txt'
pattern: '*.{txt,sam,gz}'
]
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
@ -343,6 +351,10 @@ process {
]
}
withName: KAIJU_KAIJU2KRONA {
ext.args = '-v -u'
}
withName: DIAMOND_BLASTX {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }

View file

@ -38,10 +38,18 @@ params {
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
malt_save_reads = true
kraken2_save_reads = true
centrifuge_save_reads = true
diamond_save_reads = true
}
process {
withName: MALT_RUN {
maxForks = 1
}
withName: MEGAN_RMA2INFO {
maxForks = 1
}
}

View file

@ -38,6 +38,7 @@ params {
run_centrifuge = true
run_diamond = true
run_motus = false
run_krona = true
}
process {

View file

@ -12,6 +12,8 @@
nf-core/taxprofiler can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files, long-read FASTQ files (e.g. Oxford Nanopore), or FASTA sequences (available for a subset of profilers).
> ⚠️ Input FASTQ files _must_ be gzipped, while FASTA files may optionally be uncompressed (although this is not recommended)
You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. Furthermother, nf-core/taxprofiler also requires a second comma-separated file of 3 columns with a header row as in the examples below.
This samplesheet is then specified on the command line as follows:
@ -219,6 +221,12 @@ Activating this functionality will concatenate the FASTQ files with the same sam
You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`.
##### Profiling
###### MALT
nf-core/taxprofiler uses MALT 0.4.1, which is a compatively old version. However it has been found that the most recent version of MALT (0.5.\*), at the time of writing, is broken. [The the LCA step appears not to be executed](http://megan.informatik.uni-tuebingen.de/t/lca-placement-failure-with-malt-v-0-5-2-and-0-5-3/1996/3), pushing all hits to the leaves of the taxonomy. However, if you need to use a more recent taxonomy map file with your databases, the output of `malt-build` from MALT 0.5.3 should be still be compatible with `malt-run` of 0.4.1.
### Updating the pipeline
When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

View file

@ -28,7 +28,7 @@
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"diamond/blastx": {
"git_sha": "42564565b934eeb2449e35ec97ed13ff2a67f1de"
"git_sha": "bd3bfe0817246082525ab93707976676b1fe208b"
},
"fastp": {
"git_sha": "d0a1cbb703a130c19f6796c3fce24fbe7dfce789"
@ -42,14 +42,23 @@
"kaiju/kaiju": {
"git_sha": "8856f127c58f6af479128be8b8df4d42e442ddbe"
},
"kaiju/kaiju2krona": {
"git_sha": "2f0b19240430de6807b1232e6d9d0e8084e8a28f"
},
"kaiju/kaiju2table": {
"git_sha": "538dbac98ba9c8f799536cd5a617195501439457"
},
"kraken2/kraken2": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
"git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
},
"krakentools/kreport2krona": {
"git_sha": "8b2a473f586bed003e72d2b183acc43fc0ddc422"
},
"krona/ktimporttext": {
"git_sha": "cdefbec66999c0b49d8bfeea9d6f9d19056635a2"
},
"malt/run": {
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
"git_sha": "be8d7b3293cac26cc63e4dbfb364deb8ed6ec7e5"
},
"megan/rma2info": {
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"

View file

@ -0,0 +1,40 @@
process KRONA_CLEANUP {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
input:
tuple val(meta), path(krona, stageAs: 'uncleaned.krona.txt')
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
# Copy the file to a new name
cp ${krona} ${prefix}.txt
# Remove ugly 'x__' prefixes for each of the taxonomic levels
LEVELS=(d k p c o f g s)
for L in "\${LEVELS[@]}"; do
sed -i "s/\${L}__//g" ${prefix}.txt
done
# Remove underscores that are standing in place of spaces
sed -i "s/_/ /g" ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
END_VERSIONS
"""
}

View file

@ -2,21 +2,26 @@ process DIAMOND_BLASTX {
tag "$meta.id"
label 'process_medium'
// Dimaond is limited to v2.0.9 because there is not a
// singularity version higher than this at the current time.
conda (params.enable_conda ? "bioconda::diamond=2.0.9" : null)
conda (params.enable_conda ? "bioconda::diamond=2.0.15" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/diamond:2.0.9--hdcc8f71_0' :
'quay.io/biocontainers/diamond:2.0.9--hdcc8f71_0' }"
'https://depot.galaxyproject.org/singularity/diamond:2.0.15--hb97b32f_0' :
'quay.io/biocontainers/diamond:2.0.15--hb97b32f_0' }"
input:
tuple val(meta), path(fasta)
path db
val outext
val out_ext
val blast_columns
output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
path "versions.yml" , emit: versions
tuple val(meta), path('*.blast'), optional: true, emit: blast
tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -24,7 +29,8 @@ process DIAMOND_BLASTX {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) {
def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
@ -32,6 +38,11 @@ process DIAMOND_BLASTX {
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
}
"""
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -41,9 +52,9 @@ process DIAMOND_BLASTX {
--threads $task.cpus \\
--db \$DB \\
--query $fasta \\
--outfmt ${outfmt} \\
--outfmt ${outfmt} ${columns} \\
$args \\
--out ${prefix}.${outext}
--out ${prefix}.${out_ext}
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -28,7 +28,7 @@ input:
type: directory
description: Directory containing the nucelotide blast database
pattern: "*"
- outext:
- out_ext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
@ -38,10 +38,34 @@ input:
pattern: "blast|xml|txt|daa|sam|tsv|paf"
output:
- blast:
type: file
description: File containing blastp hits
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing blastx hits
pattern: "*.{blastx.txt}"
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format format
pattern: "*.{paf}"
- versions:
type: file
description: File containing software versions

View file

@ -0,0 +1,39 @@
process KAIJU_KAIJU2KRONA {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::kaiju=1.8.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/kaiju:1.8.2--h5b5514e_1':
'quay.io/biocontainers/kaiju:1.8.2--h5b5514e_1' }"
input:
tuple val(meta), path(tsv)
path(db)
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
dbnodes=`find -L ${db} -name "*nodes.dmp"`
dbnames=`find -L ${db} -name "*names.dmp"`
kaiju2krona \\
$args \\
-t \$dbnodes \\
-n \$dbnames \\
-i ${tsv} \\
-o ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,44 @@
name: kaiju_kaiju2krona
description: Convert Kaiju's tab-separated output file into a tab-separated text file which can be imported into Krona.
keywords:
- taxonomy
- visualisation
- krona chart
- metagenomics
tools:
- "kaiju":
description: Fast and sensitive taxonomic classification for metagenomics
homepage: https://kaiju.binf.ku.dk/
documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
tool_dev_url: https://github.com/bioinformatics-centre/kaiju
doi: "10.1038/ncomms11257"
licence: ["GNU GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- tsv:
type: file
description: Kaiju tab-separated output file
pattern: "*.{tsv,txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: Krona text-based input file converted from Kaiju report
pattern: "*.{txt,krona}"
authors:
- "@MillironX"

View file

@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 {
input:
tuple val(meta), path(reads)
path db
val save_output_fastqs
val save_reads_assignment
output:
tuple val(meta), path('*classified*') , emit: classified
tuple val(meta), path('*unclassified*'), emit: unclassified
tuple val(meta), path('*report.txt') , emit: txt
path "versions.yml" , emit: versions
tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq
tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq
tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment
tuple val(meta), path('*report.txt') , emit: report
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 {
def paired = meta.single_end ? "" : "--paired"
def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq"
def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq"
def classified_command = save_output_fastqs ? "--classified-out ${classified}" : ""
def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : ""
def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : ""
def compress_reads_command = save_output_fastqs ? "pigz -p $task.cpus *.fastq" : ""
"""
kraken2 \\
--db $db \\
--threads $task.cpus \\
--unclassified-out $unclassified \\
--classified-out $classified \\
--report ${prefix}.kraken2.report.txt \\
--gzip-compressed \\
$unclassified_command \\
$classified_command \\
$readclassification_command \\
$paired \\
$args \\
$reads
pigz -p $task.cpus *.fastq
$compress_reads_command
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -27,25 +27,40 @@ input:
- db:
type: directory
description: Kraken2 database
- save_output_fastqs:
type: boolean
description: |
If true, optional commands are added to save classified and unclassified reads
as fastq files
- save_reads_assignment:
type: boolean
description: |
If true, an optional command is added to save a file reporting the taxonomic
classification of each input read
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- classified:
- classified_reads_fastq:
type: file
description: |
Reads classified to belong to any of the taxa
Reads classified as belonging to any of the taxa
on the Kraken2 database.
pattern: "*{fastq.gz}"
- unclassified:
- unclassified_reads_fastq:
type: file
description: |
Reads not classified to belong to any of the taxa
Reads not classified to any of the taxa
on the Kraken2 database.
pattern: "*{fastq.gz}"
- txt:
- classified_reads_assignment:
type: file
description: |
Kraken2 output file indicating the taxonomic assignment of
each input read
- report:
type: file
description: |
Kraken2 report containing stats about classified

View file

@ -0,0 +1,36 @@
def VERSION = '1.2' // Version information not provided by tool on CLI
process KRAKENTOOLS_KREPORT2KRONA {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::krakentools=1.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0':
'quay.io/biocontainers/krakentools:1.2--pyh5e36f6f_0' }"
input:
tuple val(meta), path(kreport)
output:
tuple val(meta), path("*.txt"), emit: txt
path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
kreport2krona.py \\
-r ${kreport} \\
-o ${prefix}.txt \\
${args}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
kreport2krona.py: ${VERSION}
END_VERSIONS
"""
}

View file

@ -0,0 +1,41 @@
name: krakentools_kreport2krona
description: Takes a Kraken report file and prints out a krona-compatible TEXT file
keywords:
- kraken
- krona
- metagenomics
- visualization
tools:
- krakentools:
description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs.
homepage: https://github.com/jenniferlu717/KrakenTools
licence: ["GPL v3"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- kreport:
type: file
description: Kraken report
pattern: "*.{txt,kreport}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- krona:
type: file
description: Krona text-based input file converted from Kraken report
pattern: "*.{txt,krona}"
authors:
- "@MillironX"

View file

@ -0,0 +1,34 @@
process KRONA_KTIMPORTTEXT {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::krona=2.8.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/krona:2.8.1--pl5321hdfd78af_1':
'quay.io/biocontainers/krona:2.8.1--pl5321hdfd78af_1' }"
input:
tuple val(meta), path(report)
output:
tuple val(meta), path ('*.html'), emit: html
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
ktImportText \\
$args \\
-o ${prefix}.html \\
$report
cat <<-END_VERSIONS > versions.yml
"${task.process}":
krona: \$( echo \$(ktImportText 2>&1) | sed 's/^.*KronaTools //g; s/- ktImportText.*\$//g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,47 @@
name: "krona_ktimporttext"
description: Creates a Krona chart from text files listing quantities and lineages.
keywords:
- plot
- taxonomy
- interactive
- html
- visualisation
- krona chart
- metagenomics
tools:
- krona:
description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files.
homepage: https://github.com/marbl/Krona/wiki/KronaTools
documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html
tool_dev_url: https://github.com/marbl/Krona
doi: 10.1186/1471-2105-12-385
licence: https://raw.githubusercontent.com/marbl/Krona/master/KronaTools/LICENSE.txt
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test']
- report:
type: file
description: "Tab-delimited text file. Each line should be a number followed by a list of wedges to contribute to (starting from the highest level). If no wedges are listed (and just a quantity is given), it will contribute to the top level. If the same lineage is listed more than once, the values will be added. Quantities can be omitted if -q is specified. Lines beginning with '#' will be ignored."
pattern: "*.{txt}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- html:
type: file
description: A html file containing an interactive krona plot.
pattern: "*.{html}"
authors:
- "@jianhong"

View file

@ -2,10 +2,10 @@ process MALT_RUN {
tag "$meta.id"
label 'process_high'
conda (params.enable_conda ? "bioconda::malt=0.53" : null)
conda (params.enable_conda ? "bioconda::malt=0.41" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/malt:0.53--hdfd78af_0' :
'quay.io/biocontainers/malt:0.53--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/malt:0.41--1' :
'quay.io/biocontainers/malt:0.41--1' }"
input:
tuple val(meta), path(fastqs)
@ -33,7 +33,6 @@ process MALT_RUN {
"""
malt-run \\
-J-Xmx${avail_mem}g \\
-t $task.cpus \\
-v \\
-o . \\

View file

@ -102,16 +102,17 @@ params {
// MALT
run_malt = false
malt_mode = 'BlastN'
malt_generatemegansummary = false
malt_generate_megansummary = false
malt_save_reads = false
// kraken2
run_kraken2 = false
run_kraken2 = false
kraken2_save_reads = false
kraken2_save_readclassification = false
// centrifuge
run_centrifuge = false
centrifuge_save_unaligned = false
centrifuge_save_aligned = false
centrifuge_sam_format = false
centrifuge_save_reads = false
// metaphlan3
run_metaphlan3 = false
@ -122,10 +123,14 @@ params {
// diamond
run_diamond = false
diamond_output_format = 'txt'
diamond_output_format = 'tsv' // TSV is only format with taxonomic information apparently
diamond_save_reads = false // this will override default diamond output format so no taxonomic profile is generated!
// mOTUs
run_motus = false
// krona
run_krona = false
}
// Load base.config by default for all pipelines

View file

@ -260,6 +260,8 @@
"properties": {
"databases": {
"type": "string",
"mimetype": "text/csv",
"format": "file-path",
"default": "None"
},
"shortread_qc_excludeunmerged": {
@ -278,15 +280,6 @@
"run_centrifuge": {
"type": "boolean"
},
"centrifuge_save_unaligned": {
"type": "boolean"
},
"centrifuge_save_aligned": {
"type": "boolean"
},
"centrifuge_sam_format": {
"type": "boolean"
},
"run_metaphlan3": {
"type": "boolean",
"description": "Enable MetaPhlAn for taxonomic profiling"
@ -386,7 +379,7 @@
"run_kaiju": {
"type": "boolean"
},
"malt_generatemegansummary": {
"malt_generate_megansummary": {
"type": "boolean"
},
"kaiju_taxon_name": {
@ -430,6 +423,24 @@
},
"run_motus": {
"type": "boolean"
},
"malt_save_reads": {
"type": "boolean"
},
"kraken2_save_reads": {
"type": "boolean"
},
"kraken2_save_readclassification": {
"type": "boolean"
},
"centrifuge_save_reads": {
"type": "boolean"
},
"diamond_save_reads": {
"type": "boolean"
},
"run_krona": {
"type": "boolean"
}
}
}

View file

@ -48,7 +48,7 @@ workflow LONGREAD_PREPROCESSING {
}
FASTQC_PROCESSED ( ch_processed_reads.dump(tag: "filtlong") )
FASTQC_PROCESSED ( ch_processed_reads )
ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
emit:

View file

@ -21,7 +21,8 @@ workflow PROFILING {
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_raw_profiles = Channel.empty()
ch_raw_classifications = Channel.empty()
ch_raw_profiles = Channel.empty()
/*
COMBINE READS WITH POSSIBLE DATABASES
@ -62,14 +63,29 @@ workflow PROFILING {
// MALT: We groupTuple to have all samples in one channel for MALT as database
// loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt
.filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.map {
it ->
def temp_meta = [ id: it[2]['db_name']] + it[2]
def db = it[3]
[ temp_meta, it[1], db ]
meta, reads, db_meta, db ->
// Reset entire input meta for MALT to just database name,
// as we don't run run on a per-sample basis due to huge datbaases
// so all samples are in one run and so sample-specific metadata
// unnecessary. Set as database name to prevent `null` job ID and prefix.
def temp_meta = [ id: meta['db_name'] ]
// Extend database parameters to specify whether to save alignments or not
def new_db_meta = db_meta.clone()
def sam_format = params.malt_save_reads ? ' --alignments ./ -za false' : ""
new_db_meta['db_params'] = db_meta['db_params'] + sam_format
// Combine reduced sample metadata with updated database parameters metadata,
// make sure id is db_name for publishing purposes.
def new_meta = temp_meta + new_db_meta
new_meta['id'] = new_meta['db_name']
[ new_meta, reads, db ]
}
.groupTuple(by: [0,2])
.multiMap {
@ -93,10 +109,11 @@ workflow PROFILING {
[ meta_new, rma ]
}
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generate_megansummary )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( ch_maltrun_for_megan )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
}
@ -109,10 +126,11 @@ workflow PROFILING {
db: it[3]
}
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db, params.kraken2_save_reads, params.kraken2_save_readclassification )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.report.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KRAKEN2_KRAKEN2.out.classified_reads_assignment )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.report )
}
@ -129,10 +147,11 @@ workflow PROFILING {
db: it[3]
}
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads )
CENTRIFUGE_KREPORT (CENTRIFUGE_CENTRIFUGE.out.results, ch_input_for_centrifuge.db)
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_KREPORT.out.kreport )
}
@ -168,6 +187,7 @@ workflow PROFILING {
KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_name)
ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
}
@ -181,9 +201,13 @@ workflow PROFILING {
db: it[3]
}
DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, params.diamond_output_format )
// diamond only accepts single output file specification, therefore
// this will replace output file!
ch_diamond_reads_format = params.diamond_save_reads ? 'sam' : params.diamond_output_format
DIAMOND_BLASTX ( ch_input_for_diamond.reads, ch_input_for_diamond.db, ch_diamond_reads_format , [] )
ch_versions = ch_versions.mix( DIAMOND_BLASTX.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.output )
ch_raw_profiles = ch_raw_profiles.mix( DIAMOND_BLASTX.out.tsv )
}
@ -207,7 +231,8 @@ workflow PROFILING {
}
emit:
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
classifications = ch_raw_classifications
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}

View file

@ -0,0 +1,84 @@
//
// Create Krona visualizations
//
include { KAIJU_KAIJU2KRONA } from '../../modules/nf-core/modules/kaiju/kaiju2krona/main'
include { KRAKENTOOLS_KREPORT2KRONA } from '../../modules/nf-core/modules/krakentools/kreport2krona/main'
include { KRONA_CLEANUP } from '../../modules/local/krona_cleanup'
include { KRONA_KTIMPORTTEXT } from '../../modules/nf-core/modules/krona/ktimporttext/main'
workflow VISUALIZATION_KRONA {
take:
classifications
profiles
databases
main:
ch_krona_text = Channel.empty()
ch_krona_html = Channel.empty()
ch_versions = Channel.empty()
/*
Split profile results based on tool they come from
*/
ch_input_profiles = profiles
.branch {
centrifuge: it[0]['tool'] == 'centrifuge'
kraken2: it[0]['tool'] == 'kraken2'
unknown: true
}
ch_input_classifications = classifications
.branch {
kaiju: it[0]['tool'] == 'kaiju'
unknown: true
}
/*
Convert Kraken2 formatted reports into Krona text files
*/
ch_kraken_reports = ch_input_profiles.kraken2
.mix( ch_input_profiles.centrifuge )
KRAKENTOOLS_KREPORT2KRONA ( ch_kraken_reports )
ch_krona_text = ch_krona_text.mix( KRAKENTOOLS_KREPORT2KRONA.out.txt )
ch_versions = ch_versions.mix( KRAKENTOOLS_KREPORT2KRONA.out.versions.first() )
/*
Combine Kaiju profiles with their databases
*/
ch_input_for_kaiju2krona = ch_input_classifications.kaiju
.map{ [it[0]['db_name'], it[0], it[1]] }
.combine( databases.map{ [it[0]['db_name'], it[1]] }, by: 0 )
.multiMap{
it ->
profiles: [it[1], it[2]]
db: it[3]
}
/*
Convert Kaiju formatted reports into Krona text files
*/
KAIJU_KAIJU2KRONA( ch_input_for_kaiju2krona.profiles, ch_input_for_kaiju2krona.db )
ch_krona_text = ch_krona_text.mix( KAIJU_KAIJU2KRONA.out.txt )
ch_versions = ch_versions.mix( KAIJU_KAIJU2KRONA.out.versions.first() )
/*
Remove taxonomy level annotations from the Krona text files
*/
KRONA_CLEANUP( ch_krona_text )
ch_cleaned_krona_text = KRONA_CLEANUP.out.txt
ch_versions = ch_versions.mix( KRONA_CLEANUP.out.versions.first() )
/*
Convert Krona text files into html Krona visualizations
*/
ch_krona_text_for_import = ch_cleaned_krona_text
.map{[[id: it[0]['db_name']], it[1]]}
.groupTuple()
KRONA_KTIMPORTTEXT( ch_krona_text_for_import )
ch_krona_html = ch_krona_html.mix( KRONA_KTIMPORTTEXT.out.html )
ch_versions = ch_versions.mix( KRONA_KTIMPORTTEXT.out.versions.first() )
emit:
html = ch_krona_html
versions = ch_versions
}

View file

@ -33,6 +33,8 @@ if (params.hostremoval_reference ) { ch_reference = file(params.hostre
if (params.shortread_hostremoval_index ) { ch_shortread_reference_index = file(params.shortread_hostremoval_index ) } else { ch_shortread_reference_index = [] }
if (params.longread_hostremoval_index ) { ch_longread_reference_index = file(params.longread_hostremoval_index ) } else { ch_longread_reference_index = [] }
if (params.diamond_save_reads ) log.warn "[nf-core/taxprofiler] DIAMOND only allows output of a single format. As --diamond_save_reads supplied, only aligned reads in SAM format will be produced, no taxonomic profiles will be available."
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
@ -60,6 +62,7 @@ include { SHORTREAD_HOSTREMOVAL } from '../subworkflows/local/shortread_
include { LONGREAD_HOSTREMOVAL } from '../subworkflows/local/longread_hostremoval'
include { SHORTREAD_COMPLEXITYFILTERING } from '../subworkflows/local/shortread_complexityfiltering'
include { PROFILING } from '../subworkflows/local/profiling'
include { VISUALIZATION_KRONA } from '../subworkflows/local/visualization_krona'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -207,6 +210,14 @@ workflow TAXPROFILER {
PROFILING ( ch_reads_runmerged, DB_CHECK.out.dbs )
ch_versions = ch_versions.mix( PROFILING.out.versions )
/*
SUBWORKFLOW: VISUALIZATION_KRONA
*/
if ( params.run_krona ) {
VISUALIZATION_KRONA ( PROFILING.out.classifications, PROFILING.out.profiles, DB_CHECK.out.dbs )
ch_versions = ch_versions.mix( VISUALIZATION_KRONA.out.versions )
}
/*
MODULE: MultiQC
*/