mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-26 06:09:56 +00:00
Merge pull request #55 from nf-core/megan-postprocessing
Add MEGAN for MALT postprocessing
This commit is contained in:
commit
53d76559af
8 changed files with 150 additions and 15 deletions
10
CITATIONS.md
10
CITATIONS.md
|
@ -40,9 +40,17 @@
|
||||||
|
|
||||||
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||||
|
|
||||||
|
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
|
||||||
|
|
||||||
|
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
|
||||||
|
|
||||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||||
|
|
||||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088.
|
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. eLife 10 (May): e65088. doi: 10.7554/eLife.65088.
|
||||||
|
|
||||||
|
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
|
||||||
|
|
||||||
|
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
|
||||||
|
|
||||||
## Software packaging/containerisation tools
|
## Software packaging/containerisation tools
|
||||||
|
|
||||||
|
|
|
@ -191,11 +191,22 @@ process {
|
||||||
|
|
||||||
withName: MALT_RUN {
|
withName: MALT_RUN {
|
||||||
ext.args = { "${meta.db_params}" }
|
ext.args = { "${meta.db_params}" }
|
||||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
// one run with multiple samples, so fix ID to just db name to ensure clean log name
|
||||||
|
ext.prefix = { "${meta.db_name}" }
|
||||||
publishDir = [
|
publishDir = [
|
||||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||||
mode: params.publish_dir_mode,
|
mode: params.publish_dir_mode,
|
||||||
pattern: '*.{log}'
|
pattern: '*.{rma6,log,sam}'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
withName: MEGAN_RMA2INFO {
|
||||||
|
ext.args = "-c2c Taxonomy"
|
||||||
|
ext.prefix = { "${meta.id}" }
|
||||||
|
publishDir = [
|
||||||
|
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||||
|
mode: params.publish_dir_mode,
|
||||||
|
pattern: '*.{txt.gz,megan}'
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,9 @@
|
||||||
"malt/run": {
|
"malt/run": {
|
||||||
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
||||||
},
|
},
|
||||||
|
"megan/rma2info": {
|
||||||
|
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
|
||||||
|
},
|
||||||
"metaphlan3": {
|
"metaphlan3": {
|
||||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||||
},
|
},
|
||||||
|
|
38
modules/nf-core/modules/megan/rma2info/main.nf
generated
Normal file
38
modules/nf-core/modules/megan/rma2info/main.nf
generated
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
process MEGAN_RMA2INFO {
|
||||||
|
tag "$meta.id"
|
||||||
|
label 'process_low'
|
||||||
|
|
||||||
|
conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
|
||||||
|
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||||
|
'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
|
||||||
|
'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
|
||||||
|
|
||||||
|
input:
|
||||||
|
tuple val(meta), path(rma6)
|
||||||
|
val(megan_summary)
|
||||||
|
|
||||||
|
output:
|
||||||
|
tuple val(meta), path("*.txt.gz") , emit: txt
|
||||||
|
tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
|
||||||
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
|
when:
|
||||||
|
task.ext.when == null || task.ext.when
|
||||||
|
|
||||||
|
script:
|
||||||
|
def args = task.ext.args ?: ''
|
||||||
|
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||||
|
def summary = megan_summary ? "-es ${prefix}.megan" : ""
|
||||||
|
"""
|
||||||
|
rma2info \\
|
||||||
|
-i ${rma6} \\
|
||||||
|
-o ${prefix}.txt.gz \\
|
||||||
|
${summary} \\
|
||||||
|
$args
|
||||||
|
|
||||||
|
cat <<-END_VERSIONS > versions.yml
|
||||||
|
"${task.process}":
|
||||||
|
megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
|
||||||
|
END_VERSIONS
|
||||||
|
"""
|
||||||
|
}
|
51
modules/nf-core/modules/megan/rma2info/meta.yml
generated
Normal file
51
modules/nf-core/modules/megan/rma2info/meta.yml
generated
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
name: "megan_rma2info"
|
||||||
|
description: Analyses an RMA file and exports information in text format
|
||||||
|
keywords:
|
||||||
|
- megan
|
||||||
|
- rma6
|
||||||
|
- classification
|
||||||
|
- conversion
|
||||||
|
tools:
|
||||||
|
- "megan":
|
||||||
|
description: "A tool for studying the taxonomic content of a set of DNA reads"
|
||||||
|
homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
|
||||||
|
documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
|
||||||
|
tool_dev_url: "https://github.com/husonlab/megan-ce"
|
||||||
|
doi: "10.1371/journal.pcbi.1004957"
|
||||||
|
licence: "['GPL >=3']"
|
||||||
|
|
||||||
|
input:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- rma6:
|
||||||
|
type: file
|
||||||
|
description: RMA6 file from MEGAN or MALT
|
||||||
|
pattern: "*.rma6"
|
||||||
|
- megan_summary:
|
||||||
|
type: boolean
|
||||||
|
description: Specify whether to generate a MEGAN summary file
|
||||||
|
|
||||||
|
output:
|
||||||
|
- meta:
|
||||||
|
type: map
|
||||||
|
description: |
|
||||||
|
Groovy Map containing sample information
|
||||||
|
e.g. [ id:'test', single_end:false ]
|
||||||
|
- versions:
|
||||||
|
type: file
|
||||||
|
description: File containing software versions
|
||||||
|
pattern: "versions.yml"
|
||||||
|
- txt:
|
||||||
|
type: file
|
||||||
|
description: Compressed text file
|
||||||
|
pattern: "*.txt.gz"
|
||||||
|
- megan_summary:
|
||||||
|
type: file
|
||||||
|
description: Optionally generated MEGAN summary file
|
||||||
|
pattern: "*.megan"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@jfy133"
|
|
@ -88,6 +88,7 @@ params {
|
||||||
// MALT
|
// MALT
|
||||||
run_malt = false
|
run_malt = false
|
||||||
malt_mode = 'BlastN'
|
malt_mode = 'BlastN'
|
||||||
|
malt_generatemegansummary = false
|
||||||
|
|
||||||
// kraken2
|
// kraken2
|
||||||
run_kraken2 = false
|
run_kraken2 = false
|
||||||
|
|
|
@ -364,11 +364,14 @@
|
||||||
},
|
},
|
||||||
"shortread_hostremoval_reference": {
|
"shortread_hostremoval_reference": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": null
|
"default": "None"
|
||||||
},
|
},
|
||||||
"shortread_hostremoval_index": {
|
"shortread_hostremoval_index": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": null
|
"default": "None"
|
||||||
|
},
|
||||||
|
"malt_generatemegansummary": {
|
||||||
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
|
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
|
||||||
|
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
|
||||||
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
|
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
|
||||||
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
|
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||||
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
|
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
|
||||||
|
@ -13,8 +14,9 @@ workflow PROFILING {
|
||||||
databases // [ [ meta ], path ]
|
databases // [ [ meta ], path ]
|
||||||
|
|
||||||
main:
|
main:
|
||||||
ch_versions = Channel.empty()
|
ch_versions = Channel.empty()
|
||||||
ch_multiqc_files = Channel.empty()
|
ch_multiqc_files = Channel.empty()
|
||||||
|
ch_raw_profiles = Channel.empty()
|
||||||
|
|
||||||
/*
|
/*
|
||||||
COMBINE READS WITH POSSIBLE DATABASES
|
COMBINE READS WITH POSSIBLE DATABASES
|
||||||
|
@ -94,30 +96,48 @@ workflow PROFILING {
|
||||||
|
|
||||||
if ( params.run_malt ) {
|
if ( params.run_malt ) {
|
||||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
|
|
||||||
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
|
ch_maltrun_for_megan = MALT_RUN.out.rma6
|
||||||
|
.transpose()
|
||||||
|
.map{
|
||||||
|
meta, rma ->
|
||||||
|
// re-extract meta from file names, use filename without rma to
|
||||||
|
// ensure we keep paired-end information in downstream filenames
|
||||||
|
// when no pair-merging
|
||||||
|
def meta_new = meta.clone()
|
||||||
|
meta_new['db_name'] = meta.id
|
||||||
|
meta_new['id'] = rma.baseName
|
||||||
|
[ meta_new, rma ]
|
||||||
|
}
|
||||||
|
|
||||||
|
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
|
||||||
|
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
|
||||||
|
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
|
||||||
|
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_kraken2 ) {
|
if ( params.run_kraken2 ) {
|
||||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
|
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
|
||||||
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
|
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
|
||||||
|
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_centrifuge ) {
|
if ( params.run_centrifuge ) {
|
||||||
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||||
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
|
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
|
||||||
|
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( params.run_metaphlan3 ) {
|
if ( params.run_metaphlan3 ) {
|
||||||
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||||
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
|
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
|
||||||
|
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
emit:
|
emit:
|
||||||
// TODO work out if there is enough standardisation of output to export as one?
|
profiles = ch_raw_profiles // channel: [ val(meta), [ profile ] ] - should be text files or biom
|
||||||
//output = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
|
|
||||||
versions = ch_versions // channel: [ versions.yml ]
|
versions = ch_versions // channel: [ versions.yml ]
|
||||||
mqc = ch_multiqc_files
|
mqc = ch_multiqc_files
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue