mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 14:29:55 +00:00
Merge pull request #55 from nf-core/megan-postprocessing
Add MEGAN for MALT postprocessing
This commit is contained in:
commit
53d76559af
8 changed files with 150 additions and 15 deletions
10
CITATIONS.md
10
CITATIONS.md
|
@ -40,9 +40,17 @@
|
|||
|
||||
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
|
||||
|
||||
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
|
||||
|
||||
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
|
||||
|
||||
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
|
||||
|
||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088.
|
||||
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088.
|
||||
|
||||
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
|
||||
|
||||
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
|
||||
|
||||
## Software packaging/containerisation tools
|
||||
|
||||
|
|
|
@ -191,11 +191,22 @@ process {
|
|||
|
||||
withName: MALT_RUN {
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
// one run with multiple samples, so fix ID to just db name to ensure clean log name
|
||||
ext.prefix = { "${meta.db_name}" }
|
||||
publishDir = [
|
||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{log}'
|
||||
pattern: '*.{rma6,log,sam}'
|
||||
]
|
||||
}
|
||||
|
||||
// Per-process settings for the MEGAN_RMA2INFO step (text export of MALT RMA6 files).
withName: MEGAN_RMA2INFO {
|
||||
// Extra command-line arguments handed to the module through task.ext.args.
// NOTE(review): `-c2c Taxonomy` is presumably rma2info's class-to-count export
// for the Taxonomy classification - confirm against the MEGAN manual.
ext.args = "-c2c Taxonomy"
|
||||
// Output file names are derived from the sample-level meta.id.
ext.prefix = { "${meta.id}" }
|
||||
// Publish the text export and the optional MEGAN summary under
// <outdir>/malt/<db_name>.
publishDir = [
|
||||
path: { "${params.outdir}/malt/${meta.db_name}" },
|
||||
mode: params.publish_dir_mode,
|
||||
pattern: '*.{txt.gz,megan}'
|
||||
]
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,9 @@
|
|||
"malt/run": {
|
||||
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
|
||||
},
|
||||
"megan/rma2info": {
|
||||
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
|
||||
},
|
||||
"metaphlan3": {
|
||||
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
|
||||
},
|
||||
|
|
38
modules/nf-core/modules/megan/rma2info/main.nf
generated
Normal file
38
modules/nf-core/modules/megan/rma2info/main.nf
generated
Normal file
|
@ -0,0 +1,38 @@
|
|||
// MEGAN_RMA2INFO: runs MEGAN's `rma2info` on an RMA6 file and writes the result
// to `<prefix>.txt.gz`; optionally also writes a `<prefix>.megan` summary file.
process MEGAN_RMA2INFO {
|
||||
tag "$meta.id"
|
||||
label 'process_low'
|
||||
|
||||
// Software environment: a pinned bioconda package when params.enable_conda is set,
// otherwise a container. The Singularity image is used when the container engine
// is singularity and task.ext.singularity_pull_docker_container is not set;
// in every other case the quay.io image is used.
conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
|
||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
|
||||
'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
|
||||
'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
|
||||
|
||||
input:
|
||||
// meta: sample metadata map (meta.id is used for the tag and default prefix)
// rma6: the RMA6 file passed to `rma2info -i`
tuple val(meta), path(rma6)
|
||||
// megan_summary: when truthy, `-es <prefix>.megan` is added to the command
val(megan_summary)
|
||||
|
||||
output:
|
||||
// txt: the `-o` output file; always expected
tuple val(meta), path("*.txt.gz") , emit: txt
|
||||
// megan_summary: optional, so the task does not fail when no *.megan file exists
tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
|
||||
// versions: YAML file recording the MEGAN version used by this task
path "versions.yml" , emit: versions
|
||||
|
||||
when:
|
||||
// Run unless task.ext.when has been set to a falsy value.
task.ext.when == null || task.ext.when
|
||||
|
||||
script:
|
||||
// Extra command-line arguments supplied via task.ext.args (empty by default).
def args = task.ext.args ?: ''
|
||||
// Base name for output files; falls back to meta.id when task.ext.prefix is unset.
def prefix = task.ext.prefix ?: "${meta.id}"
|
||||
// Only request the summary export when the caller asked for it.
def summary = megan_summary ? "-es ${prefix}.megan" : ""
|
||||
// The script runs rma2info, then writes versions.yml with the version string
// extracted (grep/sed) from the text rma2info prints when invoked without arguments.
"""
|
||||
rma2info \\
|
||||
-i ${rma6} \\
|
||||
-o ${prefix}.txt.gz \\
|
||||
${summary} \\
|
||||
$args
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
"${task.process}":
|
||||
megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
|
||||
END_VERSIONS
|
||||
"""
|
||||
}
|
51
modules/nf-core/modules/megan/rma2info/meta.yml
generated
Normal file
51
modules/nf-core/modules/megan/rma2info/meta.yml
generated
Normal file
|
@ -0,0 +1,51 @@
|
|||
name: "megan_rma2info"
|
||||
description: Analyses an RMA file and exports information in text format
|
||||
keywords:
|
||||
- megan
|
||||
- rma6
|
||||
- classification
|
||||
- conversion
|
||||
tools:
|
||||
- "megan":
|
||||
description: "A tool for studying the taxonomic content of a set of DNA reads"
|
||||
homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
|
||||
documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
|
||||
tool_dev_url: "https://github.com/husonlab/megan-ce"
|
||||
doi: "10.1371/journal.pcbi.1004957"
|
||||
licence: "['GPL >=3']"
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- rma6:
|
||||
type: file
|
||||
description: RMA6 file from MEGAN or MALT
|
||||
pattern: "*.rma6"
|
||||
- megan_summary:
|
||||
type: boolean
|
||||
description: Specify whether to generate a MEGAN summary file
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- versions:
|
||||
type: file
|
||||
description: File containing software versions
|
||||
pattern: "versions.yml"
|
||||
- txt:
|
||||
type: file
|
||||
description: Compressed text file
|
||||
pattern: "*.txt.gz"
|
||||
- megan_summary:
|
||||
type: file
|
||||
description: Optionally generated MEGAN summary file
|
||||
pattern: "*.megan"
|
||||
|
||||
authors:
|
||||
- "@jfy133"
|
|
@ -88,6 +88,7 @@ params {
|
|||
// MALT
|
||||
run_malt = false
|
||||
malt_mode = 'BlastN'
|
||||
malt_generatemegansummary = false
|
||||
|
||||
// kraken2
|
||||
run_kraken2 = false
|
||||
|
|
|
@ -364,11 +364,14 @@
|
|||
},
|
||||
"shortread_hostremoval_reference": {
|
||||
"type": "string",
|
||||
"default": null
|
||||
"default": "None"
|
||||
},
|
||||
"shortread_hostremoval_index": {
|
||||
"type": "string",
|
||||
"default": null
|
||||
"default": "None"
|
||||
},
|
||||
"malt_generatemegansummary": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
//
|
||||
|
||||
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
|
||||
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
|
||||
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
|
||||
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
|
||||
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
|
||||
|
@ -15,6 +16,7 @@ workflow PROFILING {
|
|||
main:
|
||||
ch_versions = Channel.empty()
|
||||
ch_multiqc_files = Channel.empty()
|
||||
ch_raw_profiles = Channel.empty()
|
||||
|
||||
/*
|
||||
COMBINE READS WITH POSSIBLE DATABASES
|
||||
|
@ -94,30 +96,48 @@ workflow PROFILING {
|
|||
|
||||
// MALT profiling branch: align reads with MALT, then convert each resulting RMA6
// file to a text profile with MEGAN's rma2info. Logs go to MultiQC, text exports
// go to the raw-profiles channel, and the versions of BOTH tools are collected.
if ( params.run_malt ) {
|
||||
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
|
||||
|
||||
// MALT_RUN emits the RMA6 files of one run together; transpose() yields one
// [ meta, rma ] element per file so each file gets its own rma2info task.
ch_maltrun_for_megan = MALT_RUN.out.rma6
|
||||
.transpose()
|
||||
.map{
|
||||
meta, rma ->
|
||||
// re-extract meta from file names, use filename without rma to
|
||||
// ensure we keep paired-end information in downstream filenames
|
||||
// when no pair-merging
|
||||
// clone first so the map emitted by MALT_RUN is not mutated
def meta_new = meta.clone()
|
||||
// NOTE(review): presumably meta.id holds the database name at this point - confirm upstream
meta_new['db_name'] = meta.id
|
||||
meta_new['id'] = rma.baseName
|
||||
[ meta_new, rma ]
|
||||
}
|
||||
|
||||
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
|
||||
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
|
||||
// MEGAN_RMA2INFO also emits versions.yml; collect it so MEGAN is reported too
ch_versions = ch_versions.mix( MEGAN_RMA2INFO.out.versions.first() )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
|
||||
}
|
||||
|
||||
if ( params.run_kraken2 ) {
|
||||
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
|
||||
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
|
||||
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
|
||||
}
|
||||
|
||||
if ( params.run_centrifuge ) {
|
||||
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
|
||||
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
|
||||
}
|
||||
|
||||
if ( params.run_metaphlan3 ) {
|
||||
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
|
||||
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
|
||||
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
|
||||
}
|
||||
|
||||
|
||||
emit:
|
||||
// TODO work out if there is enough standardisation of output to export as one?
|
||||
//output = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
|
||||
profiles = ch_raw_profiles // channel: [ val(meta), [ profile ] ] - should be text files or biom
|
||||
versions = ch_versions // channel: [ versions.yml ]
|
||||
mqc = ch_multiqc_files
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue