1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-09-21 08:12:03 +00:00

Merge pull request #55 from nf-core/megan-postprocessing

Add MEGAN for MALT postprocessing
This commit is contained in:
James A. Fellows Yates 2022-04-20 09:52:55 +02:00 committed by GitHub
commit 53d76559af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 150 additions and 15 deletions

View file

@ -40,9 +40,17 @@
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088.
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
## Software packaging/containerisation tools

View file

@ -191,11 +191,22 @@ process {
withName: MALT_RUN {
ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
// one run with multiple samples, so fix ID to just db name to ensure clean log name
ext.prefix = { "${meta.db_name}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{log}'
pattern: '*.{rma6,log,sam}'
]
}
withName: MEGAN_RMA2INFO {
ext.args = "-c2c Taxonomy"
ext.prefix = { "${meta.id}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt.gz,megan}'
]
}

View file

@ -36,6 +36,9 @@
"malt/run": {
"git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
},
"megan/rma2info": {
"git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
},
"metaphlan3": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},

View file

@ -0,0 +1,38 @@
process MEGAN_RMA2INFO {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
input:
tuple val(meta), path(rma6)
val(megan_summary)
output:
tuple val(meta), path("*.txt.gz") , emit: txt
tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def summary = megan_summary ? "-es ${prefix}.megan" : ""
"""
rma2info \\
-i ${rma6} \\
-o ${prefix}.txt.gz \\
${summary} \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
END_VERSIONS
"""
}

View file

@ -0,0 +1,51 @@
name: "megan_rma2info"
description: Analyses an RMA file and exports information in text format
keywords:
- megan
- rma6
- classification
- conversion
tools:
- "megan":
description: "A tool for studying the taxonomic content of a set of DNA reads"
homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
tool_dev_url: "https://github.com/husonlab/megan-ce"
doi: "10.1371/journal.pcbi.1004957"
licence: "['GPL >=3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- rma6:
type: file
description: RMA6 file from MEGAN or MALT
pattern: "*.rma6"
- megan_summary:
type: boolean
description: Specify whether to generate an MEGAN summary file
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: Compressed text file
pattern: "*.txt.gz"
- megan_summary:
type: file
description: Optionally generated MEGAN summary file
pattern: "*.megan"
authors:
- "@jfy133"

View file

@ -88,6 +88,7 @@ params {
// MALT
run_malt = false
malt_mode = 'BlastN'
malt_generatemegansummary = false
// kraken2
run_kraken2 = false

View file

@ -364,11 +364,14 @@
},
"shortread_hostremoval_reference": {
"type": "string",
"default": null
"default": "None"
},
"shortread_hostremoval_index": {
"type": "string",
"default": null
"default": "None"
},
"malt_generatemegansummary": {
"type": "boolean"
}
}
}

View file

@ -3,6 +3,7 @@
//
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
@ -13,8 +14,9 @@ workflow PROFILING {
databases // [ [ meta ], path ]
main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
ch_raw_profiles = Channel.empty()
/*
COMBINE READS WITH POSSIBLE DATABASES
@ -94,30 +96,48 @@ workflow PROFILING {
if ( params.run_malt ) {
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
ch_maltrun_for_megan = MALT_RUN.out.rma6
.transpose()
.map{
meta, rma ->
// re-extract meta from file names, use filename without rma to
// ensure we keep paired-end information in downstream filenames
// when no pair-merging
def meta_new = meta.clone()
meta_new['db_name'] = meta.id
meta_new['id'] = rma.baseName
[ meta_new, rma ]
}
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
}
if ( params.run_kraken2 ) {
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
}
if ( params.run_centrifuge ) {
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
}
if ( params.run_metaphlan3 ) {
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
}
emit:
// TODO work out if there is enough standardisation of output to export as one?
//output = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom
versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files
}