1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-10 22:53:08 +00:00

Add generation of taxon-table-like output for MALT

This commit is contained in:
James Fellows Yates 2022-04-16 07:42:30 +02:00
parent 6fecb3eeb7
commit cc73cdd51d
6 changed files with 52 additions and 52 deletions

View file

@ -40,9 +40,17 @@
> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)
> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)
> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088.
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)
> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
## Software packaging/containerisation tools ## Software packaging/containerisation tools

View file

@ -191,11 +191,22 @@ process {
withName: MALT_RUN { withName: MALT_RUN {
ext.args = { "${meta.db_params}" } ext.args = { "${meta.db_params}" }
ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } // one run with multiple samples, so fix ID to just db name to ensure clean log name
ext.prefix = { "${meta.db_name}" }
publishDir = [ publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" }, path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode, mode: params.publish_dir_mode,
pattern: '*.{log}' pattern: '*.{rma6,log,sam}'
]
}
withName: MEGAN_RMA2INFO {
ext.args = "-c2c Taxonomy"
ext.prefix = { "${meta.id}" }
publishDir = [
path: { "${params.outdir}/malt/${meta.db_name}" },
mode: params.publish_dir_mode,
pattern: '*.{txt.gz,megan}'
] ]
} }

View file

@ -88,6 +88,7 @@ params {
// MALT // MALT
run_malt = false run_malt = false
malt_mode = 'BlastN' malt_mode = 'BlastN'
malt_generatemegansummary = false
// kraken2 // kraken2
run_kraken2 = false run_kraken2 = false

View file

@ -364,11 +364,14 @@
}, },
"shortread_hostremoval_reference": { "shortread_hostremoval_reference": {
"type": "string", "type": "string",
"default": null "default": "None"
}, },
"shortread_hostremoval_index": { "shortread_hostremoval_index": {
"type": "string", "type": "string",
"default": null "default": "None"
},
"malt_generatemegansummary": {
"type": "boolean"
} }
} }
} }

View file

@ -3,6 +3,7 @@
// //
include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main' include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main'
include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main'
include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main'
include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main'
include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main'
@ -95,33 +96,48 @@ workflow PROFILING {
if ( params.run_malt ) { if ( params.run_malt ) {
MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) ch_maltrun_for_megan = MALT_RUN.out.rma6
ch_raw_profiles = ch_raw_profiles.mix( MALT_RUN.out.rma6 ) .transpose()
.map{
meta, rma ->
// re-extract meta from file names, use filename without rma to
// ensure we keep paired-end information in downstream filenames
// when no pair-merging
def meta_new = meta.clone()
meta_new['db_name'] = meta.id
meta_new['id'] = rma.name - ( '.' + rma.extension )
[ meta_new, rma ]
}
MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary )
ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt )
} }
if ( params.run_kraken2 ) { if ( params.run_kraken2 ) {
KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt ) ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
} }
if ( params.run_centrifuge ) { if ( params.run_centrifuge ) {
CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format ) CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
} }
if ( params.run_metaphlan3 ) { if ( params.run_metaphlan3 ) {
METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom ) ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
} }
emit: emit:
profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] profiles = ch_raw_profiles // channel: [ val(meta), [ profile ] ] - profile is a text or biom file
versions = ch_versions // channel: [ versions.yml ] versions = ch_versions // channel: [ versions.yml ]
mqc = ch_multiqc_files mqc = ch_multiqc_files
} }

View file

@ -1,39 +0,0 @@
//
// Perform read trimming and merging
//
include { SHORTREAD_FASTP } from './shortread_fastp'
include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval'
include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main'
//
// SHORTREAD_POSTPROCESSING: pass the incoming channel through the clip/merge
// tool selected by `params.shortread_clipmerge_tool` ("fastp" or
// "adapterremoval"; any other value passes the channel through untouched),
// then run FastQC on the result.
//
// NOTE(review): the take/emit annotations below mention taxon tables, but the
// channel is handed to read-processing subworkflows and FastQC — confirm the
// intended payload of this workflow with its callers.
//
workflow SHORTREAD_POSTPROCESSING {
    take:
    input // [ [ meta ], [ taxon_table/file ] ]

    main:
    ch_versions      = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // The original body referred to an undeclared `reads` variable; the only
    // declared input of this workflow is `input`, so that is what is used here.
    if ( params.shortread_clipmerge_tool == "fastp" ) {
        ch_processed_reads = SHORTREAD_FASTP ( input ).reads
        ch_versions        = ch_versions.mix( SHORTREAD_FASTP.out.versions )
        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( input ).reads
        ch_versions        = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
    } else {
        // No recognised clip/merge tool selected: forward the input unchanged.
        ch_processed_reads = input
    }

    FASTQC_PROCESSED ( ch_processed_reads )
    ch_versions      = ch_versions.mix( FASTQC_PROCESSED.out.versions )
    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )

    emit:
    // `output` was previously assigned from an undefined `output` variable;
    // emit the processed channel built above instead.
    output   = ch_processed_reads // channel: [ val(meta), taxon_table ]
    versions = ch_versions        // channel: [ versions.yml ]
    mqc      = ch_multiqc_files
}