From cc73cdd51d26330d831ec306f8a5da081fb8665a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sat, 16 Apr 2022 07:42:30 +0200 Subject: [PATCH] Add generation of taxon-table like output for MALT --- CITATIONS.md | 10 ++++- conf/modules.config | 15 ++++++- nextflow.config | 1 + nextflow_schema.json | 7 +++- subworkflows/local/profiling.nf | 32 +++++++++++---- .../local/shortread_postprocessing.nf | 39 ------------------- 6 files changed, 52 insertions(+), 52 deletions(-) delete mode 100644 subworkflows/local/shortread_postprocessing.nf diff --git a/CITATIONS.md b/CITATIONS.md index e7bcf47..02621d9 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,9 +40,17 @@ > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. +- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957) + + > Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957. + - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) - > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. + > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. 
“Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088. + +- [Centrifuge](https://doi.org/10.1101/gr.210641.116) + + > Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116. ## Software packaging/containerisation tools diff --git a/conf/modules.config b/conf/modules.config index ccd1748..23455a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -191,11 +191,22 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + // one run covers multiple samples, so set the prefix to just the db name to ensure a clean log name + ext.prefix = { "${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, - pattern: '*.{log}' + pattern: '*.{rma6,log,sam}' + ] + } + + withName: MEGAN_RMA2INFO { + ext.args = "-c2c Taxonomy" + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/malt/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.{txt.gz,megan}' ] } diff --git a/nextflow.config b/nextflow.config index bf3ca92..a618f66 100644 --- a/nextflow.config +++ b/nextflow.config @@ -88,6 +88,7 @@ params { // MALT run_malt = false malt_mode = 'BlastN' + malt_generatemegansummary = false // kraken2 run_kraken2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index cf0edab..2cbfb0e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -364,11 +364,14 @@ }, "shortread_hostremoval_reference": { "type": "string", - "default": null + "default": "None" }, "shortread_hostremoval_index": { "type": "string", - 
"default": null + "default": "None" + }, + "malt_generatemegansummary": { + "type": "boolean" } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 23b96e7..d7532b7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -3,6 +3,7 @@ // include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main' +include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main' include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' @@ -95,33 +96,48 @@ workflow PROFILING { if ( params.run_malt ) { MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) - ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( MALT_RUN.out.rma6 ) + + ch_maltrun_for_megan = MALT_RUN.out.rma6 + .transpose() + .map{ + meta, rma -> + // re-extract meta from file names, use filename without rma to + // ensure we keep paired-end information in downstream filenames + // when no pair-merging + def meta_new = meta.clone() + meta_new['db_name'] = meta.id + meta_new['id'] = rma.name - ( '.' 
+ rma.extension ) + [ meta_new, rma ] + } + + MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary ) + ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( MALT_RUN.out.versions.first(), MEGAN_RMA2INFO.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) } if ( params.run_kraken2 ) { KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt ) } if ( params.run_centrifuge ) { CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format ) - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report ) } if ( params.run_metaphlan3 ) { METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) - ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) + ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom ) } emit: - profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] + profiles = ch_raw_profiles // channel: [ val(meta), [ profile ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/subworkflows/local/shortread_postprocessing.nf b/subworkflows/local/shortread_postprocessing.nf deleted file mode 100644 index 7fb0d70..0000000 --- 
a/subworkflows/local/shortread_postprocessing.nf +++ /dev/null @@ -1,39 +0,0 @@ -// -// Perform read trimming and merging -// - - -include { SHORTREAD_FASTP } from './shortread_fastp' -include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval' -include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' - -workflow SHORTREAD_POSTPROCESSING { - take: - input // [ [ meta ], [ taxon_table/file ] ] - - main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - if ( params.shortread_clipmerge_tool == "fastp" ) { - ch_processed_reads = SHORTREAD_FASTP ( reads ).reads - ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) - } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { - ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads - ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) - } else { - ch_processed_reads = reads - } - - FASTQC_PROCESSED ( ch_processed_reads ) - ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) - - emit: - output = output // channel: [ val(meta), taxon_table ] - versions = ch_versions // channel: [ versions.yml ] - mqc = ch_multiqc_files -} -