From de5734052601ce6bedd7be2389fef4bacd0affb1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 13 Apr 2022 11:49:35 +0200 Subject: [PATCH 1/4] Start work --- modules.json | 5 +- .../nf-core/modules/megan/rma2info/main.nf | 38 ++++++++++++++ .../nf-core/modules/megan/rma2info/meta.yml | 51 +++++++++++++++++++ nextflow.config | 1 + subworkflows/local/profiling.nf | 22 ++++---- .../local/shortread_postprocessing.nf | 39 ++++++++++++++ 6 files changed, 146 insertions(+), 10 deletions(-) create mode 100644 modules/nf-core/modules/megan/rma2info/main.nf create mode 100644 modules/nf-core/modules/megan/rma2info/meta.yml create mode 100644 subworkflows/local/shortread_postprocessing.nf diff --git a/modules.json b/modules.json index 7fbc65c..e921454 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,9 @@ "malt/run": { "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b" }, + "megan/rma2info": { + "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece" + }, "metaphlan3": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -47,4 +50,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/megan/rma2info/main.nf b/modules/nf-core/modules/megan/rma2info/main.nf new file mode 100644 index 0000000..80d1975 --- /dev/null +++ b/modules/nf-core/modules/megan/rma2info/main.nf @@ -0,0 +1,38 @@ +process MEGAN_RMA2INFO { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::megan=6.21.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0': + 'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }" + + input: + tuple val(meta), path(rma6) + val(megan_summary) + + output: + tuple val(meta), path("*.txt.gz") , emit: txt + tuple val(meta), path("*.megan"), optional: true, emit: megan_summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def summary = megan_summary ? "-es ${prefix}.megan" : "" + """ + rma2info \\ + -i ${rma6} \\ + -o ${prefix}.txt.gz \\ + ${summary} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/megan/rma2info/meta.yml b/modules/nf-core/modules/megan/rma2info/meta.yml new file mode 100644 index 0000000..0f2d5a9 --- /dev/null +++ b/modules/nf-core/modules/megan/rma2info/meta.yml @@ -0,0 +1,51 @@ +name: "megan_rma2info" +description: Analyses an RMA file and exports information in text format +keywords: + - megan + - rma6 + - classification + - conversion +tools: + - "megan": + description: "A tool for studying the taxonomic content of a set of DNA reads" + homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/" + documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html" + tool_dev_url: "https://github.com/husonlab/megan-ce" + doi: "10.1371/journal.pcbi.1004957" + licence: "['GPL >=3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - rma6: + type: file + description: RMA6 file from MEGAN or MALT + pattern: "*.rma6" + - megan_summary: + type: boolean + description: Specify whether to generate an MEGAN summary file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - txt: + type: file + description: Compressed text file + pattern: "*.txt.gz" + - megan_summary: + type: file + description: Optionally generated MEGAN summary file + pattern: "*.megan" + +authors: + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index b4a8d91..8ddf365 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,6 +89,7 @@ params { centrifuge_save_unaligned = false centrifuge_save_aligned = false centrifuge_sam_format = false + // metaphlan3 run_metaphlan3 = false } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index c74c583..59b0dc0 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -14,8 +14,9 @@ workflow PROFILING { databases // [ [ meta ], path ] main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_raw_profiles = Channel.empty() /* COMBINE READS WITH POSSIBLE DATABASES @@ -89,30 +90,33 @@ workflow PROFILING { if ( params.run_malt ) { MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) - ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( MALT_RUN.out.rma6 ) } if ( params.run_kraken2 ) { KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt ) } if ( params.run_centrifuge ) { CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format ) - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report ) } if ( params.run_metaphlan3 ) { METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom ) } emit: - // TODO work out if there is enough standardisation of output to export as one? - //output = ch_filtered_reads // channel: [ val(meta), [ reads ] ] + profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/subworkflows/local/shortread_postprocessing.nf b/subworkflows/local/shortread_postprocessing.nf new file mode 100644 index 0000000..7fb0d70 --- /dev/null +++ b/subworkflows/local/shortread_postprocessing.nf @@ -0,0 +1,39 @@ +// +// Perform read trimming and merging +// + + +include { SHORTREAD_FASTP } from './shortread_fastp' +include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval' +include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' + +workflow SHORTREAD_POSTPROCESSING { + take: + input // [ [ meta ], [ taxon_table/file ] ] + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + + if ( params.shortread_clipmerge_tool == "fastp" ) { + ch_processed_reads = SHORTREAD_FASTP ( reads ).reads + ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) + } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads + ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) + } else { + ch_processed_reads = reads + } + + FASTQC_PROCESSED ( ch_processed_reads ) + ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) + ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) + + emit: + output = output // channel: [ val(meta), taxon_table ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} + From cc73cdd51d26330d831ec306f8a5da081fb8665a Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sat, 16 Apr 2022 07:42:30 +0200 Subject: [PATCH 2/4] Add generation of taxon-table like output for MALT --- CITATIONS.md | 10 ++++- conf/modules.config | 15 ++++++- nextflow.config | 1 + nextflow_schema.json | 7 +++- subworkflows/local/profiling.nf | 32 +++++++++++---- .../local/shortread_postprocessing.nf | 39 ------------------- 6 files changed, 52 insertions(+), 52 deletions(-) delete mode 100644 subworkflows/local/shortread_postprocessing.nf diff --git a/CITATIONS.md b/CITATIONS.md index e7bcf47..02621d9 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -40,9 +40,17 @@ > Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6. +- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957) + + > Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957. + - [MetaPhlAn3](https://doi.org/10.7554/eLife.65088) - > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. + > Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088 + +- [Centrifuge](https://doi.org/10.1101/gr.210641.116) + + > Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116. ## Software packaging/containerisation tools diff --git a/conf/modules.config b/conf/modules.config index ccd1748..23455a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -191,11 +191,22 @@ process { withName: MALT_RUN { ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + // one run with multiple samples, so fix ID to just db name to ensure clean log name + ext.prefix = { "${meta.db_name}" } publishDir = [ path: { "${params.outdir}/malt/${meta.db_name}" }, mode: params.publish_dir_mode, - pattern: '*.{log}' + pattern: '*.{rma6,log,sam}' + ] + } + + withName: MEGAN_RMA2INFO { + ext.args = "-c2c Taxonomy" + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/malt/${meta.db_name}" }, + mode: params.publish_dir_mode, + pattern: '*.{txt.gz,megan}' ] } diff --git a/nextflow.config b/nextflow.config index bf3ca92..a618f66 100644 --- a/nextflow.config +++ b/nextflow.config @@ -88,6 +88,7 @@ params { // MALT run_malt = false malt_mode = 'BlastN' + malt_generatemegansummary = false // kraken2 run_kraken2 = false diff --git a/nextflow_schema.json b/nextflow_schema.json index cf0edab..2cbfb0e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -364,11 +364,14 @@ }, "shortread_hostremoval_reference": { "type": "string", - "default": null + "default": "None" }, "shortread_hostremoval_index": { "type": "string", - "default": null + "default": "None" + }, + "malt_generatemegansummary": { + "type": "boolean" } } } diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 23b96e7..d7532b7 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -3,6 +3,7 @@ // include { MALT_RUN } from '../../modules/nf-core/modules/malt/run/main' +include { MEGAN_RMA2INFO } from '../../modules/nf-core/modules/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/modules/kraken2/kraken2/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/modules/centrifuge/centrifuge/main' include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' @@ -95,33 +96,48 @@ workflow PROFILING { if ( params.run_malt ) { MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db ) - ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) - ch_raw_profiles = ch_raw_profiles.mix( MALT_RUN.out.rma6 ) + + ch_maltrun_for_megan = MALT_RUN.out.rma6 + .transpose() + .map{ + meta, rma -> + // re-extract meta from file names, use filename without rma to + // ensure we keep paired-end information in downstream filenames + // when no pair-merging + def meta_new = meta.clone() + meta_new['db_name'] = meta.id + meta_new['id'] = rma.name - ( '.' + rma.extension ) + [ meta_new, rma ] + } + + MEGAN_RMA2INFO (ch_maltrun_for_megan, params.malt_generatemegansummary ) + ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() ) + ch_raw_profiles = ch_raw_profiles.mix( MEGAN_RMA2INFO.out.txt ) } if ( params.run_kraken2 ) { KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db ) - ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) - ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) + ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt ) } if ( params.run_centrifuge ) { CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format ) - ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) + ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report ) } if ( params.run_metaphlan3 ) { METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db ) - ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) + ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() ) ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom ) } emit: - profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] + profiles = ch_raw_profiles // channel: [ val(meta), [ reads ] ] - should be text files or biom versions = ch_versions // channel: [ versions.yml ] mqc = ch_multiqc_files } diff --git a/subworkflows/local/shortread_postprocessing.nf b/subworkflows/local/shortread_postprocessing.nf deleted file mode 100644 index 7fb0d70..0000000 --- a/subworkflows/local/shortread_postprocessing.nf +++ /dev/null @@ -1,39 +0,0 @@ -// -// Perform read trimming and merging -// - - -include { SHORTREAD_FASTP } from './shortread_fastp' -include { SHORTREAD_ADAPTERREMOVAL } from './shortread_adapterremoval' -include { FASTQC as FASTQC_PROCESSED } from '../../modules/nf-core/modules/fastqc/main' - -workflow SHORTREAD_POSTPROCESSING { - take: - input // [ [ meta ], [ taxon_table/file ] ] - - main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - if ( params.shortread_clipmerge_tool == "fastp" ) { - ch_processed_reads = SHORTREAD_FASTP ( reads ).reads - ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc ) - } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) { - ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( reads ).reads - ch_versions = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc ) - } else { - ch_processed_reads = reads - } - - FASTQC_PROCESSED ( ch_processed_reads ) - ch_versions = ch_versions.mix( FASTQC_PROCESSED.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip ) - - emit: - output = output // channel: [ val(meta), taxon_table ] - versions = ch_versions // channel: [ versions.yml ] - mqc = ch_multiqc_files -} - From d0256ec9378eac0cad715fe364d997a84e9b312b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sat, 16 Apr 2022 07:45:24 +0200 Subject: [PATCH 3/4] Prettier --- modules.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 18c0e69..18dea60 100644 --- a/modules.json +++ b/modules.json @@ -56,4 +56,4 @@ } } } -} \ No newline at end of file +} From 87777a428959bc89f4f6cfda956c43312d4b6d16 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 19 Apr 2022 12:54:57 +0200 Subject: [PATCH 4/4] Update subworkflows/local/profiling.nf Co-authored-by: Moritz E. Beber --- subworkflows/local/profiling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index d7532b7..5f9c111 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -106,7 +106,7 @@ workflow PROFILING { // when no pair-merging def meta_new = meta.clone() meta_new['db_name'] = meta.id - meta_new['id'] = rma.name - ( '.' + rma.extension ) + meta_new['id'] = rma.baseName [ meta_new, rma ] }