From de5734052601ce6bedd7be2389fef4bacd0affb1 Mon Sep 17 00:00:00 2001
From: James Fellows Yates
Date: Wed, 13 Apr 2022 11:49:35 +0200
Subject: [PATCH] Start work

---
Review notes (placed between the "---" marker and the first "diff --git", so
they are neither part of the commit message nor of any hunk):

* Line structure and indentation were restored from a whitespace-collapsed copy
  of this patch. Hunk sizes match their "@@" headers, but the exact whitespace
  and blank-line placement of context/removed lines could not be recovered;
  verify against the target tree before applying.
* subworkflows/local/shortread_postprocessing.nf: the workflow body referenced
  `reads` and emitted `output`, neither of which was defined. The body now uses
  the declared `input` channel and emits `ch_processed_reads`.
* subworkflows/local/profiling.nf: the comment on the new `profiles` emit
  described reads; it now describes the mixed profiler outputs.
* The post-image blob ids on the "index" lines of those two files were computed
  for the original content and are therefore stale.

 modules.json                                  |  5 +-
 .../nf-core/modules/megan/rma2info/main.nf    | 38 ++++++++++++++
 .../nf-core/modules/megan/rma2info/meta.yml   | 51 +++++++++++++++++++
 nextflow.config                               |  1 +
 subworkflows/local/profiling.nf               | 22 ++++----
 .../local/shortread_postprocessing.nf         | 39 ++++++++++++++
 6 files changed, 146 insertions(+), 10 deletions(-)
 create mode 100644 modules/nf-core/modules/megan/rma2info/main.nf
 create mode 100644 modules/nf-core/modules/megan/rma2info/meta.yml
 create mode 100644 subworkflows/local/shortread_postprocessing.nf

diff --git a/modules.json b/modules.json
index 7fbc65c..e921454 100644
--- a/modules.json
+++ b/modules.json
@@ -30,6 +30,9 @@
             "malt/run": {
                 "git_sha": "72b96f4e504eef673f2b5c13560a9d90b669129b"
             },
+            "megan/rma2info": {
+                "git_sha": "2d38566eca4cc15142b2ffa7c11837569b39aece"
+            },
             "metaphlan3": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
@@ -47,4 +50,4 @@
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/nf-core/modules/megan/rma2info/main.nf b/modules/nf-core/modules/megan/rma2info/main.nf
new file mode 100644
index 0000000..80d1975
--- /dev/null
+++ b/modules/nf-core/modules/megan/rma2info/main.nf
@@ -0,0 +1,38 @@
+process MEGAN_RMA2INFO {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::megan=6.21.7" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/megan:6.21.7--h9ee0642_0':
+        'quay.io/biocontainers/megan:6.21.7--h9ee0642_0' }"
+
+    input:
+    tuple val(meta), path(rma6)
+    val(megan_summary)
+
+    output:
+    tuple val(meta), path("*.txt.gz")              , emit: txt
+    tuple val(meta), path("*.megan"), optional: true, emit: megan_summary
+    path "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def summary = megan_summary ? "-es ${prefix}.megan" : ""
+    """
+    rma2info \\
+        -i ${rma6} \\
+        -o ${prefix}.txt.gz \\
+        ${summary} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        megan: \$(echo \$(rma2info 2>&1) | grep version | sed 's/.*version //g;s/, built.*//g')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/megan/rma2info/meta.yml b/modules/nf-core/modules/megan/rma2info/meta.yml
new file mode 100644
index 0000000..0f2d5a9
--- /dev/null
+++ b/modules/nf-core/modules/megan/rma2info/meta.yml
@@ -0,0 +1,51 @@
+name: "megan_rma2info"
+description: Analyses an RMA file and exports information in text format
+keywords:
+  - megan
+  - rma6
+  - classification
+  - conversion
+tools:
+  - "megan":
+      description: "A tool for studying the taxonomic content of a set of DNA reads"
+      homepage: "https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/megan6/"
+      documentation: "https://software-ab.informatik.uni-tuebingen.de/download/megan6/welcome.html"
+      tool_dev_url: "https://github.com/husonlab/megan-ce"
+      doi: "10.1371/journal.pcbi.1004957"
+      licence: "['GPL >=3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - rma6:
+      type: file
+      description: RMA6 file from MEGAN or MALT
+      pattern: "*.rma6"
+  - megan_summary:
+      type: boolean
+      description: Specify whether to generate an MEGAN summary file
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - txt:
+      type: file
+      description: Compressed text file
+      pattern: "*.txt.gz"
+  - megan_summary:
+      type: file
+      description: Optionally generated MEGAN summary file
+      pattern: "*.megan"
+
+authors:
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index b4a8d91..8ddf365 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -89,6 +89,7 @@ params {
     centrifuge_save_unaligned  = false
     centrifuge_save_aligned    = false
     centrifuge_sam_format      = false
+    // metaphlan3
     run_metaphlan3             = false
 }
 
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index c74c583..59b0dc0 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -14,8 +14,9 @@ workflow PROFILING {
     databases // [ [ meta ], path ]
 
     main:
-    ch_versions = Channel.empty()
-    ch_multiqc_files = Channel.empty()
+    ch_versions         = Channel.empty()
+    ch_multiqc_files    = Channel.empty()
+    ch_raw_profiles     = Channel.empty()
 
     /*
         COMBINE READS WITH POSSIBLE DATABASES
@@ -89,30 +90,33 @@ workflow PROFILING {
 
     if ( params.run_malt ) {
         MALT_RUN ( ch_input_for_malt.reads, params.malt_mode, ch_input_for_malt.db )
-        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
-        ch_versions = ch_versions.mix( MALT_RUN.out.versions.first() )
+        ch_multiqc_files = ch_multiqc_files.mix( MALT_RUN.out.log.collect{it[1]}.ifEmpty([]) )
+        ch_versions      = ch_versions.mix( MALT_RUN.out.versions.first() )
+        ch_raw_profiles  = ch_raw_profiles.mix( MALT_RUN.out.rma6 )
     }
 
     if ( params.run_kraken2 ) {
         KRAKEN2_KRAKEN2 ( ch_input_for_kraken2.reads, ch_input_for_kraken2.db )
-        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
-        ch_versions = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_multiqc_files = ch_multiqc_files.mix( KRAKEN2_KRAKEN2.out.txt.collect{it[1]}.ifEmpty([]) )
+        ch_versions      = ch_versions.mix( KRAKEN2_KRAKEN2.out.versions.first() )
+        ch_raw_profiles  = ch_raw_profiles.mix( KRAKEN2_KRAKEN2.out.txt )
     }
 
     if ( params.run_centrifuge ) {
         CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_unaligned, params.centrifuge_save_aligned, params.centrifuge_sam_format )
-        ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_versions     = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() )
+        ch_raw_profiles = ch_raw_profiles.mix( CENTRIFUGE_CENTRIFUGE.out.report )
     }
 
     if ( params.run_metaphlan3 ) {
         METAPHLAN3 ( ch_input_for_metaphlan3.reads, ch_input_for_metaphlan3.db )
         ch_versions = ch_versions.mix( METAPHLAN3.out.versions.first() )
+        ch_raw_profiles = ch_raw_profiles.mix( METAPHLAN3.out.biom )
     }
 
 
     emit:
-    // TODO work out if there is enough standardisation of output to export as one?
-    //output = ch_filtered_reads // channel: [ val(meta), [ reads ] ]
+    profiles = ch_raw_profiles    // channel: [ val(meta), profile ] (MALT rma6 / Kraken2 txt / Centrifuge report / MetaPhlAn3 biom)
     versions = ch_versions          // channel: [ versions.yml ]
     mqc      = ch_multiqc_files
 }
diff --git a/subworkflows/local/shortread_postprocessing.nf b/subworkflows/local/shortread_postprocessing.nf
new file mode 100644
index 0000000..7fb0d70
--- /dev/null
+++ b/subworkflows/local/shortread_postprocessing.nf
@@ -0,0 +1,39 @@
+//
+// Perform read trimming and merging
+//
+// NOTE(review): take/emit comments mention taxon tables while the body clips/merges reads and runs FastQC -- confirm intent.
+
+include { SHORTREAD_FASTP             } from './shortread_fastp'
+include { SHORTREAD_ADAPTERREMOVAL    } from './shortread_adapterremoval'
+include { FASTQC as FASTQC_PROCESSED  } from '../../modules/nf-core/modules/fastqc/main'
+
+workflow SHORTREAD_POSTPROCESSING {
+    take:
+    input // [ [ meta ], [ taxon_table/file ] ]
+
+    main:
+    ch_versions      = Channel.empty()
+    ch_multiqc_files = Channel.empty()
+
+    if ( params.shortread_clipmerge_tool == "fastp" ) {
+        ch_processed_reads = SHORTREAD_FASTP ( input ).reads
+        ch_versions        = ch_versions.mix( SHORTREAD_FASTP.out.versions )
+        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_FASTP.out.mqc )
+    } else if ( params.shortread_clipmerge_tool == "adapterremoval" ) {
+        ch_processed_reads = SHORTREAD_ADAPTERREMOVAL ( input ).reads
+        ch_versions        = ch_versions.mix( SHORTREAD_ADAPTERREMOVAL.out.versions )
+        ch_multiqc_files   = ch_multiqc_files.mix( SHORTREAD_ADAPTERREMOVAL.out.mqc )
+    } else {
+        ch_processed_reads = input
+    }
+
+    FASTQC_PROCESSED ( ch_processed_reads )
+    ch_versions      = ch_versions.mix( FASTQC_PROCESSED.out.versions )
+    ch_multiqc_files = ch_multiqc_files.mix( FASTQC_PROCESSED.out.zip )
+
+    emit:
+    output   = ch_processed_reads   // channel: [ val(meta), taxon_table ]
+    versions = ch_versions          // channel: [ versions.yml ]
+    mqc      = ch_multiqc_files
+}
+