From 8939ce3e2088a7ba89c0fdbf29901e98df4e231f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 11 Dec 2022 16:52:34 +0100 Subject: [PATCH] Start adding bracken/combinebrackenreports (needs module update) --- conf/modules.config | 9 +++++ modules.json | 5 +++ .../bracken/combinebrackenoutputs/main.nf | 36 +++++++++++++++++++ .../bracken/combinebrackenoutputs/meta.yml | 31 ++++++++++++++++ .../local/standardisation_profiles.nf | 19 ++++++++-- 5 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/bracken/combinebrackenoutputs/main.nf create mode 100644 modules/nf-core/bracken/combinebrackenoutputs/meta.yml diff --git a/conf/modules.config b/conf/modules.config index dd85c0c..a8bf3e8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -321,6 +321,15 @@ process { ] } + withName: BRACKEN_COMBINEBRACKENOUTPUTS { + ext.prefix = { "bracken_${meta.id}_combined_reports" } + publishDir = [ + path: { "${params.outdir}/bracken/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } + withName: KRAKENTOOLS_COMBINEKREPORTS { ext.prefix = { "kraken2_${meta.id}_combined_reports" } publishDir = [ diff --git a/modules.json b/modules.json index 377e902..7815f5d 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "8cab56516076b23c6f8eb1ac20ba4ce9692c85e1", "installed_by": ["modules"] }, + "bracken/combinebrackenoutputs": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", diff --git a/modules/nf-core/bracken/combinebrackenoutputs/main.nf b/modules/nf-core/bracken/combinebrackenoutputs/main.nf new file mode 100644 index 0000000..73b7148 --- /dev/null +++ b/modules/nf-core/bracken/combinebrackenoutputs/main.nf @@ -0,0 +1,36 @@ +process BRACKEN_COMBINEBRACKENOUTPUTS { + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::bracken=2.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bracken:2.7--py39hc16433a_0': + 'quay.io/biocontainers/bracken:2.7--py39hc16433a_0' }" + + input: + path input + + output: + path "*.txt" , emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "bracken_combined.txt" + // WARN: Version information not provided by tool on CLI. + // Please update version string below when bumping container versions. + def VERSION = '2.7' + """ + combine_bracken_outputs.py \\ + $args \\ + --files ${input} \\ + -o ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + combine_bracken_output: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/bracken/combinebrackenoutputs/meta.yml b/modules/nf-core/bracken/combinebrackenoutputs/meta.yml new file mode 100644 index 0000000..c4a9712 --- /dev/null +++ b/modules/nf-core/bracken/combinebrackenoutputs/meta.yml @@ -0,0 +1,31 @@ +name: "bracken_combinebrackenoutputs" +description: Combine output of metagenomic samples analyzed by bracken. +keywords: + - sort +tools: + - "bracken": + description: Bracken (Bayesian Reestimation of Abundance with KrakEN) is a highly accurate statistical method that computes the abundance of species in DNA sequences from a metagenomics sample. 
+ homepage: https://ccb.jhu.edu/software/bracken/ + documentation: https://ccb.jhu.edu/software/bracken/index.shtml?t=manual + tool_dev_url: https://github.com/jenniferlu717/Bracken + doi: "10.7717/peerj-cs.104" + licence: ["GPL v3"] + +input: + - input: + type: file + description: List of output files from bracken + pattern: "*" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - txt: + type: file + description: Combined output in table format + pattern: "*.txt" + +authors: + - "@jfy133" diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 8c73472..582aaed 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -2,6 +2,7 @@ // Standardise output files e.g. aggregation // +include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main' include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' @@ -25,10 +26,11 @@ workflow STANDARDISATION_PROFILES { */ ch_input_profiles = profiles .branch { - motus: it[0]['tool'] == 'motus' - kraken2: it[0]['tool'] == 'kraken2' + bracken: it[0]['tool'] == 'bracken' centrifuge: it[0]['tool'] == 'centrifuge' + kraken2: it[0]['tool'] == 'kraken2' metaphlan3: it[0]['tool'] == 'metaphlan3' + motus: it[0]['tool'] == 'motus' unknown: true } @@ -49,7 +51,18 @@ workflow STANDARDISATION_PROFILES { Standardise and aggregate */ - // CENTRIFUGE + // Bracken + + ch_profiles_for_bracken = ch_input_profiles.bracken + .map { [it[0]['db_name'], it[1]] } + .groupTuple() + .map { + [[id:it[0]], it[1]] + } + + 
BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken ) + + // CENTRIFUGE // Collect and replace id for db_name for prefix // Have to sort by size to ensure first file actually has hits otherwise