From 7dfcdd015b77c28ff172fcca1f568cdef0574e1e Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 16 Feb 2023 14:02:13 +0100 Subject: [PATCH] Re-add the 'native' combined kaiju output --- conf/modules.config | 11 ++++++++++- docs/output.md | 3 ++- subworkflows/local/profiling.nf | 5 ++--- subworkflows/local/standardisation_profiles.nf | 16 ++++++++++++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 815e216..22001be 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -486,7 +486,7 @@ process { ext.args = { "${meta.db_params}" } } - withName: KAIJU_KAIJU2TABLE { + withName: '.*PROFILING:KAIJU_KAIJU2TABLE' { ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" } publishDir = [ path: { "${params.outdir}/kaiju/${meta.db_name}/" }, @@ -495,6 +495,15 @@ process { ] } + withName: '.*STANDARDISATION_PROFILES:KAIJU_KAIJU2TABLE' { + ext.prefix = { "kaiju_${meta.id}_combined_reports" } + publishDir = [ + path: { "${params.outdir}/kaiju/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt}' + ] + } + withName: KAIJU_KAIJU2KRONA { ext.args = '-v -u' } diff --git a/docs/output.md b/docs/output.md index f875c7e..be681bd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -338,13 +338,14 @@ The main taxonomic classification files from Centrifuge are the `_combined_repor Output files - `kaiju` + - `kaiju__combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by kaiju2table) - `/` - `_.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonic ID - `_.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`) -The most useful summary file is the `*kaijutable.txt` file which summarises hits across all reads. 
However if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`. +The most useful summary file is the `kaiju_<db_name>_combined_reports.txt` file which summarises hits across all reads and samples. Separate per-sample summaries can be found in `<db_name>/*.txt`. However, if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`. ### DIAMOND diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index cbd8071..a55dcaf 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -270,13 +270,12 @@ workflow PROFILING { KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) + ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank) ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) - ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) - ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU2TABLE.out.summary ) - + ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } if ( params.run_diamond ) { diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 303d8fd..582aaed 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -3,6 +3,7 @@ // include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main' +include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' include { KRAKENTOOLS_COMBINEKREPORTS as 
KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' @@ -78,6 +79,21 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions ) + // Kaiju + + // Collect and replace id for db_name for prefix + ch_profiles_for_kaiju = ch_input_classifications.kaiju + .map { [it[0]['db_name'], it[1]] } + .groupTuple() + .map { + [[id:it[0]], it[1]] + } + + KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank) + ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary ) + ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) + ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) + // Kraken2 // Collect and replace id for db_name for prefix