From 7dfcdd015b77c28ff172fcca1f568cdef0574e1e Mon Sep 17 00:00:00 2001
From: James Fellows Yates <jfy133@gmail.com>
Date: Thu, 16 Feb 2023 14:02:13 +0100
Subject: [PATCH] Re-add the 'native' combined kaiju output

---
 conf/modules.config                            | 11 ++++++++++-
 docs/output.md                                 |  3 ++-
 subworkflows/local/profiling.nf                |  5 ++---
 subworkflows/local/standardisation_profiles.nf | 16 ++++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)
diff --git a/conf/modules.config b/conf/modules.config
index 815e216..22001be 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -486,7 +486,7 @@ process {
         ext.args = { "${meta.db_params}" }
     }
 
-    withName: KAIJU_KAIJU2TABLE {
+    withName: '.*PROFILING:KAIJU_KAIJU2TABLE' {
         ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
         publishDir = [
             path: { "${params.outdir}/kaiju/${meta.db_name}/" },
@@ -495,6 +495,15 @@ process {
         ]
     }
 
+    withName: '.*STANDARDISATION_PROFILES:KAIJU_KAIJU2TABLE' {
+        ext.prefix = { "kaiju_${meta.id}_combined_reports" }
+        publishDir = [
+            path: { "${params.outdir}/kaiju/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{txt}'
+        ]
+    }
+
     withName: KAIJU_KAIJU2KRONA {
         ext.args = '-v -u'
     }
diff --git a/docs/output.md b/docs/output.md
index f875c7e..be681bd 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -338,13 +338,14 @@ The main taxonomic classification files from Centrifuge are the `_combined_repor
 <summary>Output files</summary>
 
 - `kaiju`
+  - `kaiju_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `kaiju2table`)
   - `<db_name>/`
     - `<sample_id>_<db_name>.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonic ID
     - `<sample_id>_<db_name>.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`)
 
 </details>
 
-The most useful summary file is the `*kaijutable.txt` file which summarises hits across all reads. However if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.
+The most useful summary file is the `_combined_reports.txt` file, which summarises hits across all reads and samples. Separate per-sample summaries can be found in `<db_name>/*.txt`. However, if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.
 
 ### DIAMOND
 
diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf
index cbd8071..a55dcaf 100644
--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@@ -270,13 +270,12 @@ workflow PROFILING {
 
         KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
         ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
 
         KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
         ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
-
         ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
-        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU2TABLE.out.summary )
-
+        ch_raw_profiles    = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
     }
 
     if ( params.run_diamond ) {
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index 303d8fd..582aaed 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -3,6 +3,7 @@
 //
 
 include { BRACKEN_COMBINEBRACKENOUTPUTS                                         } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
+include { KAIJU_KAIJU2TABLE                                                     } from '../../modules/nf-core/kaiju/kaiju2table/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN     } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { METAPHLAN3_MERGEMETAPHLANTABLES                                       } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
@@ -78,6 +79,21 @@ workflow STANDARDISATION_PROFILES {
     ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
     ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )
 
+    // Kaiju
+
+    // Group the raw Kaiju classifications by database and replace id with db_name, which is used for the output prefix
+    ch_profiles_for_kaiju = ch_input_classifications.kaiju
+                                .map { [it[0]['db_name'], it[1]] }
+                                .groupTuple()
+                                .map {
+                                    [[id:it[0]], it[1]]
+                                }
+
+    KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
+    ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary )
+    ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
+    ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
+
     // Kraken2
 
     // Collect and replace id for db_name for prefix