Re-add the 'native' combined kaiju output

2024-11-24 20:09:54 +00:00 · 2023-02-16 14:02:13 +01:00 · 2023-02-16 14:02:13 +01:00 · 7dfcdd015b
commit 7dfcdd015b
parent 4e818efe6d
4 changed files with 30 additions and 5 deletions
--- a/conf/modules.config
+++ b/conf/modules.config
@ -486,7 +486,7 @@ process {
        ext.args = { "${meta.db_params}" }
    }

-    withName: KAIJU_KAIJU2TABLE {
+    withName: '.*PROFILING:KAIJU_KAIJU2TABLE' {
        ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
        publishDir = [
            path: { "${params.outdir}/kaiju/${meta.db_name}/" },
@ -495,6 +495,15 @@ process {
        ]
    }

+    withName: '.*STANDARDISATION_PROFILES:KAIJU_KAIJU2TABLE' {
+        ext.prefix = { "kaiju_${meta.id}_combined_reports" }
+        publishDir = [
+            path: { "${params.outdir}/kaiju/" },
+            mode: params.publish_dir_mode,
+            pattern: '*.{txt}'
+        ]
+    }
+
    withName: KAIJU_KAIJU2KRONA {
        ext.args = '-v -u'
    }
--- a/docs/output.md
+++ b/docs/output.md
@ -338,13 +338,14 @@ The main taxonomic classification files from Centrifuge are the `_combined_repor
 <summary>Output files</summary>

 - `kaiju`
+  - `kaiju_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `kaiju2table`)
  - `<db_name>/`
    - `<sample_id>_<db_name>.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonomic ID
    - `<sample_id>_<db_name>.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`)

 </details>

-The most useful summary file is the `*kaijutable.txt` file which summarises hits across all reads. However if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.
+The most useful summary file is the `_combined_reports.txt` file which summarises hits across all reads and samples. Separate per-sample summaries can be found in `<db_name>/*.txt`. However, if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.

 ### DIAMOND

--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -270,13 +270,12 @@ workflow PROFILING {

        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )

        KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
        ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
-
        ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
-        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU2TABLE.out.summary )
-
+        ch_raw_profiles    = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary )
    }

    if ( params.run_diamond ) {
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@ -3,6 +3,7 @@
 //

 include { BRACKEN_COMBINEBRACKENOUTPUTS                                         } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
+include { KAIJU_KAIJU2TABLE                                                     } from '../../modules/nf-core/kaiju/kaiju2table/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN     } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { METAPHLAN3_MERGEMETAPHLANTABLES                                       } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
@ -78,6 +79,21 @@ workflow STANDARDISATION_PROFILES {
    ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
    ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )

+    // Kaiju
+
+    // Collect and replace id with db_name for prefix
+    ch_profiles_for_kaiju = ch_input_classifications.kaiju
+                                .map { [it[0]['db_name'], it[1]] }
+                                .groupTuple()
+                                .map {
+                                    [[id:it[0]], it[1]]
+                                }
+
+    KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
+    ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary )
+    ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
+    ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
+
    // Kraken2

    // Collect and replace id for db_name for prefix