Move kaiju2table to profiling

2024-12-22 10:18:17 +00:00 · 2023-02-16 13:39:05 +01:00 · 2023-02-16 13:39:05 +01:00 · 79b4db9a34
commit 79b4db9a34
parent 21d9135b14
4 changed files with 15 additions and 24 deletions
--- a/conf/modules.config
+++ b/conf/modules.config
@ -487,9 +487,9 @@ process {
    }

    withName: KAIJU_KAIJU2TABLE {
-        ext.prefix = { "kaiju_${meta.id}_combined_reports" }
+        ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" }
        publishDir = [
-            path: { "${params.outdir}/kaiju/" },
+            path: { "${params.outdir}/kaiju/${meta.db_name}/" },
            mode: params.publish_dir_mode,
            pattern: '*.{txt}'
        ]
--- a/docs/output.md
+++ b/docs/output.md
@ -332,18 +332,19 @@ The main taxonomic classification files from Centrifuge are the `_combined_repor

 ### Kaiju

-[Kaiju](https://github.com/bioinformatics-centre/kaiju) is a taxonomic classifier that finds maximum exact matches on the protein-level using the Burrows–Wheeler transform.
+[Kaiju](https://github.com/bioinformatics-centre/kaiju) is a taxonomic classifier that finds maximum exact matches on the protein-level using the Burrows-Wheeler transform.

 <details markdown="1">
 <summary>Output files</summary>

 - `kaiju`
-  - `<sample_id>.tsv`: A file that summarises the fraction abundance, taxonomic ID, number of reads and taxonomic names
-  - `kaiju_<db_name>_combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `kaiju2table`)
+  - `<db_name>/`
+    - `<sample_id>_<db_name>.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonomic ID
+    - `<sample_id>_<db_name>.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`)

 </details>

-The most summary file is the `*combined_reports.txt` file which summarises results across all samples. However if you wish to look at more precise information about each assignment, check the per-sample file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.
+The most useful summary file is the `*kaijutable.txt` file which summarises hits across all reads. However, if you wish to look at more precise information on a per-read basis, see the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`.

 ### DIAMOND

--- a/subworkflows/local/profiling.nf
+++ b/subworkflows/local/profiling.nf
@ -5,12 +5,13 @@
 include { MALT_RUN                              } from '../../modules/nf-core/malt/run/main'
 include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV  } from '../../modules/nf-core/megan/rma2info/main'
 include { KRAKEN2_KRAKEN2                       } from '../../modules/nf-core/kraken2/kraken2/main'
-include { KRAKEN2_STANDARD_REPORT                } from '../../modules/local/kraken2_standard_report'
+include { KRAKEN2_STANDARD_REPORT               } from '../../modules/local/kraken2_standard_report'
 include { BRACKEN_BRACKEN                       } from '../../modules/nf-core/bracken/bracken/main'
 include { CENTRIFUGE_CENTRIFUGE                 } from '../../modules/nf-core/centrifuge/centrifuge/main'
 include { CENTRIFUGE_KREPORT                    } from '../../modules/nf-core/centrifuge/kreport/main'
 include { METAPHLAN3_METAPHLAN3                 } from '../../modules/nf-core/metaphlan3/metaphlan3/main'
 include { KAIJU_KAIJU                           } from '../../modules/nf-core/kaiju/kaiju/main'
+include { KAIJU_KAIJU2TABLE                     } from '../../modules/nf-core/kaiju/kaiju2table/main'
 include { DIAMOND_BLASTX                        } from '../../modules/nf-core/diamond/blastx/main'
 include { MOTUS_PROFILE                         } from '../../modules/nf-core/motus/profile/main'
 include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ        } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main'
@ -269,7 +270,12 @@ workflow PROFILING {

        KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db)
        ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() )
-        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results )
+
+        KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank)
+        ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
+
+        ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
+        ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU2TABLE.out.summary )

    }

--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@ -3,7 +3,6 @@
 //

 include { BRACKEN_COMBINEBRACKENOUTPUTS                                         } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
-include { KAIJU_KAIJU2TABLE                                                     } from '../../modules/nf-core/kaiju/kaiju2table/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN     } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
 include { METAPHLAN3_MERGEMETAPHLANTABLES                                       } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
@ -79,21 +78,6 @@ workflow STANDARDISATION_PROFILES {
    ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
    ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )

-    // Kaiju
-
-    // Collect and replace id for db_name for prefix
-    ch_profiles_for_kaiju = ch_input_classifications.kaiju
-                                .map { [it[0]['db_name'], it[1]] }
-                                .groupTuple()
-                                .map {
-                                    [[id:it[0]], it[1]]
-                                }
-
-    KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
-    ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary )
-    ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary )
-    ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions )
-
    // Kraken2

    // Collect and replace id for db_name for prefix