diff --git a/conf/modules.config b/conf/modules.config index 8ef8728..815e216 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -487,9 +487,9 @@ process { } withName: KAIJU_KAIJU2TABLE { - ext.prefix = { "kaiju_${meta.id}_combined_reports" } + ext.prefix = params.perform_runmerging ? { "${meta.id}_${meta.db_name}.kaijutable" } : { "${meta.id}_${meta.run_accession}_${meta.db_name}.kaijutable" } publishDir = [ - path: { "${params.outdir}/kaiju/" }, + path: { "${params.outdir}/kaiju/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt}' ] diff --git a/docs/output.md b/docs/output.md index 8c5bc5c..1a03109 100644 --- a/docs/output.md +++ b/docs/output.md @@ -332,18 +332,19 @@ The main taxonomic classification files from Centrifuge are the `_combined_repor ### Kaiju -[Kaiju](https://github.com/bioinformatics-centre/kaiju) is a taxonomic classifier that finds maximum exact matches on the protein-level using the Burrows–Wheeler transform. +[Kaiju](https://github.com/bioinformatics-centre/kaiju) is a taxonomic classifier that finds maximum exact matches on the protein-level using the Burrows-Wheeler transform.
Output files - `kaiju` - - `.tsv`: A file that summarises the fraction abundance, taxonomic ID, number of reads and taxonomic names - - `kaiju__combined_reports.txt`: A combined profile of all samples aligned to a given database (as generated by `kaiju2table`) + - `/` + - `_.kaiju.tsv`: Raw output from Kaiju with taxonomic rank, read ID and taxonomic ID + - `_.kaijutable.txt`: Summarised Kaiju output with fraction abundance, taxonomic ID, number of reads, and taxonomic names (as generated by `kaiju2table`)
-The most summary file is the `*combined_reports.txt` file which summarises results across all samples. However if you wish to look at more precise information about each assignment, check the per-sample file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`. +The most useful summary file is the `*kaijutable.txt` file which summarises hits across all reads. However, if you wish to look at more precise information on a per-read basis, check the `*tsv` file. The default taxonomic rank is `species`. You can provide a different one by updating the argument `--kaiju_taxon_rank`. ### DIAMOND diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index cf5a9b8..cbd8071 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -5,12 +5,13 @@ include { MALT_RUN } from '../../modules/nf-core/malt/run/main' include { MEGAN_RMA2INFO as MEGAN_RMA2INFO_TSV } from '../../modules/nf-core/megan/rma2info/main' include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/kraken2/main' -include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report' +include { KRAKEN2_STANDARD_REPORT } from '../../modules/local/kraken2_standard_report' include { BRACKEN_BRACKEN } from '../../modules/nf-core/bracken/bracken/main' include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/centrifuge/centrifuge/main' include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/centrifuge/kreport/main' include { METAPHLAN3_METAPHLAN3 } from '../../modules/nf-core/metaphlan3/metaphlan3/main' include { KAIJU_KAIJU } from '../../modules/nf-core/kaiju/kaiju/main' +include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/diamond/blastx/main' include { MOTUS_PROFILE } from '../../modules/nf-core/motus/profile/main' include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from 
'../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main' @@ -269,7 +270,12 @@ workflow PROFILING { KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) - ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) + + KAIJU_KAIJU2TABLE ( KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank) + ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) + + ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) + ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU2TABLE.out.summary ) } diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 582aaed..303d8fd 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -3,7 +3,6 @@ // include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main' -include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/kaiju/kaiju2table/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main' include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main' include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main' @@ -79,21 +78,6 @@ workflow STANDARDISATION_PROFILES { ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt ) ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions ) - // Kaiju - - // Collect and replace id for db_name for prefix - ch_profiles_for_kaiju = ch_input_classifications.kaiju - .map { [it[0]['db_name'], it[1]] } - .groupTuple() - .map { - [[id:it[0]], it[1]] - } - - KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, 
ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank) - ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary ) - ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) - ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) - // Kraken2 // Collect and replace id for db_name for prefix