taxprofiler/subworkflows/local/standardisation_profiles.nf

//
// Standardise output files e.g. aggregation
//

include { BRACKEN_COMBINEBRACKENOUTPUTS                                         } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'
include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_COMBINED                       } from '../../modules/nf-core/kaiju/kaiju2table/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN     } from '../../modules/nf-core/krakentools/combinekreports/main'
include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'
include { METAPHLAN3_MERGEMETAPHLANTABLES                                       } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'
include { MOTUS_MERGE                                                           } from '../../modules/nf-core/motus/merge/main'
include { TAXPASTA_MERGE                                                        } from '../../modules/nf-core/taxpasta/merge/main'

//
// STANDARDISATION_PROFILES
//
// Aggregates per-sample outputs into multi-sample tables: once across all
// tools via Taxpasta, and once per tool using that tool's own merge utility.
//
// take:
//   classifications : channel of [ meta, file ]; only meta.tool == 'kaiju' is used here
//   profiles        : channel of [ meta, file ]; meta.tool and meta.db_name are read
//   databases       : channel of [ meta, db ];   meta.tool is read, db is passed to Kaiju/mOTUs
//   motu_version    : passed through unchanged to MOTUS_MERGE
//
// emit:
//   taxpasta : TAXPASTA_MERGE.out.merged_profiles
//   versions : versions output of every module invoked in this subworkflow
//   mqc      : merged tables that are forwarded as MultiQC input
//
workflow STANDARDISATION_PROFILES {
    take:
    classifications
    profiles
    databases
    motu_version

    main:
    ch_versions            = Channel.empty()
    ch_multiqc_files       = Channel.empty()

    // Taxpasta standardisation

    // Build a reduced meta holding only the database name (as id) and the tool
    // name, then group so that each (database, tool) pair yields one merge job.
    // The tool name is rewritten for 'metaphlan3' and 'malt' before being
    // handed to Taxpasta.
    // NOTE(review): groups containing a single profile are passed to
    // TAXPASTA_MERGE as-is — confirm the module copes with one input file.
    ch_input_for_taxpasta = profiles
                            .map {
                                    meta, profile ->
                                        def meta_new = [:]
                                        meta_new.id = meta.db_name
                                        meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool
                                        [meta_new, profile]
                            }
                            .groupTuple ()
                            .map { [ it[0], it[1].flatten() ] }

    // Optional taxonomy directory; an empty list is supplied when the
    // parameter is not set.
    ch_taxpasta_tax_dir = params.taxpasta_taxonomy_dir ? Channel.fromPath(params.taxpasta_taxonomy_dir, checkIfExists: true).collect() : []

    TAXPASTA_MERGE (ch_input_for_taxpasta, ch_taxpasta_tax_dir, [])
    // Record the Taxpasta version like every other module in this subworkflow
    ch_versions = ch_versions.mix( TAXPASTA_MERGE.out.versions )

    /*
        Split profile results based on tool they come from
    */
    ch_input_profiles = profiles
        .branch {
            bracken: it[0]['tool'] == 'bracken'
            centrifuge: it[0]['tool'] == 'centrifuge'
            kraken2: it[0]['tool'] == 'kraken2'
            metaphlan3: it[0]['tool'] == 'metaphlan3'
            motus: it[0]['tool'] == 'motus'
            unknown: true
        }

    ch_input_classifications = classifications
        .branch {
            kaiju: it[0]['tool'] == 'kaiju'
            unknown: true
        }

    ch_input_databases = databases
        .branch {
            motus: it[0]['tool'] == 'motus'
            kaiju: it[0]['tool'] == 'kaiju'
            unknown: true
        }

    /*
        Standardise and aggregate
    */

    // Bracken

    // Collect and replace id for db_name for prefix
    ch_profiles_for_bracken = ch_input_profiles.bracken
                            .map { [it[0]['db_name'], it[1]] }
                            .groupTuple()
                            .map {
                                [[id:it[0]], it[1]]
                            }

    BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken )
    // Record the Bracken version like every other module in this subworkflow
    ch_versions = ch_versions.mix( BRACKEN_COMBINEBRACKENOUTPUTS.out.versions )

    // CENTRIFUGE

    // Collect and replace id for db_name for prefix
    // Have to sort by size to ensure first file actually has hits otherwise
    // the script fails
    ch_profiles_for_centrifuge = ch_input_profiles.centrifuge
                                .map { [it[0]['db_name'], it[1]] }
                                .groupTuple(sort: {-it.size()} )
                                .map {
                                    [[id:it[0]], it[1]]
                                }


    KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
    ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
    ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )

    // Kaiju

    // Collect and replace id for db_name for prefix
    ch_profiles_for_kaiju = ch_input_classifications.kaiju
                                .map { [it[0]['db_name'], it[1]] }
                                .groupTuple()
                                .map {
                                    [[id:it[0]], it[1]]
                                }

    // NOTE(review): the database channel is passed without any key linking it
    // to a particular group of profiles — confirm behaviour when more than one
    // Kaiju database is supplied.
    KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)
    ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary )
    ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions )

    // Kraken2

    // Collect and replace id for db_name for prefix
    // Have to sort by size to ensure first file actually has hits otherwise
    // the script fails
    ch_profiles_for_kraken2 = ch_input_profiles.kraken2
                                .map { [it[0]['db_name'], it[1]] }
                                .groupTuple(sort: {-it.size()} )
                                .map {
                                    [[id:it[0]], it[1]]
                                }

    KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 )
    ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )
    ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions )

    // MetaPhlAn3

    // Collect and replace id for db_name for prefix, so that tables are merged
    // on a per-database basis
    ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3
                            .map { [it[0]['db_name'], it[1]] }
                            .groupTuple()
                            .map {
                                [[id:it[0]], it[1]]
                            }

    METAPHLAN3_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan3 )
    ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )
    ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )

    // mOTUs

    // mOTUs has a 'single' database, and cannot create custom ones.
    // NOTE(review): an earlier version of this comment stated that db info is
    // removed here; the code below still groups by db_name and uses it as the
    // id, exactly as for the other tools — confirm which is intended.

    ch_profiles_for_motus = ch_input_profiles.motus
                                .map { [it[0]['db_name'], it[1]] }
                                .groupTuple()
                                .map {
                                    [[id:it[0]], it[1]]
                                }

    // The database channel is passed without a key linking it to the grouped
    // profiles (same pattern as for Kaiju above).
    MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version )
    ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )

    emit:
    taxpasta = TAXPASTA_MERGE.out.merged_profiles
    versions = ch_versions
    mqc      = ch_multiqc_files
}
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`//`
Start work on more merging tools 2022-09-06 15:34:45 +00:00			`// Standardise output files e.g. aggregation`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`//`

Start adding bracken/combinebrackenreports (need smodule update) 2022-12-11 15:52:34 +00:00			`include { BRACKEN_COMBINEBRACKENOUTPUTS } from '../../modules/nf-core/bracken/combinebrackenoutputs/main'`
Fix ugly warn when KAIJU2TABLE not run 2023-03-06 07:17:50 +00:00			`include { KAIJU_KAIJU2TABLE as KAIJU_KAIJU2TABLE_COMBINED } from '../../modules/nf-core/kaiju/kaiju2table/main'`
Fix centrifuge warning, filter out long reads from Brakcen, add some caveat docs to usage.md 2022-12-02 11:47:14 +00:00			`include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_KRAKEN } from '../../modules/nf-core/krakentools/combinekreports/main'`
fix: adjust import paths to re-organization 2022-10-05 11:40:43 +00:00			`include { KRAKENTOOLS_COMBINEKREPORTS as KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE } from '../../modules/nf-core/krakentools/combinekreports/main'`
			`include { METAPHLAN3_MERGEMETAPHLANTABLES } from '../../modules/nf-core/metaphlan3/mergemetaphlantables/main'`
			`include { MOTUS_MERGE } from '../../modules/nf-core/motus/merge/main'`
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00			`include { TAXPASTA_MERGE } from '../../modules/nf-core/taxpasta/merge/main'`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00
			`workflow STANDARDISATION_PROFILES {`
			`take:`
			`classifications`
			`profiles`
			`databases`
			`motu_version`

			`main:`
			`ch_versions = Channel.empty()`
Start work on more merging tools 2022-09-06 15:34:45 +00:00			`ch_multiqc_files = Channel.empty()`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00			`//Taxpasta standardisation`
			`ch_input_for_taxpasta = profiles`
			`.map {`
Fix KrakenUniq Taxpasta issue 2023-02-22 07:24:48 +00:00			`meta, profile ->`
			`def meta_new = [:]`
			`meta_new.id = meta.db_name`
			`meta_new.tool = meta.tool == 'metaphlan3' ? 'metaphlan' : meta.tool == 'malt' ? 'megan6' : meta.tool`
			`[meta_new, profile]`
			`}`
			`.groupTuple ()`
			`.map { [ it[0], it[1].flatten() ] }`
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00
Fix staging 2023-05-11 13:04:37 +00:00			`ch_taxpasta_tax_dir = params.taxpasta_taxonomy_dir ? Channel.fromPath(params.taxpasta_taxonomy_dir, checkIfExists: true).collect() : []`

			`TAXPASTA_MERGE (ch_input_for_taxpasta, ch_taxpasta_tax_dir, [])`
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`/*`
			`Split profile results based on tool they come from`
			`*/`
			`ch_input_profiles = profiles`
			`.branch {`
Start adding bracken/combinebrackenreports (need smodule update) 2022-12-11 15:52:34 +00:00			`bracken: it[0]['tool'] == 'bracken'`
Add centrigue kreports 2022-09-13 14:47:19 +00:00			`centrifuge: it[0]['tool'] == 'centrifuge'`
Start adding bracken/combinebrackenreports (need smodule update) 2022-12-11 15:52:34 +00:00			`kraken2: it[0]['tool'] == 'kraken2'`
Start works - need updated module though t have meta 2022-09-09 11:57:06 +00:00			`metaphlan3: it[0]['tool'] == 'metaphlan3'`
Start adding bracken/combinebrackenreports (need smodule update) 2022-12-11 15:52:34 +00:00			`motus: it[0]['tool'] == 'motus'`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`unknown: true`
			`}`

			`ch_input_classifications = classifications`
			`.branch {`
Start work on more merging tools 2022-09-06 15:34:45 +00:00			`kaiju: it[0]['tool'] == 'kaiju'`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`unknown: true`
			`}`

			`ch_input_databases = databases`
			`.branch {`
			`motus: it[0]['tool'] == 'motus'`
Start work on more merging tools 2022-09-06 15:34:45 +00:00			`kaiju: it[0]['tool'] == 'kaiju'`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`unknown: true`
			`}`

			`/*`
			`Standardise and aggregate`
			`*/`

Start adding bracken/combinebrackenreports (need smodule update) 2022-12-11 15:52:34 +00:00			`// Bracken`

			`ch_profiles_for_bracken = ch_input_profiles.bracken`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple()`
			`.map {`
			`[[id:it[0]], it[1]]`
			`}`

			`BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken )`

			`// CENTRIFUGE`
Add centrigue kreports 2022-09-13 14:47:19 +00:00
			`// Collect and replace id for db_name for prefix`
			`// Have to sort by size to ensure first file actually has hits otherwise`
			`// the script fails`
			`ch_profiles_for_centrifuge = ch_input_profiles.centrifuge`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple(sort: {-it.size()} )`
			`.map {`
			`[[id:it[0]], it[1]]`
			`}`

Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00
Add centrigue kreports 2022-09-13 14:47:19 +00:00			`KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )`
			`ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )`
			`ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.versions )`

Start work on more merging tools 2022-09-06 15:34:45 +00:00			`// Kaiju`

			`// Collect and replace id for db_name for prefix`
			`ch_profiles_for_kaiju = ch_input_classifications.kaiju`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple()`
			`.map {`
			`[[id:it[0]], it[1]]`
			`}`

Fix ugly warn when KAIJU2TABLE not run 2023-03-06 07:17:50 +00:00			`KAIJU_KAIJU2TABLE_COMBINED ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank)`
			`ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE_COMBINED.out.summary )`
			`ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE_COMBINED.out.versions )`
Start work on more merging tools 2022-09-06 15:34:45 +00:00
Adds KRAKEN_COMBINEKREPORTS for 'native' multi-sample OTU 2022-09-13 13:47:28 +00:00			`// Kraken2`

			`// Collect and replace id for db_name for prefix`
Add centrigue kreports 2022-09-13 14:47:19 +00:00			`// Have to sort by size to ensure first file actually has hits otherwise`
			`// the script fails`
Adds KRAKEN_COMBINEKREPORTS for 'native' multi-sample OTU 2022-09-13 13:47:28 +00:00			`ch_profiles_for_kraken2 = ch_input_profiles.kraken2`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple(sort: {-it.size()} )`
			`.map {`
			`[[id:it[0]], it[1]]`
			`}`

Fix centrifuge warning, filter out long reads from Brakcen, add some caveat docs to usage.md 2022-12-02 11:47:14 +00:00			`KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 )`
			`ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )`
			`ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions )`
Adds KRAKEN_COMBINEKREPORTS for 'native' multi-sample OTU 2022-09-13 13:47:28 +00:00
Start works - need updated module though t have meta 2022-09-09 11:57:06 +00:00			`// MetaPhlAn3`
fix: adjust import paths to re-organization 2022-10-05 11:40:43 +00:00
Start works - need updated module though t have meta 2022-09-09 11:57:06 +00:00			`ch_profiles_for_metaphlan3 = ch_input_profiles.metaphlan3`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple()`
			`.map {`
Ensure merge metaphlan3 tables happens on per database basis 2022-09-15 10:29:54 +00:00			`[[id:it[0]], it[1]]`
Start works - need updated module though t have meta 2022-09-09 11:57:06 +00:00			`}`

			`METAPHLAN3_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan3 )`
			`ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.txt )`
			`ch_versions = ch_versions.mix( METAPHLAN3_MERGEMETAPHLANTABLES.out.versions )`
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00
Start works - need updated module though t have meta 2022-09-09 11:57:06 +00:00			`// mOTUs`

Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`// mOTUs has a 'single' database, and cannot create custom ones.`
			`// Therefore removing db info here, and publish merged at root mOTUs results`
			`// directory`
Standardise mOTUs table merge 2022-09-16 06:36:34 +00:00
			`ch_profiles_for_motus = ch_input_profiles.motus`
			`.map { [it[0]['db_name'], it[1]] }`
			`.groupTuple()`
			`.map {`
			`[[id:it[0]], it[1]]`
			`}`

Update motus merge module to remove unused channel 2022-09-16 08:25:24 +00:00			`MOTUS_MERGE ( ch_profiles_for_motus, ch_input_databases.motus.map{it[1]}, motu_version )`
Fix the motus_versions 2023-02-20 13:27:57 +00:00			`ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions )`
Fix ugly warn when KAIJU2TABLE not run 2023-03-06 07:17:50 +00:00
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`emit:`
Add taxpasta_merge to taxprofiler 2023-02-16 13:29:52 +00:00			`taxpasta = TAXPASTA_MERGE.out.merged_profiles`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`versions = ch_versions`
Start work on more merging tools 2022-09-06 15:34:45 +00:00			`mqc = ch_multiqc_files`
Add motus/merge and biom support 2022-07-12 09:39:26 +00:00			`}`