From 7c5f9d0c24315a793c79a01c4b6dd5e0e8e42ffd Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 6 Sep 2022 17:34:45 +0200 Subject: [PATCH 1/3] Start work on more merging tools --- conf/modules.config | 31 +++++++++---------- conf/test_nothing.config | 2 +- subworkflows/local/profiling.nf | 4 --- .../local/standardisation_profiles.nf | 24 ++++++++++++-- workflows/taxprofiler.nf | 6 ++++ 5 files changed, 44 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d4a2693..7c1ceba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -268,7 +268,7 @@ process { // one run with multiple samples, so fix ID to just db name to ensure clean log name ext.prefix = { "${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/malt/${meta.db_name}" }, + path: { "${params.outdir}/malt/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{rma6,log,sam}' ] @@ -278,7 +278,7 @@ process { ext.args = "-c2c Taxonomy" ext.prefix = { "${meta.id}" } publishDir = [ - path: { "${params.outdir}/malt/${meta.db_name}" }, + path: { "${params.outdir}/malt/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{txt.gz,megan}' ] @@ -288,7 +288,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/kraken2/${meta.db_name}" }, + path: { "${params.outdir}/kraken2/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{txt,report,fastq.gz}' ] @@ -297,7 +297,7 @@ process { withName: KRONA_CLEANUP { ext.prefix = params.perform_runmerging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/krona" }, + path: { "${params.outdir}/krona/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -306,7 +306,7 @@ process { withName: KRONA_KTIMPORTTEXT { ext.prefix = { "${meta.tool}-${meta.id}" } publishDir = [ - path: { "${params.outdir}/krona" }, + path: { "${params.outdir}/krona/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -321,7 +321,7 @@ process { ext.args = "-i" ext.prefix = { "${meta.tool}-${meta.id}" } publishDir = [ - path: { "${params.outdir}/krona" }, + path: { "${params.outdir}/krona/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -331,7 +331,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}" }, + path: { "${params.outdir}/metaphlan3/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] @@ -339,7 +339,7 @@ process { withName: CENTRIFUGE_CENTRIFUGE { publishDir = [ - path: { "${params.outdir}/centrifuge/${meta.db_name}" }, + path: { "${params.outdir}/centrifuge/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{txt,sam,gz}' ] @@ -352,27 +352,26 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" } publishDir = [ - path: { "${params.outdir}/centrifuge/${meta.db_name}" }, + path: { "${params.outdir}/centrifuge/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{txt}' ] } withName: KAIJU_KAIJU { + ext.prefix = params.perform_runmerging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/kaiju/${meta.db_name}" }, + path: { "${params.outdir}/kaiju/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.tsv' ] ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: KAIJU_KAIJU2TABLE { - ext.args = { "${meta.db_params}" } - ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = { "${meta.id}_combined_reports" } publishDir = [ - path: { "${params.outdir}/kaiju/${meta.db_name}" }, + path: { "${params.outdir}/kaiju/" }, mode: params.publish_dir_mode, pattern: '*.{txt}' ] @@ -386,7 +385,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/diamond/${meta.db_name}" }, + path: { "${params.outdir}/diamond/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode, pattern: '*.{blast,xml,txt,daa,sam,tsv,paf,log}' ] @@ -395,7 +394,7 @@ process { withName: MOTUS_PROFILE { ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/motus/${meta.db_name}" }, + path: { "${params.outdir}/motus/${meta.db_name}/${ext.prefix}" }, mode: params.publish_dir_mode ] } diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 5cd89eb..91b0aa4 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -12,7 +12,7 @@ params { config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. 
Useful when you only wish to test a single profiler without having to 'opt-out' of all the others' + config_profile_description = 'Minimal test dataset without performing any preprocessing nor profiling to check pipeline function. Useful when you only wish to test a single profiler without having to opt-out of all the others' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index 17c96f1..c4c5195 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -9,7 +9,6 @@ include { CENTRIFUGE_CENTRIFUGE } from '../../modules/nf-core/mo include { CENTRIFUGE_KREPORT } from '../../modules/nf-core/modules/centrifuge/kreport/main' include { METAPHLAN3 } from '../../modules/nf-core/modules/metaphlan3/main' include { KAIJU_KAIJU } from '../../modules/nf-core/modules/kaiju/kaiju/main' -include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' include { DIAMOND_BLASTX } from '../../modules/nf-core/modules/diamond/blastx/main' include { MOTUS_PROFILE } from '../../modules/nf-core/modules/motus/profile/main' @@ -185,11 +184,8 @@ workflow PROFILING { } KAIJU_KAIJU ( ch_input_for_kaiju.reads, ch_input_for_kaiju.db) - KAIJU_KAIJU2TABLE (KAIJU_KAIJU.out.results, ch_input_for_kaiju.db, params.kaiju_taxon_rank) - ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) ch_versions = ch_versions.mix( KAIJU_KAIJU.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( KAIJU_KAIJU.out.results ) - ch_raw_profiles = ch_raw_profiles.mix( KAIJU_KAIJU2TABLE.out.summary ) } diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf index 92ceb16..8be6020 100644 --- a/subworkflows/local/standardisation_profiles.nf +++ b/subworkflows/local/standardisation_profiles.nf @@ -1,7 +1,8 @@ // -// Create Krona visualizations +// Standardise output files e.g. 
aggregation // +include { KAIJU_KAIJU2TABLE } from '../../modules/nf-core/modules/kaiju/kaiju2table/main' include { MOTUS_MERGE } from '../../modules/nf-core/modules/motus/merge/main' workflow STANDARDISATION_PROFILES { @@ -14,6 +15,7 @@ workflow STANDARDISATION_PROFILES { main: ch_standardised_tables = Channel.empty() ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() /* Split profile results based on tool they come from @@ -26,12 +28,14 @@ workflow STANDARDISATION_PROFILES { ch_input_classifications = classifications .branch { + kaiju: it[0]['tool'] == 'kaiju' unknown: true } ch_input_databases = databases .branch { motus: it[0]['tool'] == 'motus' + kaiju: it[0]['tool'] == 'kaiju' unknown: true } @@ -39,6 +43,21 @@ workflow STANDARDISATION_PROFILES { Standardise and aggregate */ + // Kaiju + + // Collect and replace id for db_name for prefix + ch_profiles_for_kaiju = ch_input_classifications.kaiju + .map { [it[0]['db_name'], it[1]] } + .groupTuple() + .map { + [[id:it[0]], it[1]] + } + + KAIJU_KAIJU2TABLE ( ch_profiles_for_kaiju, ch_input_databases.kaiju.map{it[1]}, params.kaiju_taxon_rank) + ch_standardised_tables = ch_standardised_tables.mix( KAIJU_KAIJU2TABLE.out.summary ) + ch_multiqc_files = ch_multiqc_files.mix( KAIJU_KAIJU2TABLE.out.summary ) + ch_versions = ch_versions.mix( KAIJU_KAIJU2TABLE.out.versions ) + // mOTUs has a 'single' database, and cannot create custom ones. 
// Therefore removing db info here, and publish merged at root mOTUs results // directory @@ -51,6 +70,7 @@ workflow STANDARDISATION_PROFILES { ch_versions = ch_versions.mix( MOTUS_MERGE.out.versions ) emit: - tables = ch_standardised_tables + tables = ch_standardised_tables versions = ch_versions + mqc = ch_multiqc_files } diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 2bd6c01..28d248b 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -267,6 +267,12 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) + if ( params.run_profile_standardisation ) { + ch_multiqc_files = ch_multiqc_files.mix( STANDARDISATION_PROFILES.out.mqc.collect{it[1]}.ifEmpty([]) ) + } + + ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) + // TODO create multiQC module for metaphlan MULTIQC ( ch_multiqc_files.collect() From 832d981976da4a49041d72fa519b3e56ff517543 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 7 Sep 2022 12:22:17 +0200 Subject: [PATCH 2/3] Don't have per sample directory as unnecessary if file names are unique with the db in them --- conf/modules.config | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7c1ceba..e367a67 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -268,7 +268,7 @@ process { // one run with multiple samples, so fix ID to just db name to ensure clean log name ext.prefix = { "${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/malt/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/malt/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{rma6,log,sam}' ] @@ -278,7 +278,7 @@ process { ext.args = "-c2c Taxonomy" ext.prefix = { "${meta.id}" } publishDir = [ - path: { "${params.outdir}/malt/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/malt/${meta.db_name}/" 
}, mode: params.publish_dir_mode, pattern: '*.{txt.gz,megan}' ] @@ -288,7 +288,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/kraken2/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/kraken2/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt,report,fastq.gz}' ] @@ -297,7 +297,7 @@ process { withName: KRONA_CLEANUP { ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/krona/${ext.prefix}" }, + path: { "${params.outdir}/krona/" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -306,7 +306,7 @@ process { withName: KRONA_KTIMPORTTEXT { ext.prefix = { "${meta.tool}-${meta.id}" } publishDir = [ - path: { "${params.outdir}/krona/${ext.prefix}" }, + path: { "${params.outdir}/krona/" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -321,7 +321,7 @@ process { ext.args = "-i" ext.prefix = { "${meta.tool}-${meta.id}" } publishDir = [ - path: { "${params.outdir}/krona/${ext.prefix}" }, + path: { "${params.outdir}/krona/" }, mode: params.publish_dir_mode, pattern: '*.{html}' ] @@ -331,7 +331,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/metaphlan3/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/metaphlan3/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{biom,txt}' ] @@ -339,7 +339,7 @@ process { withName: CENTRIFUGE_CENTRIFUGE { publishDir = [ - path: { "${params.outdir}/centrifuge/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/centrifuge/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt,sam,gz}' ] @@ -352,7 +352,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}.centrifuge" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}.centrifuge" } publishDir = [ - path: { "${params.outdir}/centrifuge/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/centrifuge/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{txt}' ] @@ -361,7 +361,7 @@ process { withName: KAIJU_KAIJU { ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/kaiju/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/kaiju/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.tsv' ] @@ -385,7 +385,7 @@ process { ext.args = { "${meta.db_params}" } ext.prefix = params.perform_runmerging ? { "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/diamond/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/diamond/${meta.db_name}/" }, mode: params.publish_dir_mode, pattern: '*.{blast,xml,txt,daa,sam,tsv,paf,log}' ] @@ -394,7 +394,7 @@ process { withName: MOTUS_PROFILE { ext.prefix = params.perform_runmerging ? 
{ "${meta.id}-${meta.db_name}" } : { "${meta.id}-${meta.run_accession}-${meta.db_name}" } publishDir = [ - path: { "${params.outdir}/motus/${meta.db_name}/${ext.prefix}" }, + path: { "${params.outdir}/motus/${meta.db_name}/" }, mode: params.publish_dir_mode ] } From bbd8049ca1690fbd40f9785d7af93768cc0b50da Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 7 Sep 2022 13:44:08 +0200 Subject: [PATCH 3/3] Remove duplicated MultiQC mixing --- workflows/taxprofiler.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 28d248b..a47b803 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -271,8 +271,6 @@ workflow TAXPROFILER { ch_multiqc_files = ch_multiqc_files.mix( STANDARDISATION_PROFILES.out.mqc.collect{it[1]}.ifEmpty([]) ) } - ch_multiqc_files = ch_multiqc_files.mix( PROFILING.out.mqc.collect{it[1]}.ifEmpty([]) ) - // TODO create multiQC module for metaphlan MULTIQC ( ch_multiqc_files.collect()