From 231253227c7e59c1c108cf4cf4953cb89c4f6f5f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 28 Mar 2022 16:38:10 +0200 Subject: [PATCH] Remove run merging for now due to complexity --- conf/modules.config | 4 +- subworkflows/local/shortread_fastp.nf | 17 +++++---- workflows/taxprofiler.nf | 55 ++------------------------- 3 files changed, 14 insertions(+), 62 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 41f471f..a9bc3a1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -105,7 +105,7 @@ process { pattern: '*.{rma6,tab,text,sam,log}' ] ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.db_name}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: KRAKEN2_KRAKEN2 { @@ -115,7 +115,7 @@ process { pattern: '*.{fastq.gz,txt}' ] ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.db_name}" } + ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index d4d706e..c2e435c 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -28,15 +28,16 @@ workflow SHORTREAD_FASTP { FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs ) if ( params.shortread_clipmerge_mergepairs ) { - // TODO update to replace meta suffix - ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged - .mix( FASTP_SINGLE.out.reads ) - .map { - meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged + .map { + meta, reads -> + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] } + + ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix( FASTP_SINGLE.out.reads ) + } else { ch_fastp_reads_prepped = FASTP_PAIRED.out.reads .mix( FASTP_SINGLE.out.reads ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index 8a05720..e73d94d 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -116,63 +116,15 @@ workflow TAXPROFILER { ch_longreads_preprocessed = INPUT_CHECK.out.nanopore } - /* - MODULE: PERFORM SHORT READ RUN MERGING - - // Remove run accession to allow grouping by sample. Will only merge - // if pairment type is the same. - - // TODO Current Branch system currently problematic - when single file not in a list, splits at - // `/` so makes list >= 2, so tries to merge, but then breaks kraken downstream - // e.g. `home jfellows Documents git nf-core taxprofiler testing work 68 9a2c8362add37832a776058d280bb7 2612_se.merged.fastq.gz` - // So theoretically need to force this into a list, (but results the can't access meta.id error as incorrect input format) - // But second issue >= 2 is MAYBE sufficient because what if merging two paired-end files? Need to chcek if the input channel formatted correctly for this? Need to check... - - ch_processed_for_combine = ch_shortreads_preprocessed - .dump(tag: "prep_for_combine_grouping") - .map { - meta, reads -> - def meta_new = meta.clone() - - // remove run accession to allow group by sample - meta_new.remove('run_accession') - - // update id to prevent file name clashes when unable to group - // unmerged PE and SE runs of same sample - def type = meta_new['single_end'] ? "_se" : "_pe" - meta_new['id'] = meta['id'] + type - - [ meta_new, reads ] - } - .groupTuple ( by: 0 ) - .dump(tag: "files_for_cat_fastq_branch") - .branch{ - combine: it[1] && it[1].size() > 1 - skip: true - } - - // NOTE: this does not allow CATing of SE & PE runs of same sample - // when --shortread_clipmerge_mergepairs is false - ch_processed_for_combine.combine.dump(tag: "input_into_cat_fastq") - CAT_FASTQ ( ch_processed_for_combine.combine ) - - ch_reads_for_profiling = ch_processed_for_combine.skip - .dump(tag: "skip_combine") - .mix( CAT_FASTQ.out.reads ) - .dump(tag: "files_for_profiling") - */ - - ch_reads_for_profiling = ch_shortreads_preprocessed - /* COMBINE READS WITH POSSIBLE DATABASES */ // e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], /2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], /malt90] - ch_input_for_profiling = ch_reads_for_profiling + ch_input_for_profiling = ch_shortreads_preprocessed .mix( ch_longreads_preprocessed ) .combine(DB_CHECK.out.dbs) - .dump(tag: "reads_plus_db") + .dump(tag: "reads_plus_db_clean") .branch { malt: it[2]['tool'] == 'malt' kraken2: it[2]['tool'] == 'kraken2' @@ -201,12 +153,11 @@ workflow TAXPROFILER { } // We can run Kraken2 one-by-one sample-wise - // TODO Only flatten when paired-end! Causing issue commented out above! ch_input_for_kraken2 = ch_input_for_profiling.kraken2 .dump(tag: "input_to_kraken") .multiMap { it -> - reads: [ it[0] + it[2], it[1].flatten() ] + reads: [ it[0] + it[2], it[1] ] db: it[3] }