From 8dc9e583add80f51111dc6c796a2ea1413e8731b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Sun, 27 Mar 2022 09:30:23 +0200 Subject: [PATCH] Debugging run merging --- conf/modules.config | 6 ++--- .../nf-core/modules/kraken2/kraken2/main.nf | 3 ++- subworkflows/local/shortread_fastp.nf | 10 ++++--- subworkflows/local/shortread_preprocessing.nf | 7 ----- workflows/taxprofiler.nf | 27 ++++++++++++++----- 5 files changed, 32 insertions(+), 21 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5823d3f..41f471f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -65,7 +65,7 @@ process { withName: FASTP { ext.prefix = { "${meta.id}_${meta.run_accession}" } ext.args = [ - // collapsing options + // collapsing options - option to retain singletons params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged", // trimming options params.shortread_clipmerge_skiptrim ? "--disable_adapter_trimming" : "", @@ -105,7 +105,7 @@ process { pattern: '*.{rma6,tab,text,sam,log}' ] ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = { "${meta.id}-${meta.db_name}" } } withName: KRAKEN2_KRAKEN2 { @@ -115,7 +115,7 @@ process { pattern: '*.{fastq.gz,txt}' ] ext.args = { "${meta.db_params}" } - ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" } + ext.prefix = { "${meta.id}-${meta.db_name}" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf index 3ec5df5..52351f5 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/modules/kraken2/kraken2/main.nf @@ -32,11 +32,12 @@ process KRAKEN2_KRAKEN2 { --threads $task.cpus \\ --unclassified-out $unclassified \\ --classified-out $classified \\ + $args \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ $paired \\ - $args \\ $reads + pigz -p $task.cpus *.fastq diff --git a/subworkflows/local/shortread_fastp.nf b/subworkflows/local/shortread_fastp.nf index 57ce2a6..d4d706e 100644 --- a/subworkflows/local/shortread_fastp.nf +++ b/subworkflows/local/shortread_fastp.nf @@ -7,7 +7,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fast workflow SHORTREAD_FASTP { take: - reads // file: /path/to/samplesheet.csv + reads // [[meta], [reads]] main: ch_versions = Channel.empty() @@ -24,16 +24,18 @@ workflow SHORTREAD_FASTP { ch_input_for_fastp.paired.dump(tag: "input_fastp_paired") FASTP_SINGLE ( ch_input_for_fastp.single, false, false ) + // Last parameter here turns on merging of PE data FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs ) if ( params.shortread_clipmerge_mergepairs ) { + // TODO update to replace meta suffix ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged .mix( FASTP_SINGLE.out.reads ) .map { meta, reads -> - def meta_new = meta.clone() - meta_new['single_end'] = 1 - [ meta_new, reads ] + def meta_new = meta.clone() + meta_new['single_end'] = 1 + [ meta_new, reads ] } } else { ch_fastp_reads_prepped = FASTP_PAIRED.out.reads diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index c82536f..7fba0c0 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -14,13 +14,6 @@ workflow SHORTREAD_PREPROCESSING { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // - // STEP: Read clipping and merging - // - // TODO give option to clip only and retain pairs - // TODO give option to retain singletons (probably fastp option likely) - // TODO move to subworkflow - if ( params.shortread_clipmerge_tool == "fastp" ) { ch_processed_reads = SHORTREAD_FASTP ( reads ).reads ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions ) diff --git a/workflows/taxprofiler.nf b/workflows/taxprofiler.nf index ee921ea..87f7a30 100644 --- a/workflows/taxprofiler.nf +++ b/workflows/taxprofiler.nf @@ -120,25 +120,40 @@ workflow TAXPROFILER { MODULE: PERFORM SHORT READ RUN MERGING */ - // TODO: Check not necessary for long reads too? - // TODO: source of clash - combined should only occur when - // files ARE to be combined. SE/unmerged (see not below) + // Remove run accession to allow grouping by sample. Will only merge + // if pairment type is the same. + + // TODO Current Branch system currently problematic - when single file not in a list, splits at + // `/` so makes list >= 2, so tries to merge, but then breaks kraken downstream + // e.g. `home jfellows Documents git nf-core taxprofiler testing work 68 9a2c8362add37832a776058d280bb7 2612_se.merged.fastq.gz` + // So theoretically need to force this into a list, (but results the can't access meta.id error as incorrect input format) + // But second issue >= 2 is MAYBE sufficient because what if merging two paired-end files? Need to chcek if the input channel formatted correctly for this? Need to check... ch_processed_for_combine = ch_shortreads_preprocessed .dump(tag: "prep_for_combine_grouping") .map { meta, reads -> def meta_new = meta.clone() - //meta_new['run_accession'] = 'combined' + + // remove run accession to allow group by sample + meta_new.remove('run_accession') + + // update id to prevent file name clashes when unable to group + // unmerged PE and SE runs of same sample + def type = meta_new['single_end'] ? "_se" : "_pe" + meta_new['id'] = meta['id'] + type + [ meta_new, reads ] } .groupTuple ( by: 0 ) + .dump(tag: "files_for_cat_fastq_branch") .branch{ - combine: it[1].size() >= 2 - skip: it[1].size() < 2 + combine: it[1] && it[1].size() > 1 + skip: true } // NOTE: this does not allow CATing of SE & PE runs of same sample // when --shortread_clipmerge_mergepairs is false + ch_processed_for_combine.combine.dump(tag: "input_into_cat_fastq") CAT_FASTQ ( ch_processed_for_combine.combine ) ch_reads_for_profiling = ch_processed_for_combine.skip