mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 04:49:54 +00:00

Debugging run merging

This commit is contained in:
James Fellows Yates 2022-03-27 09:30:23 +02:00
parent e6e8ed7cc9
commit 8dc9e583ad
5 changed files with 32 additions and 21 deletions

View file

@@ -65,7 +65,7 @@ process {
     withName: FASTP {
         ext.prefix = { "${meta.id}_${meta.run_accession}" }
         ext.args = [
-            // collapsing options
+            // collapsing options - option to retain singletons
            params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
            // trimming options
            params.shortread_clipmerge_skiptrim ? "--disable_adapter_trimming" : "",
@@ -105,7 +105,7 @@ process {
            pattern: '*.{rma6,tab,text,sam,log}'
        ]
        ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = { "${meta.id}-${meta.db_name}" }
    }
    withName: KRAKEN2_KRAKEN2 {
@@ -115,7 +115,7 @@ process {
            pattern: '*.{fastq.gz,txt}'
        ]
        ext.args = { "${meta.db_params}" }
-        ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
+        ext.prefix = { "${meta.id}-${meta.db_name}" }
    }
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
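
As a side note, here is a minimal Groovy sketch (not part of the commit) of how the updated ext.prefix closures behave once run merging has stripped run_accession from meta; the sample and database names below are invented for illustration.

    // Hypothetical meta map as it would look after run merging: no
    // run_accession key, and a pairment suffix appended to id.
    def meta = [id: 'sample1_se', db_name: 'testdb']

    def newPrefix = { m -> "${m.id}-${m.db_name}" }                    // closure as changed here
    def oldPrefix = { m -> "${m.id}-${m.run_accession}-${m.db_name}" } // previous closure

    assert newPrefix(meta) == 'sample1_se-testdb'
    assert oldPrefix(meta) == 'sample1_se-null-testdb' // missing key interpolates as 'null'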

View file

@@ -32,11 +32,12 @@ process KRAKEN2_KRAKEN2 {
        --threads $task.cpus \\
        --unclassified-out $unclassified \\
        --classified-out $classified \\
+        $args \\
        --report ${prefix}.kraken2.report.txt \\
        --gzip-compressed \\
        $paired \\
-        $args \\
        $reads
    pigz -p $task.cpus *.fastq
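
Purely as an illustration (not from the commit), the snippet below spells out where the ext.args value ends up in the assembled command after this reordering; the database parameters, prefix, and read file names are invented.

    // Hypothetical resolved values for the variables used in the script block above.
    def args   = '--minimum-base-quality 20'   // e.g. from meta.db_params via ext.args
    def prefix = 'sample1_pe-testdb'
    def report = "${prefix}.kraken2.report.txt"

    // Before: ... --gzip-compressed --paired <args> <reads>
    // After:  ... <args> --report <report> --gzip-compressed --paired <reads>
    def after = "kraken2 ... ${args} --report ${report} --gzip-compressed --paired reads_1.fastq.gz reads_2.fastq.gz"
    println after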

View file

@@ -7,7 +7,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fast
 workflow SHORTREAD_FASTP {
    take:
-        reads // file: /path/to/samplesheet.csv
+        reads // [[meta], [reads]]
    main:
    ch_versions = Channel.empty()
@@ -24,16 +24,18 @@ workflow SHORTREAD_FASTP {
    ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
    FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
+    // Last parameter here turns on merging of PE data
    FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
    if ( params.shortread_clipmerge_mergepairs ) {
+        // TODO update to replace meta suffix
        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
            .mix( FASTP_SINGLE.out.reads )
            .map {
                meta, reads ->
                    def meta_new = meta.clone()
                    meta_new['single_end'] = 1
                    [ meta_new, reads ]
            }
    } else {
        ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
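
For orientation, a small Groovy sketch (not part of the commit) of the channel element shape noted in the take: comment, and of what the map closure above does when merged pairs are pushed downstream as single-end reads; the sample name, run accession, and file names are invented.

    // One hypothetical element of the incoming reads channel: [[meta], [reads]]
    def element = [ [id: '2612', run_accession: 'ERR0000001', single_end: false],
                    ['2612_1.fastq.gz', '2612_2.fastq.gz'] ]

    def (meta, reads) = element
    def meta_new = meta.clone()
    meta_new['single_end'] = 1   // merged fastp output is treated as single-end downstream

    def merged_element = [ meta_new, '2612.merged.fastq.gz' ]
    assert merged_element[0].single_end == 1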

View file

@@ -14,13 +14,6 @@ workflow SHORTREAD_PREPROCESSING {
    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()
-    //
-    // STEP: Read clipping and merging
-    //
-    // TODO give option to clip only and retain pairs
-    // TODO give option to retain singletons (probably fastp option likely)
-    // TODO move to subworkflow
    if ( params.shortread_clipmerge_tool == "fastp" ) {
        ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
        ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )

View file

@@ -120,25 +120,40 @@ workflow TAXPROFILER {
        MODULE: PERFORM SHORT READ RUN MERGING
    */
-    // TODO: Check not necessary for long reads too?
-    // TODO: source of clash - combined should only occur when
-    // files ARE to be combined. SE/unmerged (see not below)
-    // TODO Current Branch system currently problematic - when single file not in a list, splits at
-    // `/` so makes list >= 2, so tries to merge, but then breaks kraken downstream
-    // e.g. `home jfellows Documents git nf-core taxprofiler testing work 68 9a2c8362add37832a776058d280bb7 2612_se.merged.fastq.gz`
-    // So theoretically need to force this into a list, (but results the can't access meta.id error as incorrect input format)
-    // But second issue >= 2 is MAYBE sufficient because what if merging two paired-end files? Need to chcek if the input channel formatted correctly for this? Need to check...
+    // Remove run accession to allow grouping by sample. Will only merge
+    // if pairment type is the same.
    ch_processed_for_combine = ch_shortreads_preprocessed
        .dump(tag: "prep_for_combine_grouping")
        .map {
            meta, reads ->
                def meta_new = meta.clone()
-                //meta_new['run_accession'] = 'combined'
+                // remove run accession to allow group by sample
+                meta_new.remove('run_accession')
+                // update id to prevent file name clashes when unable to group
+                // unmerged PE and SE runs of same sample
+                def type = meta_new['single_end'] ? "_se" : "_pe"
+                meta_new['id'] = meta['id'] + type
                [ meta_new, reads ]
        }
        .groupTuple ( by: 0 )
+        .dump(tag: "files_for_cat_fastq_branch")
        .branch{
-            combine: it[1].size() >= 2
-            skip: it[1].size() < 2
+            combine: it[1] && it[1].size() > 1
+            skip: true
        }
    // NOTE: this does not allow CATing of SE & PE runs of same sample
    // when --shortread_clipmerge_mergepairs is false
+    ch_processed_for_combine.combine.dump(tag: "input_into_cat_fastq")
    CAT_FASTQ ( ch_processed_for_combine.combine )
    ch_reads_for_profiling = ch_processed_for_combine.skip
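
To make the grouping logic above easier to follow, here is a rough plain-Groovy sketch (not part of the commit, using groupBy as a stand-in for Nextflow's groupTuple) run on two invented runs of the same sample: once run_accession is dropped and the _se/_pe suffix applied, both runs share one key, so the grouped entry holds more than one read set and falls into the combine branch for CAT_FASTQ.

    // Two invented single-end runs of the same sample.
    def elements = [
        [ [id: '2612', run_accession: 'ERR0000001', single_end: true], 'run1.fastq.gz' ],
        [ [id: '2612', run_accession: 'ERR0000002', single_end: true], 'run2.fastq.gz' ],
    ]

    def grouped = elements.collect { meta, reads ->
            def meta_new = meta.clone()
            meta_new.remove('run_accession')                    // group by sample, not run
            def type = meta_new['single_end'] ? "_se" : "_pe"
            meta_new['id'] = meta['id'] + type                  // avoid SE/PE file name clashes
            [ meta_new, reads ]
        }
        .groupBy { it[0] }                                      // stand-in for groupTuple(by: 0)
        .collect { meta, hits -> [ meta, hits.collect { it[1] } ] }

    def combine = grouped.findAll { it[1] && it[1].size() > 1 } // merged by CAT_FASTQ
    def skip    = grouped - combine                             // passed through unmerged

    assert combine[0][1] == ['run1.fastq.gz', 'run2.fastq.gz']
    assert skip.isEmpty()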