mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-22 05:19:55 +00:00
Debugging run merging
This commit is contained in:
parent
e6e8ed7cc9
commit
8dc9e583ad
5 changed files with 32 additions and 21 deletions
|
@ -65,7 +65,7 @@ process {
|
|||
withName: FASTP {
|
||||
ext.prefix = { "${meta.id}_${meta.run_accession}" }
|
||||
ext.args = [
|
||||
// collapsing options
|
||||
// collapsing options - option to retain singletons
|
||||
params.shortread_clipmerge_excludeunmerged ? '' : "--include_unmerged",
|
||||
// trimming options
|
||||
params.shortread_clipmerge_skiptrim ? "--disable_adapter_trimming" : "",
|
||||
|
@ -105,7 +105,7 @@ process {
|
|||
pattern: '*.{rma6,tab,text,sam,log}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: KRAKEN2_KRAKEN2 {
|
||||
|
@ -115,7 +115,7 @@ process {
|
|||
pattern: '*.{fastq.gz,txt}'
|
||||
]
|
||||
ext.args = { "${meta.db_params}" }
|
||||
ext.prefix = { "${meta.id}-${meta.run_accession}-${meta.db_name}" }
|
||||
ext.prefix = { "${meta.id}-${meta.db_name}" }
|
||||
}
|
||||
|
||||
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
|
||||
|
|
3
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
3
modules/nf-core/modules/kraken2/kraken2/main.nf
generated
|
@ -32,12 +32,13 @@ process KRAKEN2_KRAKEN2 {
|
|||
--threads $task.cpus \\
|
||||
--unclassified-out $unclassified \\
|
||||
--classified-out $classified \\
|
||||
$args \\
|
||||
--report ${prefix}.kraken2.report.txt \\
|
||||
--gzip-compressed \\
|
||||
$paired \\
|
||||
$args \\
|
||||
$reads
|
||||
|
||||
|
||||
pigz -p $task.cpus *.fastq
|
||||
|
||||
cat <<-END_VERSIONS > versions.yml
|
||||
|
|
|
@ -7,7 +7,7 @@ include { FASTP as FASTP_PAIRED } from '../../modules/nf-core/modules/fast
|
|||
|
||||
workflow SHORTREAD_FASTP {
|
||||
take:
|
||||
reads // file: /path/to/samplesheet.csv
|
||||
reads // [[meta], [reads]]
|
||||
|
||||
main:
|
||||
ch_versions = Channel.empty()
|
||||
|
@ -24,16 +24,18 @@ workflow SHORTREAD_FASTP {
|
|||
ch_input_for_fastp.paired.dump(tag: "input_fastp_paired")
|
||||
|
||||
FASTP_SINGLE ( ch_input_for_fastp.single, false, false )
|
||||
// Last parameter here turns on merging of PE data
|
||||
FASTP_PAIRED ( ch_input_for_fastp.paired, false, params.shortread_clipmerge_mergepairs )
|
||||
|
||||
if ( params.shortread_clipmerge_mergepairs ) {
|
||||
// TODO update to replace meta suffix
|
||||
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads_merged
|
||||
.mix( FASTP_SINGLE.out.reads )
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
def meta_new = meta.clone()
|
||||
meta_new['single_end'] = 1
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
} else {
|
||||
ch_fastp_reads_prepped = FASTP_PAIRED.out.reads
|
||||
|
|
|
@ -14,13 +14,6 @@ workflow SHORTREAD_PREPROCESSING {
|
|||
ch_versions = Channel.empty()
|
||||
ch_multiqc_files = Channel.empty()
|
||||
|
||||
//
|
||||
// STEP: Read clipping and merging
|
||||
//
|
||||
// TODO give option to clip only and retain pairs
|
||||
// TODO give option to retain singletons (likely a fastp option)
|
||||
// TODO move to subworkflow
|
||||
|
||||
if ( params.shortread_clipmerge_tool == "fastp" ) {
|
||||
ch_processed_reads = SHORTREAD_FASTP ( reads ).reads
|
||||
ch_versions = ch_versions.mix( SHORTREAD_FASTP.out.versions )
|
||||
|
|
|
@ -120,25 +120,40 @@ workflow TAXPROFILER {
|
|||
MODULE: PERFORM SHORT READ RUN MERGING
|
||||
*/
|
||||
|
||||
// TODO: Check not necessary for long reads too?
|
||||
// TODO: source of clash - combined should only occur when
|
||||
// files ARE to be combined. SE/unmerged (see note below)
|
||||
// Remove run accession to allow grouping by sample. Will only merge
|
||||
// if the pairing type (single-end vs. paired-end) is the same.
|
||||
|
||||
// TODO: The current branch logic is problematic - when a single file is not in a list, its path is split at
|
||||
// `/` so makes list >= 2, so tries to merge, but then breaks kraken downstream
|
||||
// e.g. `home jfellows Documents git nf-core taxprofiler testing work 68 9a2c8362add37832a776058d280bb7 2612_se.merged.fastq.gz`
|
||||
// So theoretically we need to force this into a list (but that results in the "can't access meta.id" error, because the input format is then incorrect)
|
||||
// Second issue: is `>= 2` actually a sufficient condition? What happens when merging two paired-end files? Need to check whether the input channel is formatted correctly for this case.
|
||||
ch_processed_for_combine = ch_shortreads_preprocessed
|
||||
.dump(tag: "prep_for_combine_grouping")
|
||||
.map {
|
||||
meta, reads ->
|
||||
def meta_new = meta.clone()
|
||||
//meta_new['run_accession'] = 'combined'
|
||||
|
||||
// remove run accession to allow group by sample
|
||||
meta_new.remove('run_accession')
|
||||
|
||||
// update id to prevent file name clashes when unable to group
|
||||
// unmerged PE and SE runs of same sample
|
||||
def type = meta_new['single_end'] ? "_se" : "_pe"
|
||||
meta_new['id'] = meta['id'] + type
|
||||
|
||||
[ meta_new, reads ]
|
||||
}
|
||||
.groupTuple ( by: 0 )
|
||||
.dump(tag: "files_for_cat_fastq_branch")
|
||||
.branch{
|
||||
combine: it[1].size() >= 2
|
||||
skip: it[1].size() < 2
|
||||
combine: it[1] && it[1].size() > 1
|
||||
skip: true
|
||||
}
|
||||
|
||||
// NOTE: this does not allow CATing of SE & PE runs of same sample
|
||||
// when --shortread_clipmerge_mergepairs is false
|
||||
ch_processed_for_combine.combine.dump(tag: "input_into_cat_fastq")
|
||||
CAT_FASTQ ( ch_processed_for_combine.combine )
|
||||
|
||||
ch_reads_for_profiling = ch_processed_for_combine.skip
|
||||
|
|
Loading…
Reference in a new issue