1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 06:59:54 +00:00

Clean up AdapterRemoval and remove all debugging dumps

This commit is contained in:
James Fellows Yates 2022-04-01 21:36:22 +02:00
parent 03e832954f
commit 93aaa9eeac
5 changed files with 18 additions and 27 deletions

View file

@ -18,9 +18,7 @@ workflow DB_CHECK {
parsed_samplesheet = DATABASE_CHECK ( dbsheet ) parsed_samplesheet = DATABASE_CHECK ( dbsheet )
.csv .csv
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )
.dump(tag: "db_split_csv_out")
.map { create_db_channels(it) } .map { create_db_channels(it) }
.dump(tag: "db_channel_prepped")
ch_dbs_for_untar = parsed_samplesheet ch_dbs_for_untar = parsed_samplesheet
.branch { .branch {

View file

@ -12,7 +12,6 @@ workflow INPUT_CHECK {
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet ) parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
.csv .csv
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )
.dump(tag: "input_split_csv_out")
.branch { .branch {
fasta: it['fasta'] != '' fasta: it['fasta'] != ''
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE' nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
@ -21,17 +20,14 @@ workflow INPUT_CHECK {
parsed_samplesheet.fastq parsed_samplesheet.fastq
.map { create_fastq_channel(it) } .map { create_fastq_channel(it) }
.dump(tag: "fastq_channel_init")
.set { fastq } .set { fastq }
parsed_samplesheet.nanopore parsed_samplesheet.nanopore
.map { create_fastq_channel(it) } .map { create_fastq_channel(it) }
.dump(tag: "fastq_nanopore_channel_init")
.set { nanopore } .set { nanopore }
parsed_samplesheet.fasta parsed_samplesheet.fasta
.map { create_fasta_channel(it) } .map { create_fasta_channel(it) }
.dump(tag: "fasta_channel_init")
.set { fasta } .set { fasta }
emit: emit:

View file

@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
PORECHOP ( reads ) PORECHOP ( reads )
ch_processed_reads = PORECHOP.out.reads ch_processed_reads = PORECHOP.out.reads
.dump(tag: "pre_fastqc_check")
.map { .map {
meta, reads -> meta, reads ->
def meta_new = meta.clone() def meta_new = meta.clone()

View file

@ -24,9 +24,10 @@ workflow SHORTREAD_ADAPTERREMOVAL {
ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] ) ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] ) ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) { // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
// due to the slightly ugly output implementation of the current AdapterRemoval2 module, each file
// has to be exported in a separate channel, and we must manually recombine when necessary // has to be exported in a separate channel, and we must manually recombine when necessary
if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
.mix( .mix(
ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,
@ -43,8 +44,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
} }
.groupTuple() .groupTuple()
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
} else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) { } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
@ -58,7 +59,8 @@ workflow SHORTREAD_ADAPTERREMOVAL {
} }
.groupTuple(by: 0) .groupTuple(by: 0)
ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated ) ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
} else { } else {

View file

@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' } if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files." if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files."
if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
/* /*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -126,7 +127,6 @@ workflow TAXPROFILER {
ch_input_for_profiling = ch_shortreads_preprocessed ch_input_for_profiling = ch_shortreads_preprocessed
.mix( ch_longreads_preprocessed ) .mix( ch_longreads_preprocessed )
.combine(DB_CHECK.out.dbs) .combine(DB_CHECK.out.dbs)
.dump(tag: "reads_plus_db_clean")
.branch { .branch {
malt: it[2]['tool'] == 'malt' malt: it[2]['tool'] == 'malt'
kraken2: it[2]['tool'] == 'kraken2' kraken2: it[2]['tool'] == 'kraken2'
@ -141,9 +141,7 @@ workflow TAXPROFILER {
// loading takes a long time, so we only want to run it once per database // loading takes a long time, so we only want to run it once per database
// TODO document somewhere we only accept illumina short reads for MALT? // TODO document somewhere we only accept illumina short reads for MALT?
ch_input_for_malt = ch_input_for_profiling.malt ch_input_for_malt = ch_input_for_profiling.malt
.dump(tag: "input_to_malt_prefilter")
.filter { it[0]['instrument_platform'] == 'ILLUMINA' } .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
.dump(tag: "input_to_malt_postfilter")
.map { .map {
it -> it ->
def temp_meta = [ id: it[2]['db_name']] + it[2] def temp_meta = [ id: it[2]['db_name']] + it[2]
@ -151,7 +149,6 @@ workflow TAXPROFILER {
[ temp_meta, it[1], db ] [ temp_meta, it[1], db ]
} }
.groupTuple(by: [0,2]) .groupTuple(by: [0,2])
.dump(tag: "input_to_malt")
.multiMap { .multiMap {
it -> it ->
reads: [ it[0], it[1].flatten() ] reads: [ it[0], it[1].flatten() ]
@ -160,7 +157,6 @@ workflow TAXPROFILER {
// We can run Kraken2 one-by-one sample-wise // We can run Kraken2 one-by-one sample-wise
ch_input_for_kraken2 = ch_input_for_profiling.kraken2 ch_input_for_kraken2 = ch_input_for_profiling.kraken2
.dump(tag: "input_to_kraken")
.multiMap { .multiMap {
it -> it ->
reads: [ it[0] + it[2], it[1] ] reads: [ it[0] + it[2], it[1] ]