Mirror of https://github.com/MillironX/taxprofiler.git
Clean up AdapterRemoval and remove all debugging dumps

commit 93aaa9eeac
parent 03e832954f

5 changed files with 18 additions and 27 deletions
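Note on the change: .dump(tag: ...) is Nextflow's built-in channel-debugging operator. It prints a channel's contents only when the pipeline is launched with the -dump-channels option, so deleting these calls removes development scaffolding without changing pipeline behaviour. A minimal, self-contained sketch of the operator; the channel contents are made up for illustration:

    // sketch: dump() is inert unless the run uses: nextflow run main.nf -dump-channels
    workflow {
        Channel.of( [ [id:'sample1'], 'sample1_R1.fastq.gz' ] )
            .dump(tag: 'my_debug_tag')   // prints each item, labelled with the tag
            .view()
    }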
subworkflows/local/db_check.nf

@@ -18,9 +18,7 @@ workflow DB_CHECK {
     parsed_samplesheet = DATABASE_CHECK ( dbsheet )
         .csv
         .splitCsv ( header:true, sep:',' )
-        .dump(tag: "db_split_csv_out")
         .map { create_db_channels(it) }
-        .dump(tag: "db_channel_prepped")
 
     ch_dbs_for_untar = parsed_samplesheet
         .branch {
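For context, the DATABASE_CHECK / splitCsv / map chain kept above is the usual nf-core samplesheet pattern: each CSV row is parsed into a map keyed by the header, then reshaped into a [ meta, file ] tuple. A standalone sketch of the same pattern; the column names (tool, db_name, db_path) are illustrative, not the pipeline's exact schema:

    // sketch: parse a database sheet into [ meta, db_file ] tuples
    Channel.fromPath('databases.csv')
        .splitCsv ( header:true, sep:',' )   // each row becomes a Map keyed by the header
        .map { row -> [ [ tool: row.tool, db_name: row.db_name ], file(row.db_path) ] }
        .view()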
subworkflows/local/input_check.nf

@@ -12,7 +12,6 @@ workflow INPUT_CHECK {
     parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
         .csv
         .splitCsv ( header:true, sep:',' )
-        .dump(tag: "input_split_csv_out")
         .branch {
             fasta: it['fasta'] != ''
             nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'

@@ -21,17 +20,14 @@ workflow INPUT_CHECK {
 
     parsed_samplesheet.fastq
         .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_channel_init")
         .set { fastq }
 
     parsed_samplesheet.nanopore
         .map { create_fastq_channel(it) }
-        .dump(tag: "fastq_nanopore_channel_init")
         .set { nanopore }
 
     parsed_samplesheet.fasta
         .map { create_fasta_channel(it) }
-        .dump(tag: "fasta_channel_init")
         .set { fasta }
 
     emit:
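The branch operator used above routes each item to the first condition that evaluates true, producing named sub-channels (fasta, nanopore, and a remainder treated as fastq). A self-contained sketch with invented rows; the fallback branch name is illustrative:

    // sketch: route samplesheet rows by content; the first matching condition wins
    Channel.of(
            [ instrument_platform: 'ILLUMINA',        fasta: ''       ],
            [ instrument_platform: 'OXFORD_NANOPORE', fasta: ''       ],
            [ instrument_platform: 'ILLUMINA',        fasta: 'ref.fa' ] )
        .branch {
            fasta:    it['fasta'] != ''
            nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE'
            fastq:    true                  // fallback for everything else
        }
        .set { parsed }

    parsed.fastq.view { "fastq: $it" }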
subworkflows/local/longread_preprocessing.nf

@@ -16,7 +16,6 @@ workflow LONGREAD_PREPROCESSING {
     PORECHOP ( reads )
 
     ch_processed_reads = PORECHOP.out.reads
-        .dump(tag: "pre_fastqc_check")
         .map {
             meta, reads ->
             def meta_new = meta.clone()
subworkflows/local/shortread_adapterremoval.nf

@@ -24,9 +24,10 @@ workflow SHORTREAD_ADAPTERREMOVAL {
     ADAPTERREMOVAL_SINGLE ( ch_input_for_adapterremoval.single, [] )
     ADAPTERREMOVAL_PAIRED ( ch_input_for_adapterremoval.paired, [] )
 
+    // due to the slightly ugly output implementation of the current AdapterRemoval2 version, each file
+    // has to be exported in a separate channel, and we must manually recombine when necessary
+
     if ( params.shortread_clipmerge_mergepairs && !params.shortread_clipmerge_excludeunmerged ) {
-        // due to the slightly ugly output implementation of the current AdapterRemoval2 module, each file
-        // has to be exported in a separate channel, and we must manually recombine when necessary
         ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
             .mix(
                 ADAPTERREMOVAL_PAIRED.out.collapsed_truncated,

@@ -43,22 +44,23 @@ workflow SHORTREAD_ADAPTERREMOVAL {
             }
             .groupTuple()
 
-        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
 
     } else if ( params.shortread_clipmerge_mergepairs && params.shortread_clipmerge_excludeunmerged ) {
         ch_adapterremoval_for_cat = ADAPTERREMOVAL_PAIRED.out.collapsed
             .mix( ADAPTERREMOVAL_PAIRED.out.collapsed_truncated )
             .map {
                 meta, reads ->
                 def meta_new = meta.clone()
                 meta_new['single_end'] = true
 
                 [ meta_new, reads ]
             }
             .groupTuple(by: 0)
 
-        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads.mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
+        ch_adapterremoval_reads_prepped = CAT_FASTQ ( ch_adapterremoval_for_cat ).reads
+            .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
 
     } else {

@@ -68,7 +70,7 @@ workflow SHORTREAD_ADAPTERREMOVAL {
         .map { meta, pair1, pair2 ->
             [ meta, [ pair1, pair2 ].flatten() ]
         }
         .mix( ADAPTERREMOVAL_SINGLE.out.singles_truncated )
     }
 
     ch_processed_reads = ch_adapterremoval_reads_prepped
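The recombination the moved comment describes boils down to mix plus groupTuple: AdapterRemoval2's per-file output channels are interleaved, the meta map is cloned and flagged single_end (collapsed pairs are effectively single-end), and grouping by the meta yields one [ meta, [ files ] ] item per sample, ready for concatenation. A reduced sketch of that pattern, with hypothetical file names standing in for the module outputs:

    // sketch: recombine per-file output channels into one grouped channel per sample
    ch_collapsed = Channel.of( [ [id:'s1'], 's1.collapsed.fq.gz' ] )
    ch_truncated = Channel.of( [ [id:'s1'], 's1.collapsed.truncated.fq.gz' ] )

    ch_collapsed
        .mix( ch_truncated )               // interleave the separate output channels
        .map { meta, reads ->
            def meta_new = meta.clone()    // clone so the shared meta map is not mutated
            meta_new['single_end'] = true  // merged pairs behave as single-end downstream
            [ meta_new, reads ]
        }
        .groupTuple(by: 0)                 // identical metas collapse to [ meta, [ file1, file2 ] ]
        .view()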
workflows/taxprofiler.nf

@@ -18,6 +18,7 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 if (params.input ) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 if (params.databases) { ch_databases = file(params.databases) } else { exit 1, 'Input database sheet not specified!' }
 if (params.shortread_clipmerge_mergepairs && params.run_malt ) log.warn "[nf-core/taxprofiler] warning: MALT does not except uncollapsed paired-reads. Pairs will be profiled as separate files."
+if (params.shortread_clipmerge_excludeunmerged && !params.shortread_clipmerge_mergepairs) exit 1, "[nf-core/taxprofiler] error: cannot include unmerged reads when merging not turned on. Please specify --shortread_clipmerge_mergepairs"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -126,7 +127,6 @@ workflow TAXPROFILER {
     ch_input_for_profiling = ch_shortreads_preprocessed
         .mix( ch_longreads_preprocessed )
         .combine(DB_CHECK.out.dbs)
-        .dump(tag: "reads_plus_db_clean")
         .branch {
             malt: it[2]['tool'] == 'malt'
             kraken2: it[2]['tool'] == 'kraken2'

@@ -141,9 +141,7 @@ workflow TAXPROFILER {
     // loading takes a long time, so we only want to run it once per database
     // TODO document somewhere we only accept illumina short reads for MALT?
     ch_input_for_malt = ch_input_for_profiling.malt
-        .dump(tag: "input_to_malt_prefilter")
         .filter { it[0]['instrument_platform'] == 'ILLUMINA' }
-        .dump(tag: "input_to_malt_postfilter")
         .map {
             it ->
             def temp_meta = [ id: it[2]['db_name']] + it[2]

@@ -151,7 +149,6 @@ workflow TAXPROFILER {
             [ temp_meta, it[1], db ]
         }
         .groupTuple(by: [0,2])
-        .dump(tag: "input_to_malt")
         .multiMap {
             it ->
             reads: [ it[0], it[1].flatten() ]
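Because MALT's database loading dominates runtime (per the comment above), the channel logic groups every read set destined for the same database before the process call, then multiMap fans the grouped tuple out into parallel input channels. A sketch of that shaping under an assumed tuple structure of [ meta, reads, db_meta, db_path ]; the input channel name is illustrative and the db: output is inferred, since the hunk above is truncated after the reads: line:

    // sketch: one MALT invocation per database, with all of its samples' reads pooled
    ch_reads_plus_db
        .map { meta, reads, db_meta, db ->
            def temp_meta = [ id: db_meta['db_name'] ] + db_meta   // key the group by the database
            [ temp_meta, reads, db ]
        }
        .groupTuple(by: [0, 2])                // same db meta AND same db path -> one group
        .multiMap {
            it ->
            reads: [ it[0], it[1].flatten() ]  // pooled fastqs for this database
            db:    it[2]                       // the database itself (inferred output)
        }
        .set { ch_input_for_malt }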
@@ -160,7 +157,6 @@ workflow TAXPROFILER {
 
     // We can run Kraken2 one-by-one sample-wise
     ch_input_for_kraken2 = ch_input_for_profiling.kraken2
-        .dump(tag: "input_to_kraken")
         .multiMap {
             it ->
             reads: [ it[0] + it[2], it[1] ]
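Kraken2, in contrast, runs per sample, so no grouping is needed: the database meta is merged into the sample meta (it[0] + it[2] adds the maps) and the tuple is split into reads and database channels. A sketch under the same assumed tuple structure [ meta, reads, db_meta, db_path ]; the db: output is again inferred from the truncated hunk:

    // sketch: per-sample kraken2 input, database meta folded into the sample meta
    ch_input_for_profiling_kraken2
        .multiMap {
            it ->
            reads: [ it[0] + it[2], it[1] ]  // merged meta map + this sample's reads
            db:    it[3]                     // the kraken2 database (inferred output)
        }
        .set { ch_input_for_kraken2 }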