1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-13 08:23:09 +00:00

Merge pull request #258 from nf-core/clone-maps

refactor: double check maps and validation
This commit is contained in:
Moritz E. Beber 2023-03-11 21:39:49 +01:00 committed by GitHub
commit efa398edab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 72 additions and 108 deletions

View file

@ -28,22 +28,20 @@ workflow DB_CHECK {
// Normal checks for within-row validity, so can be moved to separate functions // Normal checks for within-row validity, so can be moved to separate functions
parsed_samplesheet = Channel.fromPath(dbsheet) parsed_samplesheet = Channel.fromPath(dbsheet)
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )
.map { .map { row ->
validate_db_rows(it) validate_db_rows(row)
create_db_channels(it) return [ row.subMap(['tool', 'db_name', 'db_params']), file(row.db_path) ]
} }
ch_dbs_for_untar = parsed_samplesheet ch_dbs_for_untar = parsed_samplesheet
.branch { .branch { db_meta, db ->
untar: it[1].toString().endsWith(".tar.gz") untar: db.name.endsWith(".tar.gz")
skip: true skip: true
} }
// Filter the channel to untar only those databases for tools that are selected to be run by the user. // Filter the channel to untar only those databases for tools that are selected to be run by the user.
ch_input_untar = ch_dbs_for_untar.untar ch_input_untar = ch_dbs_for_untar.untar
.filter { .filter { db_meta, db -> params["run_${db_meta.tool}"] }
params["run_${it[0]['tool']}"]
}
UNTAR (ch_input_untar) UNTAR (ch_input_untar)
ch_versions = ch_versions.mix(UNTAR.out.versions.first()) ch_versions = ch_versions.mix(UNTAR.out.versions.first())
@ -61,7 +59,7 @@ def validate_db_rows(LinkedHashMap row){
// all columns there // all columns there
def expected_headers = ['tool', 'db_name', 'db_params', 'db_path'] def expected_headers = ['tool', 'db_name', 'db_params', 'db_path']
if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_keys.join(", ")}" if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_headers.join(", ")}"
// valid tools specified // valid tools specified
def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ] def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ]
@ -74,21 +72,7 @@ def validate_db_rows(LinkedHashMap row){
// check if any form of bracken params, that it must have `;` // check if any form of bracken params, that it must have `;`
if ( row.tool == 'bracken' && row.db_params && !row.db_params.contains(";") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. Bracken requires a semi-colon if passing parameter. Error in: ${row}" if ( row.tool == 'bracken' && row.db_params && !row.db_params.contains(";") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. Bracken requires a semi-colon if passing parameter. Error in: ${row}"
// ensure that the database directory exists
if (!file(row.db_path, type: 'dir').exists()) exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}"
} }
def create_db_channels(LinkedHashMap row) {
def meta = [:]
meta.tool = row.tool
meta.db_name = row.db_name
meta.db_params = row.db_params
def array = []
if (!file(row.db_path, type: 'dir').exists()) {
exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}"
}
array = [ meta, file(row.db_path) ]
return array
}

View file

@ -12,9 +12,9 @@ workflow INPUT_CHECK {
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet ) parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
.csv .csv
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )
.branch { .branch { row ->
fasta: it['fasta'] != '' fasta: row.fasta != ''
nanopore: it['instrument_platform'] == 'OXFORD_NANOPORE' nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
fastq: true fastq: true
} }
@ -37,49 +37,42 @@ workflow INPUT_CHECK {
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
def create_fastq_channel(LinkedHashMap row) { def create_fastq_channel(LinkedHashMap row) {
// create meta map // create meta map
def meta = [:] def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
meta.id = row.sample meta.id = meta.sample
meta.run_accession = row.run_accession
meta.instrument_platform = row.instrument_platform
meta.single_end = row.single_end.toBoolean() meta.single_end = row.single_end.toBoolean()
meta.is_fasta = false meta.is_fasta = false
// add path(s) of the fastq file(s) to the meta map // add path(s) of the fastq file(s) to the meta map
def fastq_meta = []
if (!file(row.fastq_1).exists()) { if (!file(row.fastq_1).exists()) {
exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
} }
if (meta.single_end) { if (meta.single_end) {
fastq_meta = [ meta, [ file(row.fastq_1) ] ] return [ meta, [ file(row.fastq_1) ] ]
} else { } else {
if (meta.instrument_platform == 'OXFORD_NANOPORE') { if (meta.instrument_platform == 'OXFORD_NANOPORE') {
if (row.fastq_2 != '') { if (row.fastq_2 != '') {
exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}" exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"
} }
fastq_meta = [ meta, [ file(row.fastq_1) ] ] return [ meta, [ file(row.fastq_1) ] ]
} else { } else {
if (!file(row.fastq_2).exists()) { if (!file(row.fastq_2).exists()) {
exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
} }
fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] return [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]
}
}
} }
} // Function to get list of [ meta, fasta ]
return fastq_meta
}// Function to get list of [ meta, fasta ]
def create_fasta_channel(LinkedHashMap row) { def create_fasta_channel(LinkedHashMap row) {
def meta = [:] def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
meta.id = row.sample meta.id = meta.sample
meta.run_accession = row.run_accession
meta.instrument_platform = row.instrument_platform
meta.single_end = true meta.single_end = true
meta.is_fasta = true meta.is_fasta = true
def array = []
if (!file(row.fasta).exists()) { if (!file(row.fasta).exists()) {
exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}" exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
} }
array = [ meta, [ file(row.fasta) ] ] return [ meta, [ file(row.fasta) ] ]
return array
} }

View file

@ -46,7 +46,7 @@ workflow LONGREAD_HOSTREMOVAL {
ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() ) ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions.first() )
bam_bai = MINIMAP2_ALIGN.out.bam bam_bai = MINIMAP2_ALIGN.out.bam
.join(SAMTOOLS_INDEX.out.bai, remainder: true) .join(SAMTOOLS_INDEX.out.bai)
SAMTOOLS_STATS ( bam_bai, reference ) SAMTOOLS_STATS ( bam_bai, reference )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())

View file

@ -20,12 +20,7 @@ workflow LONGREAD_PREPROCESSING {
PORECHOP_PORECHOP ( reads ) PORECHOP_PORECHOP ( reads )
ch_processed_reads = PORECHOP_PORECHOP.out.reads ch_processed_reads = PORECHOP_PORECHOP.out.reads
.map { .map { meta, reads -> [ meta + [single_end: 1], reads ] }
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log ) ch_multiqc_files = ch_multiqc_files.mix( PORECHOP_PORECHOP.out.log )
@ -39,12 +34,7 @@ workflow LONGREAD_PREPROCESSING {
} else { } else {
PORECHOP_PORECHOP ( reads ) PORECHOP_PORECHOP ( reads )
ch_clipped_reads = PORECHOP_PORECHOP.out.reads ch_clipped_reads = PORECHOP_PORECHOP.out.reads
.map { .map { meta, reads -> [ meta + [single_end: 1], reads ] }
meta, reads ->
def meta_new = meta.clone()
meta_new['single_end'] = 1
[ meta_new, reads ]
}
ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads ch_processed_reads = FILTLONG ( ch_clipped_reads.map { meta, reads -> [ meta, [], reads ] } ).reads

View file

@ -35,10 +35,7 @@ workflow PROFILING {
ch_input_for_profiling = reads ch_input_for_profiling = reads
.map { .map {
meta, reads -> meta, reads ->
def meta_new = meta.clone() [meta + [id: "${meta.id}${meta.single_end ? '_se' : '_pe'}"], reads]
pairtype = meta_new['single_end'] ? '_se' : '_pe'
meta_new['id'] = meta_new['id'] + pairtype
[meta_new, reads]
} }
.combine(databases) .combine(databases)
.branch { .branch {
@ -92,9 +89,9 @@ workflow PROFILING {
} }
.groupTuple(by: [0,2]) .groupTuple(by: [0,2])
.multiMap { .multiMap {
it -> meta, reads, db ->
reads: [ it[0], it[1].flatten() ] reads: [ meta, reads.flatten() ]
db: it[2] db: db
} }
MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db ) MALT_RUN ( ch_input_for_malt.reads, ch_input_for_malt.db )