From 460f3ca157bd618dde7dbc20f5c99bdd2bda731b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 12 Dec 2022 11:03:40 +0100 Subject: [PATCH] Add database sheet validation --- subworkflows/local/db_check.nf | 31 ++++++++++++++++++++----------- subworkflows/local/profiling.nf | 10 +++++----- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/subworkflows/local/db_check.nf b/subworkflows/local/db_check.nf index ce26905..4ed786d 100644 --- a/subworkflows/local/db_check.nf +++ b/subworkflows/local/db_check.nf @@ -11,14 +11,22 @@ workflow DB_CHECK { main: ch_versions = Channel.empty() - // TODO: make database sheet check - // Checks: - // 1) no duplicates, + // special check to check _between_ rows, for which we must group rows together + Channel.fromPath(dbsheet) + .splitCsv ( header:true, sep:',' ) + .map {[it.tool, it.db_name] } + .groupTuple() + .map { + tool, db_name -> + def unique_names = db_name.unique(false) + if ( unique_names.size() < db_name.size() ) exit 1, "[nf-core/taxprofiler] ERROR: Each database for a tool must have a unique name, duplicated detected. Tool: ${tool}, Database name: ${unique_names}" + } + // normal checks for within-row validity, so can be moved to separate functions parsed_samplesheet = Channel.fromPath(dbsheet) .splitCsv ( header:true, sep:',' ) .map { - validate_db_sheet(it) + validate_db_rows(it) create_db_channels(it) } @@ -40,21 +48,22 @@ workflow DB_CHECK { versions = ch_versions // channel: [ versions.yml ] } -def validate_db_sheet(LinkedHashMap row){ +def validate_db_rows(LinkedHashMap row){ // check minimum number of columns - if (row.size() < 4) exit 1, "[nf-core/taxprofiler] error: Invalid database input sheet - malformed row (e.g. missing column). See documentation for more information. Error in: ${row}, " + if (row.size() < 4) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed row (e.g. missing column). See documentation for more information. Error in: ${row}" // all columns there def expected_headers = ['tool', 'db_name', 'db_params', 'db_path'] - if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] error: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_keys.join(", ")}" + if ( !row.keySet().containsAll(expected_headers) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database input sheet - malformed column names. Please check input TSV. Column names should be: ${expected_keys.join(", ")}" // valid tools specified// TIFNISIH LIST - def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "malt", "metaphlan3" ] + def expected_tools = [ "bracken", "centrifuge", "diamond", "kaiju", "kraken2", "krakenuniq", "malt", "metaphlan3", "motus" ] + if ( !expected_tools.contains(row.tool) ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid tool name. Please see documentation for all supported profilers. Error in: ${row}" // detect quotes in params - if ( row.db_params.contains('"') ) exit 1, "[nf-core/taxprofiler] error: Invalid database db_params entry. No quotes allowed. Error in: ${row}" - if ( row.db_params.contains("'") ) exit 1, "[nf-core/taxprofiler] error: Invalid database db_params entry. No quotes allowed. Error in: ${row}" + if ( row.db_params.contains('"') ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}" + if ( row.db_params.contains("'") ) exit 1, "[nf-core/taxprofiler] ERROR: Invalid database db_params entry. No quotes allowed. Error in: ${row}" } @@ -66,7 +75,7 @@ def create_db_channels(LinkedHashMap row) { def array = [] if (!file(row.db_path, type: 'dir').exists()) { - exit 1, "ERROR: Please check input samplesheet -> database could not be found!\n${row.db_path}" + exit 1, "ERROR: Please check input samplesheet -> database path could not be found!\n${row.db_path}" } array = [ meta, file(row.db_path) ] diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index f5c970c..6613648 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -41,14 +41,14 @@ workflow PROFILING { } .combine(databases) .branch { - malt: it[2]['tool'] == 'malt' - kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken - metaphlan3: it[2]['tool'] == 'metaphlan3' centrifuge: it[2]['tool'] == 'centrifuge' - kaiju: it[2]['tool'] == 'kaiju' diamond: it[2]['tool'] == 'diamond' - motus: it[2]['tool'] == 'motus' + kaiju: it[2]['tool'] == 'kaiju' + kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken krakenuniq: it[2]['tool'] == 'krakenuniq' + malt: it[2]['tool'] == 'malt' + metaphlan3: it[2]['tool'] == 'metaphlan3' + motus: it[2]['tool'] == 'motus' unknown: true }