From c1a7d6335f1b4c73067926563f8e2d7b0ba80d57 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 13 Jun 2022 21:02:12 +0200 Subject: [PATCH 1/3] Do not require fixed column order --- bin/check_samplesheet.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d10ee90..d7ea5a9 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -37,6 +37,7 @@ def print_error(error, context="Line", context_str=""): print(error_str) sys.exit(1) + def check_samplesheet(file_in, file_out): """ This function checks that the samplesheet follows the following structure: @@ -87,10 +88,13 @@ def check_samplesheet(file_in, file_out): "fasta", ] header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: + + ## Check for missing mandatory columns + missing_columns = list(set(HEADER) - set(header)) + if len(missing_columns) > 0: print( - "ERROR: Please check samplesheet header -> {} != {}".format( - ",".join(header), ",".join(HEADER) + "ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format( + ",".join(missing_columns) ) ) sys.exit(1) @@ -173,7 +177,9 @@ def check_samplesheet(file_in, file_out): ## Auto-detect paired-end/single-end if sample and fastq_1 and fastq_2: ## Paired-end short reads sample_info.extend(["0", fastq_1, fastq_2, fasta]) - elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads + elif ( + sample and fastq_1 and not fastq_2 + ): ## Single-end short/long fastq reads sample_info.extend(["1", fastq_1, fastq_2, fasta]) elif ( sample and fasta and not fastq_1 and not fastq_2 From 2e7f26f380246ade93c56e50f5611fb8f0643b60 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 14 Jun 2022 20:09:48 +0000 Subject: [PATCH 2/3] Replace header tuple with sub-set list of only relevant columns for sample checking --- bin/check_samplesheet.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d7ea5a9..4e80b46 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -99,9 +99,17 @@ def check_samplesheet(file_in, file_out): ) sys.exit(1) + ## Find locations of mandatory columns + header_locs = dict() + for i in HEADER: + header_locs[i] = header.index(i) + ## Check sample entries for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] + + ## Pull out only relevant columns for downstream checking + line_parsed = [x.strip().strip('"') for x in line.strip().split(",")] + lspl = [line_parsed[i] for i in header_locs.values()] # Check valid number of columns per row if len(lspl) < len(HEADER): @@ -121,6 +129,7 @@ def check_samplesheet(file_in, file_out): ) ## Check sample name entries + ( sample, run_accession, From 4315141c5fa31e721f0cce4154938275c494c68d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 23 Aug 2022 15:44:53 +0200 Subject: [PATCH 3/3] Update bin/check_samplesheet.py Co-authored-by: Moritz E. Beber --- bin/check_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 4e80b46..8c13690 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -100,7 +100,7 @@ def check_samplesheet(file_in, file_out): sys.exit(1) ## Find locations of mandatory columns - header_locs = dict() + header_locs = {} for i in HEADER: header_locs[i] = header.index(i)