1
0
Fork 0
Mirror of https://github.com/MillironX/taxprofiler.git (last synced 2024-11-24 16:09:55 +00:00)

Merge pull request #90 from nf-core/samplesheet-check-improvement

Make check_samplesheet.py compatible with nf-core/fetchngs
This commit is contained in:
Authored by James A. Fellows Yates on 2022-08-23 15:57:59 +01:00; committed by GitHub
commit 1821496a49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -37,6 +37,7 @@ def print_error(error, context="Line", context_str=""):
print(error_str) print(error_str)
sys.exit(1) sys.exit(1)
def check_samplesheet(file_in, file_out): def check_samplesheet(file_in, file_out):
""" """
This function checks that the samplesheet follows the following structure: This function checks that the samplesheet follows the following structure:
@ -87,17 +88,28 @@ def check_samplesheet(file_in, file_out):
"fasta", "fasta",
] ]
header = [x.strip('"') for x in fin.readline().strip().split(",")] header = [x.strip('"') for x in fin.readline().strip().split(",")]
if header[: len(HEADER)] != HEADER:
## Check for missing mandatory columns
missing_columns = list(set(HEADER) - set(header))
if len(missing_columns) > 0:
print( print(
"ERROR: Please check samplesheet header -> {} != {}".format( "ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
",".join(header), ",".join(HEADER) ",".join(missing_columns)
) )
) )
sys.exit(1) sys.exit(1)
## Find locations of mandatory columns
header_locs = {}
for i in HEADER:
header_locs[i] = header.index(i)
## Check sample entries ## Check sample entries
for line in fin: for line in fin:
lspl = [x.strip().strip('"') for x in line.strip().split(",")]
## Pull out only relevant columns for downstream checking
line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
lspl = [line_parsed[i] for i in header_locs.values()]
# Check valid number of columns per row # Check valid number of columns per row
if len(lspl) < len(HEADER): if len(lspl) < len(HEADER):
@ -117,6 +129,7 @@ def check_samplesheet(file_in, file_out):
) )
## Check sample name entries ## Check sample name entries
( (
sample, sample,
run_accession, run_accession,
@ -173,7 +186,9 @@ def check_samplesheet(file_in, file_out):
## Auto-detect paired-end/single-end ## Auto-detect paired-end/single-end
if sample and fastq_1 and fastq_2: ## Paired-end short reads if sample and fastq_1 and fastq_2: ## Paired-end short reads
sample_info.extend(["0", fastq_1, fastq_2, fasta]) sample_info.extend(["0", fastq_1, fastq_2, fasta])
elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads elif (
sample and fastq_1 and not fastq_2
): ## Single-end short/long fastq reads
sample_info.extend(["1", fastq_1, fastq_2, fasta]) sample_info.extend(["1", fastq_1, fastq_2, fasta])
elif ( elif (
sample and fasta and not fastq_1 and not fastq_2 sample and fasta and not fastq_1 and not fastq_2