mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-24 16:09:55 +00:00
Merge pull request #90 from nf-core/samplesheet-check-improvement
Make check_samplesheet.py compatible with nf-core/fetchngs
This commit is contained in:
commit
1821496a49
1 changed file with 20 additions and 5 deletions
|
@ -37,6 +37,7 @@ def print_error(error, context="Line", context_str=""):
|
||||||
print(error_str)
|
print(error_str)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def check_samplesheet(file_in, file_out):
|
def check_samplesheet(file_in, file_out):
|
||||||
"""
|
"""
|
||||||
This function checks that the samplesheet follows the following structure:
|
This function checks that the samplesheet follows the following structure:
|
||||||
|
@ -87,17 +88,28 @@ def check_samplesheet(file_in, file_out):
|
||||||
"fasta",
|
"fasta",
|
||||||
]
|
]
|
||||||
header = [x.strip('"') for x in fin.readline().strip().split(",")]
|
header = [x.strip('"') for x in fin.readline().strip().split(",")]
|
||||||
if header[: len(HEADER)] != HEADER:
|
|
||||||
|
## Check for missing mandatory columns
|
||||||
|
missing_columns = list(set(HEADER) - set(header))
|
||||||
|
if len(missing_columns) > 0:
|
||||||
print(
|
print(
|
||||||
"ERROR: Please check samplesheet header -> {} != {}".format(
|
"ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
|
||||||
",".join(header), ",".join(HEADER)
|
",".join(missing_columns)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
## Find locations of mandatory columns
|
||||||
|
header_locs = {}
|
||||||
|
for i in HEADER:
|
||||||
|
header_locs[i] = header.index(i)
|
||||||
|
|
||||||
## Check sample entries
|
## Check sample entries
|
||||||
for line in fin:
|
for line in fin:
|
||||||
lspl = [x.strip().strip('"') for x in line.strip().split(",")]
|
|
||||||
|
## Pull out only relevant columns for downstream checking
|
||||||
|
line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
|
||||||
|
lspl = [line_parsed[i] for i in header_locs.values()]
|
||||||
|
|
||||||
# Check valid number of columns per row
|
# Check valid number of columns per row
|
||||||
if len(lspl) < len(HEADER):
|
if len(lspl) < len(HEADER):
|
||||||
|
@ -117,6 +129,7 @@ def check_samplesheet(file_in, file_out):
|
||||||
)
|
)
|
||||||
|
|
||||||
## Check sample name entries
|
## Check sample name entries
|
||||||
|
|
||||||
(
|
(
|
||||||
sample,
|
sample,
|
||||||
run_accession,
|
run_accession,
|
||||||
|
@ -173,7 +186,9 @@ def check_samplesheet(file_in, file_out):
|
||||||
## Auto-detect paired-end/single-end
|
## Auto-detect paired-end/single-end
|
||||||
if sample and fastq_1 and fastq_2: ## Paired-end short reads
|
if sample and fastq_1 and fastq_2: ## Paired-end short reads
|
||||||
sample_info.extend(["0", fastq_1, fastq_2, fasta])
|
sample_info.extend(["0", fastq_1, fastq_2, fasta])
|
||||||
elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads
|
elif (
|
||||||
|
sample and fastq_1 and not fastq_2
|
||||||
|
): ## Single-end short/long fastq reads
|
||||||
sample_info.extend(["1", fastq_1, fastq_2, fasta])
|
sample_info.extend(["1", fastq_1, fastq_2, fasta])
|
||||||
elif (
|
elif (
|
||||||
sample and fasta and not fastq_1 and not fastq_2
|
sample and fasta and not fastq_1 and not fastq_2
|
||||||
|
|
Loading…
Reference in a new issue