mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-21 16:56:04 +00:00
Merge pull request #90 from nf-core/samplesheet-check-improvement
Make check_samplesheet.py compatible with nf-core/fetchngs
This commit is contained in:
commit
1821496a49
1 changed file with 20 additions and 5 deletions
|
@ -37,6 +37,7 @@ def print_error(error, context="Line", context_str=""):
|
|||
print(error_str)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def check_samplesheet(file_in, file_out):
|
||||
"""
|
||||
This function checks that the samplesheet follows the following structure:
|
||||
|
@ -87,17 +88,28 @@ def check_samplesheet(file_in, file_out):
|
|||
"fasta",
|
||||
]
|
||||
header = [x.strip('"') for x in fin.readline().strip().split(",")]
|
||||
if header[: len(HEADER)] != HEADER:
|
||||
|
||||
## Check for missing mandatory columns
|
||||
missing_columns = list(set(HEADER) - set(header))
|
||||
if len(missing_columns) > 0:
|
||||
print(
|
||||
"ERROR: Please check samplesheet header -> {} != {}".format(
|
||||
",".join(header), ",".join(HEADER)
|
||||
"ERROR: Missing required column header -> {}. Note some columns can otherwise be empty. See pipeline documentation (https://nf-co.re/taxprofiler/usage).".format(
|
||||
",".join(missing_columns)
|
||||
)
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
## Find locations of mandatory columns
|
||||
header_locs = {}
|
||||
for i in HEADER:
|
||||
header_locs[i] = header.index(i)
|
||||
|
||||
## Check sample entries
|
||||
for line in fin:
|
||||
lspl = [x.strip().strip('"') for x in line.strip().split(",")]
|
||||
|
||||
## Pull out only relevant columns for downstream checking
|
||||
line_parsed = [x.strip().strip('"') for x in line.strip().split(",")]
|
||||
lspl = [line_parsed[i] for i in header_locs.values()]
|
||||
|
||||
# Check valid number of columns per row
|
||||
if len(lspl) < len(HEADER):
|
||||
|
@ -117,6 +129,7 @@ def check_samplesheet(file_in, file_out):
|
|||
)
|
||||
|
||||
## Check sample name entries
|
||||
|
||||
(
|
||||
sample,
|
||||
run_accession,
|
||||
|
@ -173,7 +186,9 @@ def check_samplesheet(file_in, file_out):
|
|||
## Auto-detect paired-end/single-end
|
||||
if sample and fastq_1 and fastq_2: ## Paired-end short reads
|
||||
sample_info.extend(["0", fastq_1, fastq_2, fasta])
|
||||
elif sample and fastq_1 and not fastq_2: ## Single-end short/long fastq reads
|
||||
elif (
|
||||
sample and fastq_1 and not fastq_2
|
||||
): ## Single-end short/long fastq reads
|
||||
sample_info.extend(["1", fastq_1, fastq_2, fasta])
|
||||
elif (
|
||||
sample and fasta and not fastq_1 and not fastq_2
|
||||
|
|
Loading…
Reference in a new issue