1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-12-05 02:29:55 +00:00
taxprofiler/subworkflows/local/input_check.nf

104 lines
3.5 KiB
Text
Raw Normal View History

//
// Check input samplesheet and get read channels
//
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
//
// INPUT_CHECK: run SAMPLESHEET_CHECK on the samplesheet, parse its CSV output
// row by row, flag samples that occur in more than one row, and split the rows
// into FASTQ, Nanopore and FASTA channels.
//
workflow INPUT_CHECK {
    take:
    samplesheet // file: /path/to/samplesheet.csv

    main:

    // Table to list, group per sample, detect if sample has multi-run,
    // then spread back to per-run rows but with multi-run info added to meta
    ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .map { row ->
            // Key every row by its sample name so groupTuple collects all rows of a sample
            [ [ row.sample.toString() ], row ]
        }
        .groupTuple()
        .map { _sample, rows ->
            // More than one row for the same sample means the sample has several runs
            def is_multirun = rows.size() > 1
            [ rows, is_multirun ]
        }
        // Back to one emission per row: [ row, is_multirun ]
        .transpose(by: 0)
        .map { row, is_multirun ->
            // Emit a new map instead of mutating `row` in place: objects flowing
            // through channels may be shared, so in-place edits can leak into
            // other consumers. Map + Map keeps the LinkedHashMap type expected
            // by create_fastq_channel / create_fasta_channel.
            row + [ is_multirun: is_multirun ]
        }

    // Split for context-dependent channel generation.
    // Conditions are evaluated in order and the first match wins: a row with a
    // non-empty `fasta` column is treated as FASTA regardless of platform.
    ch_parsed_samplesheet = ch_split_samplesheet
        .branch { row ->
            fasta: row.fasta != ''
            nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
            fastq: true
        }

    // Channel generation
    ch_fastq = ch_parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }

    ch_nanopore = ch_parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }

    ch_fasta = ch_parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }

    emit:
    fastq    = ch_fastq ?: []                 // channel: [ val(meta), [ reads ] ]
    nanopore = ch_nanopore ?: []              // channel: [ val(meta), [ reads ] ]
    fasta    = ch_fasta ?: []                 // channel: [ val(meta), fasta ]
    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
//
// Builds the meta map from one samplesheet row and pairs it with the read
// file(s). Returns [ meta, [ fastq_1 ] ] for single-end and Oxford Nanopore
// rows, and [ meta, [ fastq_1, fastq_2 ] ] otherwise. Calls error() when a
// required FastQ file does not exist, or when a non-single-end Nanopore row
// carries a second read file.
def create_fastq_channel(LinkedHashMap row) {
    // Meta map: selected samplesheet columns plus derived fields
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun']) + [
        id        : row.sample,
        single_end: row.single_end.toBoolean(),
        is_fasta  : false
    ]

    // Read 1 is mandatory for every row handled here
    def reads_1 = file(row.fastq_1)
    if (!reads_1.exists()) {
        error("ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}")
    }

    // Single-end: only the first read file is returned
    if (meta.single_end) {
        return [ meta, [ reads_1 ] ]
    }

    // Nanopore rows not flagged single-end must not carry a second read file
    if (meta.instrument_platform == 'OXFORD_NANOPORE') {
        if (row.fastq_2 != '') {
            error("ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}")
        }
        return [ meta, [ reads_1 ] ]
    }

    // Paired-end: read 2 has to exist as well
    def reads_2 = file(row.fastq_2)
    if (!reads_2.exists()) {
        error("ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}")
    }
    return [ meta, [ reads_1, reads_2 ] ]
}
// Function to get list of [ meta, fasta ]
//
// Builds the meta map from one samplesheet row and pairs it with the FASTA
// file, wrapped in a single-element list. Calls error() when the FASTA file
// does not exist.
def create_fasta_channel(LinkedHashMap row) {
    // don't include multi-run information as we don't do FASTA run merging
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform' ]) + [
        id        : row.sample,
        single_end: true,   // FASTA rows are always flagged single-end
        is_fasta  : true
    ]

    def fasta_file = file(row.fasta)
    if (!fasta_file.exists()) {
        error("ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}")
    }

    return [ meta, [ fasta_file ] ]
}