2022-02-18 06:55:14 +00:00
|
|
|
//
|
|
|
|
// Check input samplesheet and get read channels
|
|
|
|
//
|
|
|
|
|
2022-12-12 07:49:13 +00:00
|
|
|
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
|
2022-02-18 06:55:14 +00:00
|
|
|
|
|
|
|
workflow INPUT_CHECK {
    take:
    samplesheet // file: /path/to/samplesheet.csv

    main:
    // Run the samplesheet check, then read its CSV output back as one map per row
    SAMPLESHEET_CHECK ( samplesheet )
    ch_rows = SAMPLESHEET_CHECK.out.csv
        .splitCsv ( header:true, sep:',' )

    // Key every row by its sample name so that all rows of one sample are grouped together
    ch_rows_by_sample = ch_rows
        .map { entry -> [ [ entry.sample.toString() ], entry ] }
        .groupTuple()

    // A sample counts as multi-run when more than one row shares its name.
    // The grouped rows are then spread back out to one row per item, each
    // carrying the flag under the 'is_multirun' key.
    ch_split_samplesheet = ch_rows_by_sample
        .map { sample_key, sample_rows -> [ sample_rows, sample_rows.size() > 1 ] }
        .transpose(by: 0)
        .map { entry, multirun ->
            entry.is_multirun = multirun
            entry
        }

    // Split for context-dependent channel generation.
    // Condition order matters: rows with a non-empty fasta column are taken
    // first, then Oxford Nanopore rows, and everything else is treated as fastq.
    ch_parsed_samplesheet = ch_split_samplesheet
        .branch { entry ->
            fasta: entry.fasta != ''
            nanopore: entry.instrument_platform == 'OXFORD_NANOPORE'
            fastq: true
        }

    // Channel generation: turn each row into a [ meta, files ] item
    ch_fastq    = ch_parsed_samplesheet.fastq.map { entry -> create_fastq_channel(entry) }
    ch_nanopore = ch_parsed_samplesheet.nanopore.map { entry -> create_fastq_channel(entry) }
    ch_fasta    = ch_parsed_samplesheet.fasta.map { entry -> create_fasta_channel(entry) }

    emit:
    fastq    = ch_fastq ?: []                 // channel: [ val(meta), [ reads ] ]
    nanopore = ch_nanopore ?: []              // channel: [ val(meta), [ reads ] ]
    fasta    = ch_fasta ?: []                 // channel: [ val(meta), [ fasta ] ]
    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
|
|
|
|
|
2022-02-18 06:55:14 +00:00
|
|
|
// Function to get list of [ meta, [ fastq_1 ] ] or [ meta, [ fastq_1, fastq_2 ] ]
def create_fastq_channel(LinkedHashMap row) {
    // create meta map: copy the identifying columns, then add the derived fields
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun'])
    meta.id         = meta.sample
    meta.single_end = row.single_end.toBoolean()
    meta.is_fasta   = false

    // Read 1 is required whatever the layout or platform
    def reads_1 = file(row.fastq_1)
    if (!reads_1.exists()) {
        error("ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}")
    }

    // single-end rows only ever carry Read 1
    if (meta.single_end) {
        return [ meta, [ reads_1 ] ]
    }

    // rows not flagged single-end: Oxford Nanopore must not declare a Read 2
    if (meta.instrument_platform == 'OXFORD_NANOPORE') {
        if (row.fastq_2 != '') {
            error("ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}")
        }
        return [ meta, [ reads_1 ] ]
    }

    // every other platform is paired-end here, so Read 2 has to exist as well
    def reads_2 = file(row.fastq_2)
    if (!reads_2.exists()) {
        error("ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}")
    }
    return [ meta, [ reads_1, reads_2 ] ]
}
|
|
|
|
|
|
|
|
// Function to get list of [ meta, [ fasta ] ]
def create_fasta_channel(LinkedHashMap row) {
    // create meta map; multi-run information is left out on purpose,
    // as we don't do FASTA run merging
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
    meta.id         = meta.sample
    meta.single_end = true
    meta.is_fasta   = true

    // the FASTA file must exist before it is handed on
    def fasta_file = file(row.fasta)
    if (!fasta_file.exists()) {
        error("ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}")
    }

    return [ meta, [ fasta_file ] ]
}
|