You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
taxprofiler/subworkflows/local/input_check.nf

104 lines
3.5 KiB
Plaintext

//
// Check input samplesheet and get read channels
//
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
workflow INPUT_CHECK {
    take:
    samplesheet // file: /path/to/samplesheet.csv

    main:
    // Run SAMPLESHEET_CHECK and parse its `csv` output into one map per row.
    // Table to list, group per sample, detect if sample has multi-run,
    // then spread back to per-run rows but with multi-run info added to each row
    ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .map { row ->
            // key each row by its sample name so groupTuple collects all rows of a sample
            [ [ row.sample.toString() ], row ]
        }
        .groupTuple()
        .map { sample, rows ->
            // more than one row for the same sample name means the sample has several runs
            def is_multirun = rows.size() > 1
            [ rows, is_multirun ]
        }
        // back to one [ row, is_multirun ] item per samplesheet row
        .transpose(by: 0)
        .map { row, is_multirun ->
            // Emit a new map rather than modifying the row in place; objects that
            // travel through channels should not be mutated inside operator closures.
            row + [ is_multirun: is_multirun ]
        }

    // Split for context-dependent channel generation.
    // branch sends each row to the first condition that matches, so a row with a
    // non-empty `fasta` column is treated as FASTA regardless of its platform.
    ch_parsed_samplesheet = ch_split_samplesheet
        .branch { row ->
            fasta: row.fasta != ''
            nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
            fastq: true
        }

    // Channel generation
    ch_fastq = ch_parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }

    // Nanopore rows go through the same FASTQ helper as the other FASTQ rows
    ch_nanopore = ch_parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }

    ch_fasta = ch_parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }

    emit:
    fastq    = ch_fastq ?: []                 // channel: [ val(meta), [ reads ] ]
    nanopore = ch_nanopore ?: []              // channel: [ val(meta), [ reads ] ]
    fasta    = ch_fasta ?: []                 // channel: [ val(meta), [ fasta ] ]
    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
//
// Builds the meta map for one samplesheet row and pairs it with the row's FASTQ file(s).
//   row     : samplesheet row; the fields read are sample, run_accession,
//             instrument_platform, is_multirun, single_end, fastq_1 and fastq_2
//   returns : [ meta, [ fastq_1 ] ] for single-end rows and for Oxford Nanopore rows,
//             [ meta, [ fastq_1, fastq_2 ] ] otherwise
//   errors  : calls error() when a required FASTQ path is empty or does not exist, or when
//             an Oxford Nanopore row that is not flagged single-end has a Read 2 entry
def create_fastq_channel(LinkedHashMap row) {
    // create meta map
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun'])
    meta.id         = meta.sample
    meta.single_end = row.single_end.toBoolean()
    meta.is_fasta   = false

    // Resolve Read 1 once. An empty or missing path is reported as a samplesheet problem
    // here instead of being handed to file(), which rejects empty arguments with a
    // generic exception that does not mention the samplesheet.
    def fastq_1 = row.fastq_1 ? file(row.fastq_1) : null
    if (!fastq_1?.exists()) {
        error("ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}")
    }

    if (meta.single_end) {
        return [ meta, [ fastq_1 ] ]
    }

    // Not flagged single-end: Oxford Nanopore rows must still carry only Read 1
    if (meta.instrument_platform == 'OXFORD_NANOPORE') {
        if (row.fastq_2 != '') {
            error("ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}")
        }
        // NOTE(review): meta.single_end stays false here although only one file is returned — confirm downstream expects that
        return [ meta, [ fastq_1 ] ]
    }

    // Paired-end: Read 2 is mandatory and gets the same empty-path guard as Read 1
    def fastq_2 = row.fastq_2 ? file(row.fastq_2) : null
    if (!fastq_2?.exists()) {
        error("ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}")
    }
    return [ meta, [ fastq_1, fastq_2 ] ]
}
// Function to get list of [ meta, [ fasta ] ]
//
// Builds the meta map for one samplesheet row and pairs it with the row's FASTA file.
//   row     : samplesheet row; the fields read are sample, run_accession,
//             instrument_platform and fasta
//   returns : [ meta, [ fasta ] ]
//   errors  : calls error() when the FASTA file does not exist
def create_fasta_channel(LinkedHashMap row) {
    // is_multirun is deliberately left out of meta, as FASTA run merging is not done
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform' ])
    meta.putAll([ id: meta.sample, single_end: true, is_fasta: true ])

    def fasta_file = file(row.fasta)
    if (fasta_file.exists()) {
        return [ meta, [ fasta_file ] ]
    }

    error("ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}")
}