mirror of
https://github.com/MillironX/taxprofiler.git
synced 2024-11-21 20:56:03 +00:00
2140928638
Add 'final reads for profiling' saving mechanism
103 lines
3.5 KiB
Text
103 lines
3.5 KiB
Text
//
|
|
// Check input samplesheet and get read channels
|
|
//
|
|
|
|
include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
|
|
|
|
//
// INPUT_CHECK: run SAMPLESHEET_CHECK on the samplesheet, parse its CSV output
// into one map per row, flag rows whose sample name occurs more than once
// (is_multirun), and route each row into a FASTA, Nanopore or FASTQ channel.
//
// take:
//   samplesheet - file: /path/to/samplesheet.csv
// emit:
//   fastq    - rows that are neither FASTA nor Oxford Nanopore
//   nanopore - rows whose instrument_platform is 'OXFORD_NANOPORE'
//   fasta    - rows with a non-empty fasta column
//   versions - SAMPLESHEET_CHECK.out.versions
//
workflow INPUT_CHECK {
    take:
    samplesheet // file: /path/to/samplesheet.csv

    main:

    // Table to list, group per sample, detect if sample has multi-run,
    // then spread back to per-run rows but with multi-run info added to meta
    ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .map { row ->
            // Key each row by its sample name so groupTuple collects all runs of a sample
            [ [ row.sample.toString() ], row ]
        }
        .groupTuple()
        .map { sample, rows ->
            // More than one row for the same sample name means the sample has several runs
            def is_multirun = rows.size() > 1
            [ rows, is_multirun ]
        }
        .transpose(by: 0)
        .map { row, is_multirun ->
            // Emit a copy carrying the flag instead of mutating the row object
            // that was received from the upstream operator
            row + [ is_multirun: is_multirun ]
        }

    // Split for context-dependent channel generation.
    // Conditions are tested in order: FASTA first, then Nanopore, everything else is FASTQ
    ch_parsed_samplesheet = ch_split_samplesheet
        .branch { row ->
            fasta: row.fasta != ''
            nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
            fastq: true
        }

    // Channel generation
    ch_fastq = ch_parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }

    // Nanopore rows go through the same helper as the other FASTQ rows
    ch_nanopore = ch_parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }

    ch_fasta = ch_parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }

    emit:
    fastq = ch_fastq ?: [] // channel: [ val(meta), [ reads ] ]
    nanopore = ch_nanopore ?: [] // channel: [ val(meta), [ reads ] ]
    fasta = ch_fasta ?: [] // channel: [ val(meta), [ fasta ] ]
    versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
}
|
|
|
|
//
// Build a [ meta, [ reads ] ] entry from one parsed samplesheet row.
//
// row: map for one samplesheet line; the keys read here are sample,
//      run_accession, instrument_platform, is_multirun, single_end,
//      fastq_1 and fastq_2.
//
// Returns [ meta, [ fastq_1 ] ] for single-end rows and for Oxford Nanopore
// rows, and [ meta, [ fastq_1, fastq_2 ] ] otherwise.
// Calls error() when fastq_1 does not exist, when a non-single-end Nanopore
// row has a non-empty fastq_2, or when a required fastq_2 does not exist.
//
def create_fastq_channel(LinkedHashMap row) {
    // create meta map: copy the identifying columns, then add the derived fields
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun'])
    meta.putAll([
        id        : meta.sample,
        single_end: row.single_end.toBoolean(),
        is_fasta  : false
    ])

    // Read 1 is required for every row handled here
    def read_1 = file(row.fastq_1)
    if (!read_1.exists()) {
        error("ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}")
    }

    // Single-end rows only carry read 1
    if (meta.single_end) {
        return [ meta, [ read_1 ] ]
    }

    // Nanopore rows must not declare a second read file
    if (meta.instrument_platform == 'OXFORD_NANOPORE') {
        if (row.fastq_2 != '') {
            error("ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}")
        }
        return [ meta, [ read_1 ] ]
    }

    // Remaining rows are paired-end, so read 2 must exist as well
    def read_2 = file(row.fastq_2)
    if (!read_2.exists()) {
        error("ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}")
    }
    return [ meta, [ read_1, read_2 ] ]
}
|
|
|
|
//
// Build a [ meta, [ fasta ] ] entry from one parsed samplesheet row.
//
// row: map for one samplesheet line; the keys read here are sample,
//      run_accession, instrument_platform and fasta.
//
// Returns [ meta, [ fasta ] ] with meta.single_end = true and
// meta.is_fasta = true. Calls error() when the FASTA file does not exist.
//
def create_fasta_channel(LinkedHashMap row) {
    // don't include multi-run information as we don't do FASTA run merging
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform' ])
    meta.putAll([
        id        : meta.sample,
        single_end: true,
        is_fasta  : true
    ])

    // Resolve the path once and reuse it for both the check and the result
    def fasta = file(row.fasta)
    if (!fasta.exists()) {
        error("ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}")
    }

    return [ meta, [ fasta ] ]
}
|