1
0
Fork 0
mirror of https://github.com/MillironX/taxprofiler.git synced 2024-11-22 01:46:05 +00:00

Add multi-run meta information

This commit is contained in:
James Fellows Yates 2023-03-17 22:30:29 +01:00
parent af61e007b3
commit f9af0040bf

View file

@ -9,35 +9,58 @@ workflow INPUT_CHECK {
samplesheet // file: /path/to/samplesheet.csv samplesheet // file: /path/to/samplesheet.csv
main: main:
parsed_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
// Split the samplesheet into rows, group the rows by sample, flag samples that have more than one run,
// then expand back to one row per run with the multi-run flag added to each row (later copied into meta)
ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
.csv .csv
.splitCsv ( header:true, sep:',' ) .splitCsv ( header:true, sep:',' )
.map{
row ->
[ [ row.sample.toString() ], row ]
}
.groupTuple()
.map {
sample, rows ->
def is_multirun = rows.size() > 1
[ is_multirun, rows ]
}
.transpose(by: 1)
.map {
is_multirun, row ->
row['is_multirun'] = is_multirun
return row
}
ch_parsed_samplesheet = ch_split_samplesheet
.branch { row -> .branch { row ->
fasta: row.fasta != '' fasta: row.fasta != ''
nanopore: row.instrument_platform == 'OXFORD_NANOPORE' nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
fastq: true fastq: true
} }
fastq = parsed_samplesheet.fastq ch_fastq = ch_parsed_samplesheet.fastq
.map { create_fastq_channel(it) } .map { create_fastq_channel(it) }
nanopore = parsed_samplesheet.nanopore ch_nanopore = ch_parsed_samplesheet.nanopore
.map { create_fastq_channel(it) } .map { create_fastq_channel(it) }
fasta = parsed_samplesheet.fasta ch_fasta = ch_parsed_samplesheet.fasta
.map { create_fasta_channel(it) } .map { create_fasta_channel(it) }
.dump(tag: "boop")
emit: emit:
fastq = fastq ?: [] // channel: [ val(meta), [ reads ] ] fastq = ch_fastq ?: [] // channel: [ val(meta), [ reads ] ]
nanopore = nanopore ?: [] // channel: [ val(meta), [ reads ] ] nanopore = ch_nanopore ?: [] // channel: [ val(meta), [ reads ] ]
fasta = fasta ?: [] // channel: [ val(meta), fasta ] fasta = ch_fasta ?: [] // channel: [ val(meta), fasta ]
versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
} }
// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
def create_fastq_channel(LinkedHashMap row) { def create_fastq_channel(LinkedHashMap row) {
// create meta map // create meta map
def meta = row.subMap(['sample', 'run_accession', 'instrument_platform']) def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun'])
meta.id = meta.sample meta.id = meta.sample
meta.single_end = row.single_end.toBoolean() meta.single_end = row.single_end.toBoolean()
meta.is_fasta = false meta.is_fasta = false
@ -66,7 +89,9 @@ def create_fastq_channel(LinkedHashMap row) {
// Function to get list of [ meta, fasta ] // Function to get list of [ meta, fasta ]
def create_fasta_channel(LinkedHashMap row) { def create_fasta_channel(LinkedHashMap row) {
def meta = row.subMap(['sample', 'run_accession', 'instrument_platform'])
// don't include multi-run information as we don't do FASTA run merging
def meta = row.subMap(['sample', 'run_accession', 'instrument_platform' ])
meta.id = meta.sample meta.id = meta.sample
meta.single_end = true meta.single_end = true
meta.is_fasta = true meta.is_fasta = true