taxprofiler/subworkflows/local/input_check.nf

//
// Check input samplesheet and get read channels
//

include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'

workflow INPUT_CHECK {
    take:
    samplesheet // file: /path/to/samplesheet.csv

    main:

    // Run SAMPLESHEET_CHECK on the samplesheet, parse its CSV output into one
    // map per row, group the rows per sample to detect samples with more than
    // one run, then spread back to per-run rows with that flag attached.
    ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )
        .csv
        .splitCsv ( header:true, sep:',' )
        .map {
            row ->
                // Key each row by its sample name so groupTuple collects all runs of a sample
                [ [ row.sample.toString() ], row ]
        }
        .groupTuple()
        .map {
            sample, rows ->
                // More than one row for the same sample means it has multiple runs
                def is_multirun = rows.size() > 1
                [ rows, is_multirun ]
        }
        // Back to one item per row; the flag is repeated alongside every row
        .transpose(by: 0)
        .map {
            row, is_multirun ->
                // Emit a copy with the extra key instead of mutating the map
                // that was emitted upstream
                row + [ is_multirun: is_multirun ]
        }

    // Split for context-dependent channel generation.
    // Conditions are evaluated in order and the first match wins: rows with a
    // non-empty `fasta` column go to `fasta`, remaining Oxford Nanopore rows go
    // to `nanopore`, and everything else goes to `fastq`.
    ch_parsed_samplesheet = ch_split_samplesheet
        .branch { row ->
            fasta: row.fasta != ''
            nanopore: row.instrument_platform == 'OXFORD_NANOPORE'
            fastq: true
        }

    // Channel generation
    ch_fastq = ch_parsed_samplesheet.fastq
        .map { create_fastq_channel(it) }

    // Nanopore rows use the same FASTQ helper
    ch_nanopore = ch_parsed_samplesheet.nanopore
        .map { create_fastq_channel(it) }

    ch_fasta = ch_parsed_samplesheet.fasta
        .map { create_fasta_channel(it) }

    emit:
    fastq    = ch_fastq ?: []                    // channel: [ val(meta), [ reads ] ]
    nanopore = ch_nanopore ?: []                 // channel: [ val(meta), [ reads ] ]
    fasta    = ch_fasta ?: []                    // channel: [ val(meta), [ fasta ] ]
    versions = SAMPLESHEET_CHECK.out.versions    // channel: [ versions.yml ]
}

// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
def create_fastq_channel(LinkedHashMap row) {
    // Turns one samplesheet row into [ meta, [ fastq_1 ] ] or
    // [ meta, [ fastq_1, fastq_2 ] ], aborting the run when an expected
    // FastQ file is missing or a Nanopore row carries a second read file.

    // meta: selected samplesheet columns plus the derived keys
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun']) + [
        id        : row.sample,
        single_end: row.single_end.toBoolean(),
        is_fasta  : false
    ]

    // Read 1 is mandatory for every row handled here
    def read_1 = file(row.fastq_1)
    if (!read_1.exists()) {
        exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
    }

    // Single-end rows: only read 1 is emitted
    if (meta.single_end) {
        return [ meta, [ read_1 ] ]
    }

    // Rows not flagged single-end on Oxford Nanopore must have an empty read 2 column
    if (meta.instrument_platform == 'OXFORD_NANOPORE') {
        if (row.fastq_2 != '') {
            exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"
        }
        return [ meta, [ read_1 ] ]
    }

    // Paired-end rows: read 2 has to exist as well
    def read_2 = file(row.fastq_2)
    if (!read_2.exists()) {
        exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
    }
    return [ meta, [ read_1, read_2 ] ]
}

// Function to get list of [ meta, [ fasta ] ]
def create_fasta_channel(LinkedHashMap row) {
    // Turns one samplesheet row into [ meta, [ fasta ] ], aborting the run
    // when the FastA file does not exist.

    // is_multirun is deliberately left out of meta: FASTA runs are not merged
    def meta = row.subMap(['sample', 'run_accession', 'instrument_platform']) + [
        id        : row.sample,
        single_end: true,
        is_fasta  : true
    ]

    def fasta = file(row.fasta)
    if (!fasta.exists()) {
        exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"
    }
    return [ meta, [ fasta ] ]
}
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`//`
			`// Check input samplesheet and get read channels`
			`//`

Revert PEP validation back to python samplesheet check 2022-12-12 07:49:13 +00:00			`include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00
			`workflow INPUT_CHECK {`
			`take:`
Revert PEP validation back to python samplesheet check 2022-12-12 07:49:13 +00:00			`samplesheet // file: /path/to/samplesheet.csv`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00
			`main:`
Add multi-run meta information 2023-03-17 21:30:29 +00:00
Start adding conditions in modules.conf - non-cat not working 2023-03-22 14:39:45 +00:00			`// Table to list, group per sample, detect if sample has multi-run,`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`// then spread back to per-run rows but with multi-run info added to meta`
			`ch_split_samplesheet = SAMPLESHEET_CHECK ( samplesheet )`
Revert PEP validation back to python samplesheet check 2022-12-12 07:49:13 +00:00			`.csv`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`.splitCsv ( header:true, sep:',' )`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`.map{`
			`row ->`
			`[ [ row.sample.toString() ], row ]`
			`}`
			`.groupTuple()`
			`.map {`
			`sample, rows ->`
			`def is_multirun = rows.size() > 1`
Logical ordering and improved commenting 2023-03-17 21:38:05 +00:00			`[ rows, is_multirun ]`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`}`
Logical ordering and improved commenting 2023-03-17 21:38:05 +00:00			`.transpose(by: 0)`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`.map {`
Logical ordering and improved commenting 2023-03-17 21:38:05 +00:00			`row, is_multirun ->`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`row['is_multirun'] = is_multirun`
			`return row`
			`}`

Logical ordering and improved commenting 2023-03-17 21:38:05 +00:00			`// Split for context-dependent channel generation`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`ch_parsed_samplesheet = ch_split_samplesheet`
style: use parameter for branch closure 2023-03-07 11:40:05 +00:00			`.branch { row ->`
			`fasta: row.fasta != ''`
			`nanopore: row.instrument_platform == 'OXFORD_NANOPORE'`
Get skeleton read processing to input for profiling 2022-02-18 15:51:01 +00:00			`fastq: true`
			`}`

Logical ordering and improved commenting 2023-03-17 21:38:05 +00:00			`// Channel generation`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`ch_fastq = ch_parsed_samplesheet.fastq`
Add nanopore channel 2022-03-18 09:47:41 +00:00			`.map { create_fastq_channel(it) }`
Get skeleton read processing to input for profiling 2022-02-18 15:51:01 +00:00
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`ch_nanopore = ch_parsed_samplesheet.nanopore`
Add nanopore channel 2022-03-18 09:47:41 +00:00			`.map { create_fastq_channel(it) }`

Add multi-run meta information 2023-03-17 21:30:29 +00:00			`ch_fasta = ch_parsed_samplesheet.fasta`
Add working basic test to begin 2022-03-21 19:28:09 +00:00			`.map { create_fasta_channel(it) }`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00
			`emit:`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`fastq = ch_fastq ?: [] // channel: [ val(meta), [ reads ] ]`
			`nanopore = ch_nanopore ?: [] // channel: [ val(meta), [ reads ] ]`
			`fasta = ch_fasta ?: [] // channel: [ val(meta), fasta ]`
			`versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]`
Move to a function 2022-09-27 13:46:17 +00:00			`}`

initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]`
Add working basic test to begin 2022-03-21 19:28:09 +00:00			`def create_fastq_channel(LinkedHashMap row) {`
Template update for nf-core/tools version 2.3 2022-03-15 21:05:37 +00:00			`// create meta map`
Add multi-run meta information 2023-03-17 21:30:29 +00:00			`def meta = row.subMap(['sample', 'run_accession', 'instrument_platform', 'is_multirun'])`
fix: restore setting id on FASTQ meta 2023-03-11 10:34:47 +00:00			`meta.id = meta.sample`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`meta.single_end = row.single_end.toBoolean()`
			`meta.is_fasta = false`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00
Template update for nf-core/tools version 2.3 2022-03-15 21:05:37 +00:00			`// add path(s) of the fastq file(s) to the meta map`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`if (!file(row.fastq_1).exists()) {`
			`exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"`
			`}`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`if (meta.single_end) {`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`return [ meta, [ file(row.fastq_1) ] ]`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`} else {`
Add nanopore channel 2022-03-18 09:47:41 +00:00			`if (meta.instrument_platform == 'OXFORD_NANOPORE') {`
			`if (row.fastq_2 != '') {`
			`exit 1, "ERROR: Please check input samplesheet -> For Oxford Nanopore reads Read 2 FastQ should be empty!\n${row.fastq_2}"`
			`}`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`return [ meta, [ file(row.fastq_1) ] ]`
Add nanopore channel 2022-03-18 09:47:41 +00:00			`} else {`
			`if (!file(row.fastq_2).exists()) {`
			`exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"`
			`}`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`return [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ]`
initial template build from nf-core/tools, version 2.2 2022-02-18 06:55:14 +00:00			`}`
			`}`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`}`

			`// Function to get list of [ meta, fasta ]`
Add working basic test to begin 2022-03-21 19:28:09 +00:00			`def create_fasta_channel(LinkedHashMap row) {`
Add multi-run meta information 2023-03-17 21:30:29 +00:00
			`// don't include multi-run information as we don't do FASTA run merging`
			`def meta = row.subMap(['sample', 'run_accession', 'instrument_platform' ])`
fix: insert `id` key again 2023-03-09 14:43:58 +00:00			`meta.id = meta.sample`
			`meta.single_end = true`
			`meta.is_fasta = true`
Get skeleton read processing to input for profiling 2022-02-18 15:51:01 +00:00
			`if (!file(row.fasta).exists()) {`
			`exit 1, "ERROR: Please check input samplesheet -> FastA file does not exist!\n${row.fasta}"`
			`}`
style: use subMap and early returns 2023-03-07 12:25:24 +00:00			`return [ meta, [ file(row.fasta) ] ]`
Get skeleton read processing to input for profiling 2022-02-18 15:51:01 +00:00			`}`