From f32dc15414700682c27dda0a35aaa6b6287ebc4d Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Thu, 16 Jun 2022 15:03:13 +0200 Subject: [PATCH] update bclconvert module, as used in nf-core/demultiplex (#1569) * update bclconvert module, as used in nf-core/demultiplex * reconfigure test with new test data * update test * formatting * update outputs, update meta * update module and test * update test config, test.yml * fix linting * emit logs and reports as dir * fix typo, recreate test.yml * fix linting Co-authored-by: CMGG ICT Team --- modules/bclconvert/main.nf | 61 +++++------------------ modules/bclconvert/meta.yml | 22 +++++++-- tests/config/test_data.config | 2 + tests/modules/bclconvert/main.nf | 29 +++++------ tests/modules/bclconvert/nextflow.config | 13 ++++- tests/modules/bclconvert/test.yml | 62 +++++++++++------------- 6 files changed, 85 insertions(+), 104 deletions(-) diff --git a/modules/bclconvert/main.nf b/modules/bclconvert/main.nf index e6925b50..c891ab7f 100644 --- a/modules/bclconvert/main.nf +++ b/modules/bclconvert/main.nf @@ -1,5 +1,5 @@ process BCLCONVERT { - tag '$samplesheet' + tag "$meta.id" label 'process_high' if (params.enable_conda) { @@ -8,15 +8,17 @@ process BCLCONVERT { container "nfcore/bclconvert:3.9.3" input: - path samplesheet - path run_dir + tuple val(meta), path(samplesheet), path(run_dir) output: - path "*.fastq.gz" ,emit: fastq - path "Reports/*.{csv,xml,bin}" ,emit: reports - path "Logs/*.{log,txt}" ,emit: logs - path "InterOp/*.bin" ,emit: interop - path "versions.yml" ,emit: versions + tuple val(meta), path("**[!Undetermined]_S*_L00?_R?_00?.fastq.gz") ,emit: fastq + tuple val(meta), path("**_S*_L00?_I?_00?.fastq.gz") ,optional:true ,emit: fastq_idx + tuple val(meta), path("Undetermined_S0_L00?_R?_00?.fastq.gz") ,emit: undetermined + tuple val(meta), path("Undetermined_S0_L00?_I?_00?.fastq.gz") ,optional:true, emit: undetermined_idx + tuple val(meta), path("Reports") ,emit: reports + tuple val(meta), path("Logs") ,emit: logs + tuple val(meta), path("**.bin") ,emit: interop + path("versions.yml") ,emit: versions when: task.ext.when == null || task.ext.when @@ -25,54 +27,13 @@ process BCLCONVERT { def args = task.ext.args ?: '' """ - bcl-convert \ + bcl-convert \\ $args \\ --output-directory . \\ --bcl-input-directory ${run_dir} \\ --sample-sheet ${samplesheet} \\ --bcl-num-parallel-tiles ${task.cpus} - mkdir InterOp - cp ${run_dir}/InterOp/*.bin InterOp/ - mv Reports/*.bin InterOp/ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //') - END_VERSIONS - """ - - stub: - """ - echo "sample1_S1_L001_R1_001" > sample1_S1_L001_R1_001.fastq.gz - echo "sample1_S1_L001_R2_001" > sample1_S1_L001_R2_001.fastq.gz - echo "sample1_S1_L002_R1_001" > sample1_S1_L002_R1_001.fastq.gz - echo "sample1_S1_L002_R2_001" > sample1_S1_L002_R2_001.fastq.gz - echo "sample2_S2_L001_R1_001" > sample2_S2_L001_R1_001.fastq.gz - echo "sample2_S2_L001_R2_001" > sample2_S2_L001_R2_001.fastq.gz - echo "sample2_S2_L002_R1_001" > sample2_S2_L002_R1_001.fastq.gz - echo "sample2_S2_L002_R2_001" > sample2_S2_L002_R2_001.fastq.gz - - mkdir Reports - echo "Adapter_Metrics" > Reports/Adapter_Metrics.csv - echo "Demultiplex_Stats" > Reports/Demultiplex_Stats.csv - echo "fastq_list" > Reports/fastq_list.csv - echo "Index_Hopping_Counts" > Reports/Index_Hopping_Counts.csv - echo "IndexMetricsOut" > Reports/IndexMetricsOut.bin - echo "Quality_Metrics" > Reports/Quality_Metrics.csv - echo "RunInfo" > Reports/RunInfo.xml - echo "SampleSheet" > Reports/SampleSheet.csv - echo "Top_Unknown_Barcodes" > Reports/Top_Unknown_Barcodes.csv - - mkdir Logs - echo "Errors" > Logs/Errors.log - echo "FastqComplete" > Logs/FastqComplete.txt - echo "Info" > Logs/Info.log - echo "Warnings" > Logs/Warnings.log - - mkdir InterOp/ - echo "InterOp" > InterOp/InterOp.bin - cat <<-END_VERSIONS > versions.yml "${task.process}": bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //') diff --git a/modules/bclconvert/meta.yml b/modules/bclconvert/meta.yml index 5c59a978..91d2ca6a 100644 --- a/modules/bclconvert/meta.yml +++ b/modules/bclconvert/meta.yml @@ -12,6 +12,10 @@ tools: licence: "ILLUMINA" input: + - meta: + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - samplesheet: type: file description: "Input samplesheet" @@ -27,8 +31,20 @@ output: pattern: "versions.yml" - fastq: type: file - description: Demultiplexed FASTQ files - pattern: "*.{fastq.gz}" + description: Demultiplexed sample FASTQ files + pattern: "**_S*_L00?_R?_00?.fastq.gz" + - fastq_idx: + type: file + description: Optional demultiplexed index FASTQ files + pattern: "**_S*_L00?_I?_00?.fastq.gz" + - undetermined: + type: file + description: Undetermined sample FASTQ files + pattern: "Undetermined_S0_L00?_R?_00?.fastq.gz" + - undetermined_idx: + type: file + description: Optional undetermined index FASTQ files + pattern: "Undetermined_S0_L00?_I?_00?.fastq.gz" - reports: type: file description: Demultiplexing Reports @@ -40,6 +56,6 @@ output: - interop: type: file description: Interop files - pattern: "Interop/*.{bin}" + pattern: "*.{bin}" authors: - "@matthdsm" diff --git a/tests/config/test_data.config b/tests/config/test_data.config index b97f8899..47605e19 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -336,6 +336,8 @@ params { test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" + test_flowcell_samplesheet = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv" + } 'pacbio' { primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" diff --git a/tests/modules/bclconvert/main.nf b/tests/modules/bclconvert/main.nf index e8a78e4f..b2ba3b86 100644 --- a/tests/modules/bclconvert/main.nf +++ b/tests/modules/bclconvert/main.nf @@ -3,20 +3,21 @@ nextflow.enable.dsl = 2 include { BCLCONVERT } from '../../../modules/bclconvert/main.nf' - -process STUB_BCLCONVERT_INPUT { - output: - path "SampleSheet.csv" ,emit: samplesheet - path "DDMMYY_SERIAL_FLOWCELL" ,emit: run_dir - - stub: - """ - mkdir DDMMYY_SERIAL_FLOWCELL - echo "SampleSheet" > SampleSheet.csv - """ -} +include { UNTAR } from '../../../modules/untar/main.nf' workflow test_bclconvert { - STUB_BCLCONVERT_INPUT () - BCLCONVERT (STUB_BCLCONVERT_INPUT.out.samplesheet, STUB_BCLCONVERT_INPUT.out.run_dir) + ch_flowcell = Channel.value([ + [id:'test', lane:1 ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_flowcell_samplesheet'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true)]) + + ch_flowcell + .multiMap { meta, ss, run -> + samplesheet: [meta, ss] + tar: [meta, run] + }.set{ ch_fc_split } + + ch_flowcell_untar = ch_fc_split.samplesheet.join( UNTAR ( ch_fc_split.tar ).untar ) + + BCLCONVERT (ch_flowcell_untar) } diff --git a/tests/modules/bclconvert/nextflow.config b/tests/modules/bclconvert/nextflow.config index 50f50a7a..ae8c9c78 100644 --- a/tests/modules/bclconvert/nextflow.config +++ b/tests/modules/bclconvert/nextflow.config @@ -1,5 +1,14 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file + withName: BCLCONVERT { + ext.args = {[ + meta.lane ? "--bcl-only-lane ${meta.lane}" : "", + "--force", + "--first-tile-only true" + ].join(" ").trim()} + } + withName: UNTAR { + publishDir = [ enabled: false ] + } +} diff --git a/tests/modules/bclconvert/test.yml b/tests/modules/bclconvert/test.yml index 65e71a59..f03b72e2 100644 --- a/tests/modules/bclconvert/test.yml +++ b/tests/modules/bclconvert/test.yml @@ -1,52 +1,44 @@ - name: bclconvert test_bclconvert - command: nextflow run tests/modules/bclconvert -entry test_bclconvert -c tests/config/nextflow.config -stub-run + command: nextflow run ./tests/modules/bclconvert -entry test_bclconvert -c ./tests/config/nextflow.config -c ./tests/modules/bclconvert/nextflow.config tags: - bclconvert files: - - path: output/bclconvert/InterOp/InterOp.bin - md5sum: d3dea0bb4ab1c8754af324f40b001481 - path: output/bclconvert/Logs/Errors.log - md5sum: 334645f09074b2597a692e395b716a9c - path: output/bclconvert/Logs/FastqComplete.txt - md5sum: a4c4c6ce2d0de67d3b7ac7d1fcb512e4 - path: output/bclconvert/Logs/Info.log - md5sum: d238822d379f2277cac950ca986cb660 - path: output/bclconvert/Logs/Warnings.log - md5sum: aeefd2d631817e170f88f25ecaaf4664 - path: output/bclconvert/Reports/Adapter_Metrics.csv - md5sum: af62e9c7b44940cfd8ea11064a1f42ae + md5sum: 989240b8840b2169ac1061f952c90f6c - path: output/bclconvert/Reports/Demultiplex_Stats.csv - md5sum: d11313931fcaabb5ce159462ad3dd1da + md5sum: bb2ed9c6b46e03fbc060efd2c75398b0 - path: output/bclconvert/Reports/IndexMetricsOut.bin - md5sum: 6bcee11c8145e3b1059ceaa91d2f5be7 + md5sum: 9e688c58a5487b8eaf69c9e1005ad0bf - path: output/bclconvert/Reports/Index_Hopping_Counts.csv - md5sum: 697e40e0c0d48b4bd25f138ef60b0bde + md5sum: 1059369e375fd8f8423c0f6c934be978 - path: output/bclconvert/Reports/Quality_Metrics.csv - md5sum: 3902fd38f6b01f1ce0f0e8724238f8f2 + md5sum: 6614accb1bb414fe312b17b81f5521f7 - path: output/bclconvert/Reports/RunInfo.xml - md5sum: 5bef7c7e76360231b0c4afdfc915fd44 + md5sum: 03038959f4dd181c86bc97ae71fe270a - path: output/bclconvert/Reports/SampleSheet.csv - md5sum: c579e7d2c9c917c4cfb875a0373c0936 + md5sum: dc0dffd39541dd6cc5b4801d768a8d2b - path: output/bclconvert/Reports/Top_Unknown_Barcodes.csv - md5sum: 39a5e7f6d21c12d6051afdc8261b6330 + md5sum: 2e2faba761137f228e56bd3428453ccc - path: output/bclconvert/Reports/fastq_list.csv - md5sum: 32c51ab10e013fd547928de57361ffcb - - path: output/bclconvert/sample1_S1_L001_R1_001.fastq.gz - md5sum: 9b831a39755935333f86f167527a094d - - path: output/bclconvert/sample1_S1_L001_R2_001.fastq.gz - md5sum: 082f4f767b7619f409ca7e752ef482bf - - path: output/bclconvert/sample1_S1_L002_R1_001.fastq.gz - md5sum: 837764c89db93dfb53cd663c4f26f3d7 - - path: output/bclconvert/sample1_S1_L002_R2_001.fastq.gz - md5sum: 1a42cf6ba0bb8fc7770f278e6d1ab676 - - path: output/bclconvert/sample2_S2_L001_R1_001.fastq.gz - md5sum: 475bc426b7cc48d0551d40e31457dc78 - - path: output/bclconvert/sample2_S2_L001_R2_001.fastq.gz - md5sum: f670ccd7d9352e0e67fe1c1232429d94 - - path: output/bclconvert/sample2_S2_L002_R1_001.fastq.gz - md5sum: ebd5ff6fa5603e7d704b5a10598de58c - - path: output/bclconvert/sample2_S2_L002_R2_001.fastq.gz - md5sum: 2f83b460f52620d2548c7ef8845b31d7 - - path: output/stub/SampleSheet.csv - md5sum: c579e7d2c9c917c4cfb875a0373c0936 - - path: output/bclconvert/versions.yml + md5sum: 05bc84f51840f5754cfb8381b36f2cb0 + - path: output/bclconvert/Sample1_S1_L001_R1_001.fastq.gz + md5sum: 883037281293a2ec4995d34257f19866 + - path: output/bclconvert/Undetermined_S0_L001_R1_001.fastq.gz + md5sum: a4745abc5e7fdb89cc6df3069f3c6e69 + - path: output/bclconvert/flowcell/InterOp/ControlMetricsOut.bin + md5sum: 6d77b38d0793a6e1ce1e85706e488953 + - path: output/bclconvert/flowcell/InterOp/CorrectedIntMetricsOut.bin + md5sum: 2bbf84d3be72734addaa2fe794711434 + - path: output/bclconvert/flowcell/InterOp/ErrorMetricsOut.bin + md5sum: 38c88def138e9bb832539911affdb286 + - path: output/bclconvert/flowcell/InterOp/ExtractionMetricsOut.bin + md5sum: 7497c3178837eea8f09350b5cd252e99 + - path: output/bclconvert/flowcell/InterOp/IndexMetricsOut.bin + - path: output/bclconvert/flowcell/InterOp/QMetricsOut.bin + md5sum: 7e9f198d53ebdfbb699a5f94cf1ed51c + - path: output/bclconvert/flowcell/InterOp/TileMetricsOut.bin + md5sum: 83891751ec1c91a425a524b476b6ca3c