update bclconvert module, as used in nf-core/demultiplex (#1569)

* update bclconvert module, as used in nf-core/demultiplex

* reconfigure test with new test data

* update test

* formatting

* update outputs, update meta

* update module and test

* update test config, test.yml

* fix linting

* emit Logs and Reports as directories

* fix typo, recreate test.yml

* fix linting

Co-authored-by: CMGG ICT Team <ict@cmgg.be>
This commit is contained in:
Matthias De Smet 2022-06-16 15:03:13 +02:00 committed by GitHub
parent 1ac223ad43
commit f32dc15414
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 85 additions and 104 deletions

View file

@ -1,5 +1,5 @@
process BCLCONVERT { process BCLCONVERT {
tag '$samplesheet' tag "$meta.id"
label 'process_high' label 'process_high'
if (params.enable_conda) { if (params.enable_conda) {
@ -8,15 +8,17 @@ process BCLCONVERT {
container "nfcore/bclconvert:3.9.3" container "nfcore/bclconvert:3.9.3"
input: input:
path samplesheet tuple val(meta), path(samplesheet), path(run_dir)
path run_dir
output: output:
path "*.fastq.gz" ,emit: fastq tuple val(meta), path("**[!Undetermined]_S*_L00?_R?_00?.fastq.gz") ,emit: fastq
path "Reports/*.{csv,xml,bin}" ,emit: reports tuple val(meta), path("**_S*_L00?_I?_00?.fastq.gz") ,optional:true ,emit: fastq_idx
path "Logs/*.{log,txt}" ,emit: logs tuple val(meta), path("Undetermined_S0_L00?_R?_00?.fastq.gz") ,emit: undetermined
path "InterOp/*.bin" ,emit: interop tuple val(meta), path("Undetermined_S0_L00?_I?_00?.fastq.gz") ,optional:true, emit: undetermined_idx
path "versions.yml" ,emit: versions tuple val(meta), path("Reports") ,emit: reports
tuple val(meta), path("Logs") ,emit: logs
tuple val(meta), path("**.bin") ,emit: interop
path("versions.yml") ,emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -25,54 +27,13 @@ process BCLCONVERT {
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
""" """
bcl-convert \ bcl-convert \\
$args \\ $args \\
--output-directory . \\ --output-directory . \\
--bcl-input-directory ${run_dir} \\ --bcl-input-directory ${run_dir} \\
--sample-sheet ${samplesheet} \\ --sample-sheet ${samplesheet} \\
--bcl-num-parallel-tiles ${task.cpus} --bcl-num-parallel-tiles ${task.cpus}
mkdir InterOp
cp ${run_dir}/InterOp/*.bin InterOp/
mv Reports/*.bin InterOp/
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')
END_VERSIONS
"""
stub:
"""
echo "sample1_S1_L001_R1_001" > sample1_S1_L001_R1_001.fastq.gz
echo "sample1_S1_L001_R2_001" > sample1_S1_L001_R2_001.fastq.gz
echo "sample1_S1_L002_R1_001" > sample1_S1_L002_R1_001.fastq.gz
echo "sample1_S1_L002_R2_001" > sample1_S1_L002_R2_001.fastq.gz
echo "sample2_S2_L001_R1_001" > sample2_S2_L001_R1_001.fastq.gz
echo "sample2_S2_L001_R2_001" > sample2_S2_L001_R2_001.fastq.gz
echo "sample2_S2_L002_R1_001" > sample2_S2_L002_R1_001.fastq.gz
echo "sample2_S2_L002_R2_001" > sample2_S2_L002_R2_001.fastq.gz
mkdir Reports
echo "Adapter_Metrics" > Reports/Adapter_Metrics.csv
echo "Demultiplex_Stats" > Reports/Demultiplex_Stats.csv
echo "fastq_list" > Reports/fastq_list.csv
echo "Index_Hopping_Counts" > Reports/Index_Hopping_Counts.csv
echo "IndexMetricsOut" > Reports/IndexMetricsOut.bin
echo "Quality_Metrics" > Reports/Quality_Metrics.csv
echo "RunInfo" > Reports/RunInfo.xml
echo "SampleSheet" > Reports/SampleSheet.csv
echo "Top_Unknown_Barcodes" > Reports/Top_Unknown_Barcodes.csv
mkdir Logs
echo "Errors" > Logs/Errors.log
echo "FastqComplete" > Logs/FastqComplete.txt
echo "Info" > Logs/Info.log
echo "Warnings" > Logs/Warnings.log
mkdir InterOp/
echo "InterOp" > InterOp/InterOp.bin
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //') bclconvert: \$(bcl-convert -V 2>&1 | head -n 1 | sed 's/^.*Version //')

View file

@ -12,6 +12,10 @@ tools:
licence: "ILLUMINA" licence: "ILLUMINA"
input: input:
- meta:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- samplesheet: - samplesheet:
type: file type: file
description: "Input samplesheet" description: "Input samplesheet"
@ -27,8 +31,20 @@ output:
pattern: "versions.yml" pattern: "versions.yml"
- fastq: - fastq:
type: file type: file
description: Demultiplexed FASTQ files description: Demultiplexed sample FASTQ files
pattern: "*.{fastq.gz}" pattern: "**_S*_L00?_R?_00?.fastq.gz"
- fastq_idx:
type: file
description: Optional demultiplexed index FASTQ files
pattern: "**_S*_L00?_I?_00?.fastq.gz"
- undetermined:
type: file
description: Undetermined sample FASTQ files
pattern: "Undetermined_S0_L00?_R?_00?.fastq.gz"
- undetermined_idx:
type: file
description: Optional undetermined index FASTQ files
pattern: "Undetermined_S0_L00?_I?_00?.fastq.gz"
- reports: - reports:
type: file type: file
description: Demultiplexing Reports description: Demultiplexing Reports
@ -40,6 +56,6 @@ output:
- interop: - interop:
type: file type: file
description: Interop files description: Interop files
pattern: "Interop/*.{bin}" pattern: "*.{bin}"
authors: authors:
- "@matthdsm" - "@matthdsm"

View file

@ -336,6 +336,8 @@ params {
test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor"
test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz"
test_flowcell_samplesheet = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv"
} }
'pacbio' { 'pacbio' {
primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta"

View file

@ -3,20 +3,21 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { BCLCONVERT } from '../../../modules/bclconvert/main.nf' include { BCLCONVERT } from '../../../modules/bclconvert/main.nf'
include { UNTAR } from '../../../modules/untar/main.nf'
process STUB_BCLCONVERT_INPUT {
output:
path "SampleSheet.csv" ,emit: samplesheet
path "DDMMYY_SERIAL_FLOWCELL" ,emit: run_dir
stub:
"""
mkdir DDMMYY_SERIAL_FLOWCELL
echo "SampleSheet" > SampleSheet.csv
"""
}
workflow test_bclconvert { workflow test_bclconvert {
STUB_BCLCONVERT_INPUT () ch_flowcell = Channel.value([
BCLCONVERT (STUB_BCLCONVERT_INPUT.out.samplesheet, STUB_BCLCONVERT_INPUT.out.run_dir) [id:'test', lane:1 ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_flowcell_samplesheet'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true)])
ch_flowcell
.multiMap { meta, ss, run ->
samplesheet: [meta, ss]
tar: [meta, run]
}.set{ ch_fc_split }
ch_flowcell_untar = ch_fc_split.samplesheet.join( UNTAR ( ch_fc_split.tar ).untar )
BCLCONVERT (ch_flowcell_untar)
} }

View file

@ -1,5 +1,14 @@
process { process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: BCLCONVERT {
ext.args = {[
meta.lane ? "--bcl-only-lane ${meta.lane}" : "",
"--force",
"--first-tile-only true"
].join(" ").trim()}
}
withName: UNTAR {
publishDir = [ enabled: false ]
}
} }

View file

@ -1,52 +1,44 @@
- name: bclconvert test_bclconvert - name: bclconvert test_bclconvert
command: nextflow run tests/modules/bclconvert -entry test_bclconvert -c tests/config/nextflow.config -stub-run command: nextflow run ./tests/modules/bclconvert -entry test_bclconvert -c ./tests/config/nextflow.config -c ./tests/modules/bclconvert/nextflow.config
tags: tags:
- bclconvert - bclconvert
files: files:
- path: output/bclconvert/InterOp/InterOp.bin
md5sum: d3dea0bb4ab1c8754af324f40b001481
- path: output/bclconvert/Logs/Errors.log - path: output/bclconvert/Logs/Errors.log
md5sum: 334645f09074b2597a692e395b716a9c
- path: output/bclconvert/Logs/FastqComplete.txt - path: output/bclconvert/Logs/FastqComplete.txt
md5sum: a4c4c6ce2d0de67d3b7ac7d1fcb512e4
- path: output/bclconvert/Logs/Info.log - path: output/bclconvert/Logs/Info.log
md5sum: d238822d379f2277cac950ca986cb660
- path: output/bclconvert/Logs/Warnings.log - path: output/bclconvert/Logs/Warnings.log
md5sum: aeefd2d631817e170f88f25ecaaf4664
- path: output/bclconvert/Reports/Adapter_Metrics.csv - path: output/bclconvert/Reports/Adapter_Metrics.csv
md5sum: af62e9c7b44940cfd8ea11064a1f42ae md5sum: 989240b8840b2169ac1061f952c90f6c
- path: output/bclconvert/Reports/Demultiplex_Stats.csv - path: output/bclconvert/Reports/Demultiplex_Stats.csv
md5sum: d11313931fcaabb5ce159462ad3dd1da md5sum: bb2ed9c6b46e03fbc060efd2c75398b0
- path: output/bclconvert/Reports/IndexMetricsOut.bin - path: output/bclconvert/Reports/IndexMetricsOut.bin
md5sum: 6bcee11c8145e3b1059ceaa91d2f5be7 md5sum: 9e688c58a5487b8eaf69c9e1005ad0bf
- path: output/bclconvert/Reports/Index_Hopping_Counts.csv - path: output/bclconvert/Reports/Index_Hopping_Counts.csv
md5sum: 697e40e0c0d48b4bd25f138ef60b0bde md5sum: 1059369e375fd8f8423c0f6c934be978
- path: output/bclconvert/Reports/Quality_Metrics.csv - path: output/bclconvert/Reports/Quality_Metrics.csv
md5sum: 3902fd38f6b01f1ce0f0e8724238f8f2 md5sum: 6614accb1bb414fe312b17b81f5521f7
- path: output/bclconvert/Reports/RunInfo.xml - path: output/bclconvert/Reports/RunInfo.xml
md5sum: 5bef7c7e76360231b0c4afdfc915fd44 md5sum: 03038959f4dd181c86bc97ae71fe270a
- path: output/bclconvert/Reports/SampleSheet.csv - path: output/bclconvert/Reports/SampleSheet.csv
md5sum: c579e7d2c9c917c4cfb875a0373c0936 md5sum: dc0dffd39541dd6cc5b4801d768a8d2b
- path: output/bclconvert/Reports/Top_Unknown_Barcodes.csv - path: output/bclconvert/Reports/Top_Unknown_Barcodes.csv
md5sum: 39a5e7f6d21c12d6051afdc8261b6330 md5sum: 2e2faba761137f228e56bd3428453ccc
- path: output/bclconvert/Reports/fastq_list.csv - path: output/bclconvert/Reports/fastq_list.csv
md5sum: 32c51ab10e013fd547928de57361ffcb md5sum: 05bc84f51840f5754cfb8381b36f2cb0
- path: output/bclconvert/sample1_S1_L001_R1_001.fastq.gz - path: output/bclconvert/Sample1_S1_L001_R1_001.fastq.gz
md5sum: 9b831a39755935333f86f167527a094d md5sum: 883037281293a2ec4995d34257f19866
- path: output/bclconvert/sample1_S1_L001_R2_001.fastq.gz - path: output/bclconvert/Undetermined_S0_L001_R1_001.fastq.gz
md5sum: 082f4f767b7619f409ca7e752ef482bf md5sum: a4745abc5e7fdb89cc6df3069f3c6e69
- path: output/bclconvert/sample1_S1_L002_R1_001.fastq.gz - path: output/bclconvert/flowcell/InterOp/ControlMetricsOut.bin
md5sum: 837764c89db93dfb53cd663c4f26f3d7 md5sum: 6d77b38d0793a6e1ce1e85706e488953
- path: output/bclconvert/sample1_S1_L002_R2_001.fastq.gz - path: output/bclconvert/flowcell/InterOp/CorrectedIntMetricsOut.bin
md5sum: 1a42cf6ba0bb8fc7770f278e6d1ab676 md5sum: 2bbf84d3be72734addaa2fe794711434
- path: output/bclconvert/sample2_S2_L001_R1_001.fastq.gz - path: output/bclconvert/flowcell/InterOp/ErrorMetricsOut.bin
md5sum: 475bc426b7cc48d0551d40e31457dc78 md5sum: 38c88def138e9bb832539911affdb286
- path: output/bclconvert/sample2_S2_L001_R2_001.fastq.gz - path: output/bclconvert/flowcell/InterOp/ExtractionMetricsOut.bin
md5sum: f670ccd7d9352e0e67fe1c1232429d94 md5sum: 7497c3178837eea8f09350b5cd252e99
- path: output/bclconvert/sample2_S2_L002_R1_001.fastq.gz - path: output/bclconvert/flowcell/InterOp/IndexMetricsOut.bin
md5sum: ebd5ff6fa5603e7d704b5a10598de58c - path: output/bclconvert/flowcell/InterOp/QMetricsOut.bin
- path: output/bclconvert/sample2_S2_L002_R2_001.fastq.gz md5sum: 7e9f198d53ebdfbb699a5f94cf1ed51c
md5sum: 2f83b460f52620d2548c7ef8845b31d7 - path: output/bclconvert/flowcell/InterOp/TileMetricsOut.bin
- path: output/stub/SampleSheet.csv md5sum: 83891751ec1c91a425a524b476b6ca3c
md5sum: c579e7d2c9c917c4cfb875a0373c0936
- path: output/bclconvert/versions.yml