From 14554981528013409f13ce4fb5b638ce87cb9828 Mon Sep 17 00:00:00 2001 From: Florian De Temmerman <69114541+fbdtemme@users.noreply.github.com> Date: Sun, 21 Nov 2021 20:56:57 +0100 Subject: [PATCH] CNVkit: Make targets file optional when running in WGS mode (#1030) * Make targets.bed optional when running in wgs mode * added test for cram * Update test_data_config with new reference.cnn * Update main.nf to allow tumor-only running Still need a unit-test for this. Almost ready, but needs this file as input https://github.com/nf-core/test-datasets/blob/modules/data/generic/cnn/reference.cnn * re-writing previous changes, but now it won't crash the entire CI-setup * fixing overlooked merge conflict * last overlooked merge-conflict * move all files to batch subfolder * adding an optional input for a reference file (needed when running germline and tumoronly) * minor typo * update meta.yml * aligning code, renaming cnvkit to cnvkit_batch, renaming tumourbam to tumor, normalbam to normal * Update pytest_modules.yml Co-authored-by: EC2 Default User Co-authored-by: Lasse Folkersen Co-authored-by: Robert A. 
Petit III Co-authored-by: Harshil Patel --- modules/cnvkit/{ => batch}/functions.nf | 0 modules/cnvkit/{ => batch}/main.nf | 35 +++++--- modules/cnvkit/{ => batch}/meta.yml | 17 ++-- tests/config/pytest_modules.yml | 6 +- tests/config/test_data.config | 3 + tests/modules/cnvkit/batch/main.nf | 64 +++++++++++++++ tests/modules/cnvkit/batch/test.yml | 101 ++++++++++++++++++++++++ tests/modules/cnvkit/main.nf | 19 ----- tests/modules/cnvkit/test.yml | 27 ------- 9 files changed, 207 insertions(+), 65 deletions(-) rename modules/cnvkit/{ => batch}/functions.nf (100%) rename modules/cnvkit/{ => batch}/main.nf (59%) mode change 100755 => 100644 rename modules/cnvkit/{ => batch}/meta.yml (89%) mode change 100755 => 100644 create mode 100755 tests/modules/cnvkit/batch/main.nf create mode 100755 tests/modules/cnvkit/batch/test.yml delete mode 100755 tests/modules/cnvkit/main.nf delete mode 100755 tests/modules/cnvkit/test.yml diff --git a/modules/cnvkit/functions.nf b/modules/cnvkit/batch/functions.nf similarity index 100% rename from modules/cnvkit/functions.nf rename to modules/cnvkit/batch/functions.nf diff --git a/modules/cnvkit/main.nf b/modules/cnvkit/batch/main.nf old mode 100755 new mode 100644 similarity index 59% rename from modules/cnvkit/main.nf rename to modules/cnvkit/batch/main.nf index 27c8bb0d..06ecaa40 --- a/modules/cnvkit/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -4,7 +4,7 @@ include { initOptions; saveFiles; getSoftwareName; getProcessName } from './func params.options = [:] options = initOptions(params.options) -process CNVKIT { +process CNVKIT_BATCH { tag "$meta.id" label 'process_low' publishDir "${params.outdir}", @@ -19,25 +19,40 @@ process CNVKIT { } input: - tuple val(meta), path(tumourbam), path(normalbam) + tuple val(meta), path(tumor), path(normal) path fasta - path targetfile + path targets + path reference output: tuple val(meta), path("*.bed"), emit: bed - tuple val(meta), path("*.cnn"), emit: cnn - tuple val(meta), path("*.cnr"), emit: 
cnr - tuple val(meta), path("*.cns"), emit: cns + tuple val(meta), path("*.cnn"), emit: cnn, optional: true + tuple val(meta), path("*.cnr"), emit: cnr, optional: true + tuple val(meta), path("*.cns"), emit: cns, optional: true path "versions.yml" , emit: versions script: + normal_args = normal ? "--normal $normal" : "" + fasta_args = fasta ? "--fasta $fasta" : "" + reference_args = reference ? "--reference $reference" : "" + + def target_args = "" + if (options.args.contains("--method wgs") || options.args.contains("-m wgs")) { + target_args = targets ? "--targets $targets" : "" + } + else { + target_args = "--targets $targets" + } + """ cnvkit.py \\ batch \\ - $tumourbam \\ - --normal $normalbam\\ - --fasta $fasta \\ - --targets $targetfile \\ + $tumor \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes ${task.cpus} \\ $options.args cat <<-END_VERSIONS > versions.yml diff --git a/modules/cnvkit/meta.yml b/modules/cnvkit/batch/meta.yml old mode 100755 new mode 100644 similarity index 89% rename from modules/cnvkit/meta.yml rename to modules/cnvkit/batch/meta.yml index 3e760d16..0d263041 --- a/modules/cnvkit/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -1,4 +1,4 @@ -name: cnvkit +name: cnvkit_batch description: Copy number variant detection from high-throughput sequencing data keywords: - bam @@ -38,14 +38,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - tumourbam: + - tumour: type: file description: | - Input tumour sample bam file - - normalbam: + Input tumour sample bam file (or cram) + - normal: type: file description: | - Input normal sample bam file + Input normal sample bam file (or cram) - fasta: type: file description: | @@ -54,6 +54,10 @@ input: type: file description: | Input target bed file + - reference: + type: file + description: | + Input reference cnn-file (only for germline and tumor-only running) output: - meta: type: map @@ -85,4 +89,5 @@ authors: - "@KevinMenden" - "@MaxUlysse" - "@drpatelh" - + - "@fbdtemme" + - "@lassefolkersen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 69d6a80e..7b47bfea 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -294,9 +294,9 @@ cmseq/polymut: - modules/cmseq/polymut/** - tests/modules/cmseq/polymut/** -cnvkit: - - modules/cnvkit/** - - tests/modules/cnvkit/** +cnvkit/batch: + - modules/cnvkit/batch/** + - tests/modules/cnvkit/batch/** cooler/digest: - modules/cooler/digest/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 3351204d..c3bae012 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -263,6 +263,9 @@ params { 'txt' { hello = "${test_data_dir}/generic/txt/hello.txt" } + 'cnn' { + reference = "${test_data_dir}/generic/cnn/reference.cnn" + } 'cooler'{ test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf new file mode 100755 index 00000000..5d92afaa --- /dev/null +++ b/tests/modules/cnvkit/batch/main.nf @@ -0,0 +1,64 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { 
CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] ) +include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn --method wgs' ] ) +include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--method wgs' ] ) + + +workflow test_cnvkit_hybrid { + tumor = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + normal = file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + + input = [ [ id:'test' ], // meta map + tumor, + normal + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) + + CNVKIT_HYBRID ( input, fasta, targets, [] ) +} + +workflow test_cnvkit_wgs { + tumor = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + normal = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + + input = [ [ id:'test'], // meta map + tumor, + normal + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + CNVKIT_WGS ( input, fasta, [], [] ) +} + + +workflow test_cnvkit_cram { + tumor = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + normal = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + + input = [ [ id:'test'], // meta map + tumor, + normal + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + CNVKIT_WGS ( input, fasta, [], [] ) +} + + + +workflow test_cnvkit_tumoronly { + tumor = 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + + input = [ [ id:'test'], // meta map + tumor, + [ ] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + + CNVKIT_TUMORONLY ( input, [], [], reference ) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml new file mode 100755 index 00000000..96ea670c --- /dev/null +++ b/tests/modules/cnvkit/batch/test.yml @@ -0,0 +1,101 @@ +- name: cnvkit batch test_cnvkit_hybrid + command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c tests/config/nextflow.config + tags: + - cnvkit/batch + - cnvkit + files: + - path: output/cnvkit/baits.antitarget.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/cnvkit/baits.target.bed + md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference.cnn + md5sum: ac99c1ad8b917b96ae15119146c91ab9 + - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test.paired_end.sorted.bintest.cns + md5sum: 6544d979475def8a9f69ba42a985668d + - path: output/cnvkit/test.paired_end.sorted.call.cns + md5sum: f2ca59b4d50b0c317adc526c1b99b622 + - path: output/cnvkit/test.paired_end.sorted.cnr + md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3 + - path: output/cnvkit/test.paired_end.sorted.cns + md5sum: 060af1aa637ed51812af19bcce24fcfe + - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn + md5sum: 3fe80b6013ffc3e9968345e810158215 + - path: output/cnvkit/test.single_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn + md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 + +- name: cnvkit batch test_cnvkit_wgs + command: nextflow run tests/modules/cnvkit/batch -entry 
test_cnvkit_wgs -c tests/config/nextflow.config + tags: + - cnvkit/batch + - cnvkit + files: + - path: output/cnvkit/genome.antitarget.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/cnvkit/genome.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: output/cnvkit/genome.target.bed + md5sum: a13353ae9c8405e701390c069255bbd2 + - path: output/cnvkit/reference.cnn + md5sum: 05c6211e0179885b8a83e44fd21d5f86 + - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn + md5sum: ff526714696aa49bdc1dc8d00d965266 + - path: output/cnvkit/test2.paired_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test2.paired_end.sorted.bintest.cns + md5sum: 6544d979475def8a9f69ba42a985668d + - path: output/cnvkit/test2.paired_end.sorted.call.cns + md5sum: f6de754c34f780e6befee5b3ff0893f8 + - path: output/cnvkit/test2.paired_end.sorted.cnr + md5sum: 80318d06c6b095945a0fb0e85e887cbc + - path: output/cnvkit/test2.paired_end.sorted.cns + md5sum: 76afa47afc4bd5de35aee8fdb54d3d3a + - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn + md5sum: 6ae6b3fce7299eedca6133d911c38fe1 + +- name: cnvkit batch test_cnvkit_cram + command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_cram -c tests/config/nextflow.config + tags: + - cnvkit/batch + - cnvkit + files: + - path: output/cnvkit/genome.antitarget.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/cnvkit/genome.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: output/cnvkit/genome.target.bed + md5sum: a13353ae9c8405e701390c069255bbd2 + - path: output/cnvkit/reference.cnn + md5sum: 05c6211e0179885b8a83e44fd21d5f86 + - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn + md5sum: 
ff526714696aa49bdc1dc8d00d965266 + - path: output/cnvkit/test2.paired_end.sorted.antitargetcoverage.cnn + md5sum: 203caf8cef6935bb50b4138097955cb8 + - path: output/cnvkit/test2.paired_end.sorted.bintest.cns + md5sum: 6544d979475def8a9f69ba42a985668d + - path: output/cnvkit/test2.paired_end.sorted.call.cns + md5sum: f6de754c34f780e6befee5b3ff0893f8 + - path: output/cnvkit/test2.paired_end.sorted.cnr + md5sum: 80318d06c6b095945a0fb0e85e887cbc + - path: output/cnvkit/test2.paired_end.sorted.cns + md5sum: 76afa47afc4bd5de35aee8fdb54d3d3a + - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn + md5sum: 6ae6b3fce7299eedca6133d911c38fe1 + +- name: cnvkit batch test_cnvkit_tumoronly + command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c tests/config/nextflow.config + tags: + - cnvkit/batch + - cnvkit + files: + - path: output/cnvkit/reference.antitarget-tmp.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/cnvkit/reference.target-tmp.bed + md5sum: 26d25ff2d6c45b6d92169b3559c6acdb diff --git a/tests/modules/cnvkit/main.nf b/tests/modules/cnvkit/main.nf deleted file mode 100755 index 6ee959ab..00000000 --- a/tests/modules/cnvkit/main.nf +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { CNVKIT } from '../../../modules/cnvkit/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] ) - -workflow test_cnvkit { - tumourbam = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - normalbam = file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - - input = [ [ id:'test' ], // meta map - tumourbam, - normalbam - ] - fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) - - CNVKIT ( input, fasta, targets ) -} diff --git a/tests/modules/cnvkit/test.yml 
b/tests/modules/cnvkit/test.yml deleted file mode 100755 index 6e09d6f3..00000000 --- a/tests/modules/cnvkit/test.yml +++ /dev/null @@ -1,27 +0,0 @@ -- name: cnvkit - command: nextflow run ./tests/modules/cnvkit/ -entry test_cnvkit -c tests/config/nextflow.config - tags: - - cnvkit - files: - - path: output/cnvkit/baits.target.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb - - path: output/cnvkit/baits.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: output/cnvkit/reference.cnn - md5sum: ac99c1ad8b917b96ae15119146c91ab9 - - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn - md5sum: 3fe80b6013ffc3e9968345e810158215 - - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn - md5sum: 203caf8cef6935bb50b4138097955cb8 - - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn - md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 - - path: output/cnvkit/test.single_end.sorted.antitargetcoverage.cnn - md5sum: 203caf8cef6935bb50b4138097955cb8 - - path: output/cnvkit/test.paired_end.sorted.cnr - md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3 - - path: output/cnvkit/test.paired_end.sorted.cns - md5sum: 060af1aa637ed51812af19bcce24fcfe - - path: output/cnvkit/test.paired_end.sorted.bintest.cns - md5sum: 6544d979475def8a9f69ba42a985668d - - path: output/cnvkit/test.paired_end.sorted.call.cns - md5sum: f2ca59b4d50b0c317adc526c1b99b622