CNVkit: Make targets file optional when running in WGS mode (#1030)

* Make targets.bed optional when running in wgs mode

* added test for cram

* Update test_data_config with new reference.cnn

* Update main.nf to allow tumor-only running

Still needs a unit test for this. Almost ready, but it needs this file as input: https://github.com/nf-core/test-datasets/blob/modules/data/generic/cnn/reference.cnn

* re-writing previous changes, but now it won't crash the entire CI setup

* fixing overlooked merge conflict

* last overlooked merge-conflict

* move all files to batch subfolder

* adding an optional input for a reference file (needed when running germline and tumor-only)

* minor typo

* update meta.yml

* aligning code, renaming cnvkit to cnvkit_batch, renaming tumorbam to tumor, normalbam to normal

* Update pytest_modules.yml

Co-authored-by: EC2 Default User <ec2-user@ip-172-31-21-198.us-west-2.compute.internal>
Co-authored-by: Lasse Folkersen <lassefolkersen@gmail.com>
Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Florian De Temmerman 2021-11-21 20:56:57 +01:00 committed by GitHub
parent 15fd90ffe8
commit 1455498152
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 207 additions and 65 deletions

35
modules/cnvkit/main.nf → modules/cnvkit/batch/main.nf Executable file → Normal file
View file

@ -4,7 +4,7 @@ include { initOptions; saveFiles; getSoftwareName; getProcessName } from './func
params.options = [:]
options = initOptions(params.options)
process CNVKIT {
process CNVKIT_BATCH {
tag "$meta.id"
label 'process_low'
publishDir "${params.outdir}",
@ -19,25 +19,40 @@ process CNVKIT {
}
input:
tuple val(meta), path(tumourbam), path(normalbam)
tuple val(meta), path(tumor), path(normal)
path fasta
path targetfile
path targets
path reference
output:
tuple val(meta), path("*.bed"), emit: bed
tuple val(meta), path("*.cnn"), emit: cnn
tuple val(meta), path("*.cnr"), emit: cnr
tuple val(meta), path("*.cns"), emit: cns
tuple val(meta), path("*.cnn"), emit: cnn, optional: true
tuple val(meta), path("*.cnr"), emit: cnr, optional: true
tuple val(meta), path("*.cns"), emit: cns, optional: true
path "versions.yml" , emit: versions
script:
normal_args = normal ? "--normal $normal" : ""
fasta_args = fasta ? "--fasta $fasta" : ""
reference_args = reference ? "--reference $reference" : ""
def target_args = ""
if (options.args.contains("--method wgs") || options.args.contains("-m wgs")) {
target_args = targets ? "--targets $targets" : ""
}
else {
target_args = "--targets $targets"
}
"""
cnvkit.py \\
batch \\
$tumourbam \\
--normal $normalbam\\
--fasta $fasta \\
--targets $targetfile \\
$tumor \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes ${task.cpus} \\
$options.args
cat <<-END_VERSIONS > versions.yml

View file

@ -1,4 +1,4 @@
name: cnvkit
name: cnvkit_batch
description: Copy number variant detection from high-throughput sequencing data
keywords:
- bam
@ -38,14 +38,14 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- tumourbam:
- tumor:
type: file
description: |
Input tumour sample bam file
- normalbam:
Input tumour sample bam file (or cram)
- normal:
type: file
description: |
Input normal sample bam file
Input normal sample bam file (or cram)
- fasta:
type: file
description: |
@ -54,6 +54,10 @@ input:
type: file
description: |
Input target bed file
- reference:
type: file
description: |
Input reference cnn file (only needed for germline and tumor-only runs)
output:
- meta:
type: map
@ -85,4 +89,5 @@ authors:
- "@KevinMenden"
- "@MaxUlysse"
- "@drpatelh"
- "@fbdtemme"
- "@lassefolkersen"

View file

@ -294,9 +294,9 @@ cmseq/polymut:
- modules/cmseq/polymut/**
- tests/modules/cmseq/polymut/**
cnvkit:
- modules/cnvkit/**
- tests/modules/cnvkit/**
cnvkit/batch:
- modules/cnvkit/batch/**
- tests/modules/cnvkit/batch/**
cooler/digest:
- modules/cooler/digest/**

View file

@ -263,6 +263,9 @@ params {
'txt' {
hello = "${test_data_dir}/generic/txt/hello.txt"
}
'cnn' {
reference = "${test_data_dir}/generic/cnn/reference.cnn"
}
'cooler'{
test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz"
test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2"

View file

@ -0,0 +1,64 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] )
include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn --method wgs' ] )
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' addParams( options: [ 'args': '--method wgs' ] )
// Hybrid scenario: a tumor/normal alignment pair is submitted together with a
// genome FASTA and a baits BED file; the reference slot is left empty.
workflow test_cnvkit_hybrid {
    illumina_data = params.test_data['sarscov2']['illumina']
    genome_data   = params.test_data['sarscov2']['genome']

    // Tuple layout: [ meta map, tumor, normal ]
    sample = [
        [ id:'test' ],
        file(illumina_data['test_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina_data['test_single_end_sorted_bam'], checkIfExists: true)
    ]

    genome_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    baits_bed    = file(genome_data['baits_bed'], checkIfExists: true)

    // Positional inputs: sample tuple, fasta, targets, reference
    CNVKIT_HYBRID ( sample, genome_fasta, baits_bed, [] )
}
// WGS scenario: a tumor/normal alignment pair plus a genome FASTA; both the
// targets slot and the reference slot are passed as empty lists.
workflow test_cnvkit_wgs {
    illumina_data = params.test_data['homo_sapiens']['illumina']

    // Tuple layout: [ meta map, tumor, normal ]
    sample = [
        [ id:'test'],
        file(illumina_data['test2_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina_data['test_paired_end_sorted_bam'], checkIfExists: true)
    ]

    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // Positional inputs: sample tuple, fasta, targets, reference
    CNVKIT_WGS ( sample, genome_fasta, [], [] )
}
// WGS scenario driven by CRAM alignments rather than BAM, so that the CRAM
// input path is actually exercised. The previous version loaded the same
// *_sorted_bam files as test_cnvkit_wgs and therefore duplicated that test.
// The targets and reference slots are passed as empty lists.
workflow test_cnvkit_cram {
    // NOTE(review): assumes the *_sorted_cram keys are defined under
    // homo_sapiens/illumina in the test data config -- confirm, and re-check the
    // expected md5sums listed for this entry in the accompanying test.yml.
    tumor  = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true)
    normal = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)

    input = [ [ id:'test'], // meta map
              tumor,
              normal
            ]

    // The genome FASTA is supplied alongside the CRAM files.
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // Positional inputs: sample tuple, fasta, targets, reference
    CNVKIT_WGS ( input, fasta, [], [] )
}
// Tumor-only scenario: no normal sample, no FASTA and no targets are given;
// a reference .cnn file is passed in the reference slot instead.
workflow test_cnvkit_tumoronly {
    tumor = file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)

    input = [ [ id:'test'], // meta map
              tumor,
              [ ]           // empty normal slot
            ]

    // The genome FASTA lookup was dropped: its result was never handed to the
    // process, which receives [] in the fasta slot below.
    reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true)

    // Positional inputs: sample tuple, fasta, targets, reference
    CNVKIT_TUMORONLY ( input, [], [], reference )
}

View file

@ -0,0 +1,101 @@
- name: cnvkit batch test_cnvkit_hybrid
command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c tests/config/nextflow.config
tags:
- cnvkit/batch
- cnvkit
files:
- path: output/cnvkit/baits.antitarget.bed
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/cnvkit/baits.target.bed
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
- path: output/cnvkit/reference.cnn
md5sum: ac99c1ad8b917b96ae15119146c91ab9
- path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.paired_end.sorted.bintest.cns
md5sum: 6544d979475def8a9f69ba42a985668d
- path: output/cnvkit/test.paired_end.sorted.call.cns
md5sum: f2ca59b4d50b0c317adc526c1b99b622
- path: output/cnvkit/test.paired_end.sorted.cnr
md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3
- path: output/cnvkit/test.paired_end.sorted.cns
md5sum: 060af1aa637ed51812af19bcce24fcfe
- path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn
md5sum: 3fe80b6013ffc3e9968345e810158215
- path: output/cnvkit/test.single_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn
md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
- name: cnvkit batch test_cnvkit_wgs
command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c tests/config/nextflow.config
tags:
- cnvkit/batch
- cnvkit
files:
- path: output/cnvkit/genome.antitarget.bed
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/cnvkit/genome.bed
md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7
- path: output/cnvkit/genome.target.bed
md5sum: a13353ae9c8405e701390c069255bbd2
- path: output/cnvkit/reference.cnn
md5sum: 05c6211e0179885b8a83e44fd21d5f86
- path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn
md5sum: ff526714696aa49bdc1dc8d00d965266
- path: output/cnvkit/test2.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test2.paired_end.sorted.bintest.cns
md5sum: 6544d979475def8a9f69ba42a985668d
- path: output/cnvkit/test2.paired_end.sorted.call.cns
md5sum: f6de754c34f780e6befee5b3ff0893f8
- path: output/cnvkit/test2.paired_end.sorted.cnr
md5sum: 80318d06c6b095945a0fb0e85e887cbc
- path: output/cnvkit/test2.paired_end.sorted.cns
md5sum: 76afa47afc4bd5de35aee8fdb54d3d3a
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_cram
command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_cram -c tests/config/nextflow.config
tags:
- cnvkit/batch
- cnvkit
files:
- path: output/cnvkit/genome.antitarget.bed
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/cnvkit/genome.bed
md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7
- path: output/cnvkit/genome.target.bed
md5sum: a13353ae9c8405e701390c069255bbd2
- path: output/cnvkit/reference.cnn
md5sum: 05c6211e0179885b8a83e44fd21d5f86
- path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn
md5sum: ff526714696aa49bdc1dc8d00d965266
- path: output/cnvkit/test2.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test2.paired_end.sorted.bintest.cns
md5sum: 6544d979475def8a9f69ba42a985668d
- path: output/cnvkit/test2.paired_end.sorted.call.cns
md5sum: f6de754c34f780e6befee5b3ff0893f8
- path: output/cnvkit/test2.paired_end.sorted.cnr
md5sum: 80318d06c6b095945a0fb0e85e887cbc
- path: output/cnvkit/test2.paired_end.sorted.cns
md5sum: 76afa47afc4bd5de35aee8fdb54d3d3a
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_tumoronly
command: nextflow run tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c tests/config/nextflow.config
tags:
- cnvkit/batch
- cnvkit
files:
- path: output/cnvkit/reference.antitarget-tmp.bed
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/cnvkit/reference.target-tmp.bed
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb

View file

@ -1,19 +0,0 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CNVKIT } from '../../../modules/cnvkit/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] )
// Single scenario of the pre-rename module: a tumour/normal alignment pair
// submitted together with a genome FASTA and a baits BED file.
workflow test_cnvkit {
    illumina_data = params.test_data['sarscov2']['illumina']
    genome_data   = params.test_data['sarscov2']['genome']

    // Tuple layout: [ meta map, tumour, normal ]
    sample = [
        [ id:'test' ],
        file(illumina_data['test_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina_data['test_single_end_sorted_bam'], checkIfExists: true)
    ]

    genome_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    baits_bed    = file(genome_data['baits_bed'], checkIfExists: true)

    // Positional inputs: sample tuple, fasta, targets
    CNVKIT ( sample, genome_fasta, baits_bed )
}

View file

@ -1,27 +0,0 @@
- name: cnvkit
command: nextflow run ./tests/modules/cnvkit/ -entry test_cnvkit -c tests/config/nextflow.config
tags:
- cnvkit
files:
- path: output/cnvkit/baits.target.bed
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
- path: output/cnvkit/baits.antitarget.bed
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/cnvkit/reference.cnn
md5sum: ac99c1ad8b917b96ae15119146c91ab9
- path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn
md5sum: 3fe80b6013ffc3e9968345e810158215
- path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn
md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
- path: output/cnvkit/test.single_end.sorted.antitargetcoverage.cnn
md5sum: 203caf8cef6935bb50b4138097955cb8
- path: output/cnvkit/test.paired_end.sorted.cnr
md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3
- path: output/cnvkit/test.paired_end.sorted.cns
md5sum: 060af1aa637ed51812af19bcce24fcfe
- path: output/cnvkit/test.paired_end.sorted.bintest.cns
md5sum: 6544d979475def8a9f69ba42a985668d
- path: output/cnvkit/test.paired_end.sorted.call.cns
md5sum: f2ca59b4d50b0c317adc526c1b99b622