adjust cnvkit/batch for germline mode

This commit is contained in:
SusiJo 2022-06-01 17:35:02 +02:00
parent 0708f343de
commit f6953b5147
4 changed files with 104 additions and 25 deletions

View file

@ -28,34 +28,48 @@ process CNVKIT_BATCH {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
// execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_exists = tumor ? true : false
// input pair is assumed to have same extension if both exist def normal_exists = normal ? true : false
def is_cram = tumor.Extension == "cram" ? true : false
def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow
def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false
def normal_cram = normal_exists && normal.Extension == "cram" ? true : false
def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}"
// do not run samtools on normal samples in tumor_only mode
def normal_exists = normal ? true: false
// tumor_only mode does not need fasta & target // tumor_only mode does not need fasta & target
// instead it requires a pre-computed reference.cnn which is built from fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target
def (normal_out, normal_args, fasta_args) = ["", "", ""] def (normal_out, normal_args, fasta_args) = ["", "", ""]
if (normal_exists){ if (normal_exists){
def normal_prefix = normal.BaseName def normal_prefix = normal.BaseName
normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}"
normal_args = normal_prefix ? "--normal $normal_out" : ""
fasta_args = fasta ? "--fasta $fasta" : "" fasta_args = fasta ? "--fasta $fasta" : ""
// germline mode
// normal samples must be input without a flag
// requires flag --normal to be empty
if(!tumor_exists){
tumor_out = normal.BaseName + ".bam"
normal_args = "--normal "
}
// somatic mode
else {
normal_args = normal_prefix ? "--normal $normal_out" : ""
}
} }
def target_args = targets ? "--targets $targets" : "" def target_args = targets ? "--targets $targets" : ""
def reference_args = reference ? "--reference $reference" : "" def reference_args = reference ? "--reference $reference" : ""
""" """
if $is_cram; then if $tumor_cram; then
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out
if $normal_exists; then fi
if $normal_cram; then
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
fi fi
fi
cnvkit.py \\ cnvkit.py \\
batch \\ batch \\

View file

@ -23,6 +23,8 @@ params {
test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12" test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12"
baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed" baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed"
reference_cnn = "${test_data_dir}/genomics/sarscov2/genome/cnn/reference.cnn"
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2" kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz" kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
@ -145,6 +147,7 @@ params {
genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz"
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
genome_21_reference_cnn = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn"
dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
@ -414,9 +417,6 @@ params {
'txt' { 'txt' {
hello = "${test_data_dir}/generic/txt/hello.txt" hello = "${test_data_dir}/generic/txt/hello.txt"
} }
'cnn' {
reference = "${test_data_dir}/generic/cnn/reference.cnn"
}
'cooler'{ 'cooler'{
test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz"
test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2"

View file

@ -5,6 +5,7 @@ nextflow.enable.dsl = 2
include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf'
include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf'
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf'
include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf'
workflow test_cnvkit_hybrid { workflow test_cnvkit_hybrid {
@ -47,10 +48,10 @@ workflow test_cnvkit_tumoronly {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
[] []
] ]
reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, [], [], reference ) CNVKIT_TUMORONLY ( input, [], [], reference )
} }
@ -59,11 +60,24 @@ workflow test_cnvkit_tumoronly_cram {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
[] []
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, fasta, [], reference ) CNVKIT_TUMORONLY ( input, fasta, [], reference )
} }
// Germline-mode test for CNVKIT_BATCH: the input tuple has an empty tumor slot and a
// single CRAM file in the normal slot (the tumor-only tests in this file use the
// opposite layout: file in the second position, [] in the third).
workflow test_cnvkit_germline_cram {
input = [
[ id:'test'], // meta map
[], // tumor slot left empty
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) // normal-slot sample (CRAM)
]
// NOTE(review): uses the chr21 fasta, whereas test_cnvkit_tumoronly_cram uses 'genome_fasta'
// for the same CRAM — confirm which reference this CRAM was written against.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
// Argument order mirrors the other calls in this file: input, fasta, targets, reference.
// The last argument is empty, i.e. no pre-computed reference .cnn is supplied here.
CNVKIT_GERMLINE ( input, fasta, targets, [])
}

View file

@ -92,9 +92,22 @@
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
files: files:
- path: output/cnvkit/reference.antitarget-tmp.bed - path: output/cnvkit/reference_chr21.antitarget-tmp.bed
- path: output/cnvkit/reference.target-tmp.bed md5sum: 3d4d20f9f23b39970865d29ef239d20b
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb - path: output/cnvkit/reference_chr21.target-tmp.bed
md5sum: 657b25dbda8516624efa8cb2cf3716ca
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
md5sum: 067115082c4af4b64d58c0dc3a3642e4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
md5sum: f6adc75a0a86b7a921eca1b79a394cb0
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
md5sum: f7caeca04aba28b125ce26b511f42afb
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
md5sum: d9bdb71ce807051369577ee7f807a40c
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
md5sum: 2b56aac606ba6183d018b30ca58afcec
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
- name: cnvkit batch test_cnvkit_tumoronly_cram - name: cnvkit batch test_cnvkit_tumoronly_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
@ -102,6 +115,44 @@
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
files: files:
- path: output/cnvkit/reference.antitarget-tmp.bed - path: output/cnvkit/reference_chr21.antitarget-tmp.bed
- path: output/cnvkit/reference.target-tmp.bed md5sum: 3d4d20f9f23b39970865d29ef239d20b
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb - path: output/cnvkit/reference_chr21.target-tmp.bed
md5sum: 657b25dbda8516624efa8cb2cf3716ca
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
md5sum: 067115082c4af4b64d58c0dc3a3642e4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
md5sum: f6adc75a0a86b7a921eca1b79a394cb0
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
md5sum: f7caeca04aba28b125ce26b511f42afb
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
md5sum: d9bdb71ce807051369577ee7f807a40c
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
md5sum: 2b56aac606ba6183d018b30ca58afcec
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
# Test entry for the test_cnvkit_germline_cram workflow (run via the -entry flag below).
# Each listed output path is paired with the md5sum the test expects for that file.
- name: cnvkit batch test_cnvkit_germline_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
files:
# Target / antitarget BED files named after the multi_intervals targets input.
- path: output/cnvkit/multi_intervals.antitarget.bed
md5sum: 3d4d20f9f23b39970865d29ef239d20b
- path: output/cnvkit/multi_intervals.target.bed
md5sum: 86d30493bb2e619a93f4ebc2923d29f3
# Unlike the tumor-only entries, this test expects a reference.cnn among the outputs.
- path: output/cnvkit/reference.cnn
md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec
# Per-sample outputs, prefixed with the input sample's base name.
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
md5sum: 067115082c4af4b64d58c0dc3a3642e4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
md5sum: 68b62b75cd91b2ffe5633686fb943490
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
md5sum: df196edd72613c59186f4d87df3dc4a4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
md5sum: 3b4fc0cc73be78f978cfe2422470753e
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: b4a49faf170e436ec32dcc21ccc3ce8f