mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
adjust cnvkit/batch for germline mode
This commit is contained in:
parent
0708f343de
commit
f6953b5147
4 changed files with 104 additions and 25 deletions
|
@ -28,34 +28,48 @@ process CNVKIT_BATCH {
|
||||||
script:
|
script:
|
||||||
def args = task.ext.args ?: ''
|
def args = task.ext.args ?: ''
|
||||||
|
|
||||||
// execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow
|
def tumor_exists = tumor ? true : false
|
||||||
// input pair is assumed to have same extension if both exist
|
|
||||||
def is_cram = tumor.Extension == "cram" ? true : false
|
|
||||||
def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}"
|
|
||||||
|
|
||||||
// do not run samtools on normal samples in tumor_only mode
|
|
||||||
def normal_exists = normal ? true : false
|
def normal_exists = normal ? true : false
|
||||||
|
|
||||||
|
// execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow
|
||||||
|
def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false
|
||||||
|
def normal_cram = normal_exists && normal.Extension == "cram" ? true : false
|
||||||
|
|
||||||
|
def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}"
|
||||||
|
|
||||||
// tumor_only mode does not need fasta & target
|
// tumor_only mode does not need fasta & target
|
||||||
// instead it requires a pre-computed reference.cnn which is built from fasta & target
|
// instead it requires a pre-computed reference.cnn which is built from fasta & target
|
||||||
def (normal_out, normal_args, fasta_args) = ["", "", ""]
|
def (normal_out, normal_args, fasta_args) = ["", "", ""]
|
||||||
|
|
||||||
if (normal_exists){
|
if (normal_exists){
|
||||||
def normal_prefix = normal.BaseName
|
def normal_prefix = normal.BaseName
|
||||||
normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}"
|
normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}"
|
||||||
normal_args = normal_prefix ? "--normal $normal_out" : ""
|
|
||||||
fasta_args = fasta ? "--fasta $fasta" : ""
|
fasta_args = fasta ? "--fasta $fasta" : ""
|
||||||
|
|
||||||
|
// germline mode
|
||||||
|
// normal samples must be input without a flag
|
||||||
|
// requires flag --normal to be empty
|
||||||
|
if(!tumor_exists){
|
||||||
|
tumor_out = normal.BaseName + ".bam"
|
||||||
|
normal_args = "--normal "
|
||||||
|
}
|
||||||
|
// somatic mode
|
||||||
|
else {
|
||||||
|
normal_args = normal_prefix ? "--normal $normal_out" : ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def target_args = targets ? "--targets $targets" : ""
|
def target_args = targets ? "--targets $targets" : ""
|
||||||
def reference_args = reference ? "--reference $reference" : ""
|
def reference_args = reference ? "--reference $reference" : ""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if $is_cram; then
|
if $tumor_cram; then
|
||||||
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out
|
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out
|
||||||
if $normal_exists; then
|
fi
|
||||||
|
if $normal_cram; then
|
||||||
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
|
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
|
||||||
fi
|
fi
|
||||||
fi
|
|
||||||
|
|
||||||
cnvkit.py \\
|
cnvkit.py \\
|
||||||
batch \\
|
batch \\
|
||||||
|
|
|
@ -23,6 +23,8 @@ params {
|
||||||
test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12"
|
test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12"
|
||||||
baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed"
|
baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed"
|
||||||
|
|
||||||
|
reference_cnn = "${test_data_dir}/genomics/sarscov2/genome/cnn/reference.cnn"
|
||||||
|
|
||||||
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
|
kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2"
|
||||||
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
|
kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz"
|
||||||
|
|
||||||
|
@ -145,6 +147,7 @@ params {
|
||||||
genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz"
|
genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz"
|
||||||
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
|
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
|
||||||
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
|
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
|
||||||
|
genome_21_reference_cnn = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn"
|
||||||
|
|
||||||
dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
|
dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites"
|
||||||
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
|
||||||
|
@ -414,9 +417,6 @@ params {
|
||||||
'txt' {
|
'txt' {
|
||||||
hello = "${test_data_dir}/generic/txt/hello.txt"
|
hello = "${test_data_dir}/generic/txt/hello.txt"
|
||||||
}
|
}
|
||||||
'cnn' {
|
|
||||||
reference = "${test_data_dir}/generic/cnn/reference.cnn"
|
|
||||||
}
|
|
||||||
'cooler'{
|
'cooler'{
|
||||||
test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz"
|
test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz"
|
||||||
test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2"
|
test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2"
|
||||||
|
|
|
@ -5,6 +5,7 @@ nextflow.enable.dsl = 2
|
||||||
include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf'
|
include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf'
|
||||||
include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf'
|
include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf'
|
||||||
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf'
|
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf'
|
||||||
|
include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf'
|
||||||
|
|
||||||
workflow test_cnvkit_hybrid {
|
workflow test_cnvkit_hybrid {
|
||||||
|
|
||||||
|
@ -47,10 +48,10 @@ workflow test_cnvkit_tumoronly {
|
||||||
|
|
||||||
input = [
|
input = [
|
||||||
[ id:'test'], // meta map
|
[ id:'test'], // meta map
|
||||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
|
||||||
[]
|
[]
|
||||||
]
|
]
|
||||||
reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true)
|
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
|
||||||
|
|
||||||
CNVKIT_TUMORONLY ( input, [], [], reference )
|
CNVKIT_TUMORONLY ( input, [], [], reference )
|
||||||
}
|
}
|
||||||
|
@ -59,11 +60,24 @@ workflow test_cnvkit_tumoronly_cram {
|
||||||
|
|
||||||
input = [
|
input = [
|
||||||
[ id:'test'], // meta map
|
[ id:'test'], // meta map
|
||||||
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
|
||||||
[]
|
[]
|
||||||
]
|
]
|
||||||
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
|
||||||
reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true)
|
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
|
||||||
|
|
||||||
CNVKIT_TUMORONLY ( input, fasta, [], reference )
|
CNVKIT_TUMORONLY ( input, fasta, [], reference )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
workflow test_cnvkit_germline_cram {
|
||||||
|
|
||||||
|
input = [
|
||||||
|
[ id:'test'], // meta map
|
||||||
|
[],
|
||||||
|
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
|
||||||
|
]
|
||||||
|
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
|
||||||
|
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
|
||||||
|
|
||||||
|
CNVKIT_GERMLINE ( input, fasta, targets, [])
|
||||||
|
}
|
||||||
|
|
|
@ -92,9 +92,22 @@
|
||||||
- cnvkit
|
- cnvkit
|
||||||
- cnvkit/batch
|
- cnvkit/batch
|
||||||
files:
|
files:
|
||||||
- path: output/cnvkit/reference.antitarget-tmp.bed
|
- path: output/cnvkit/reference_chr21.antitarget-tmp.bed
|
||||||
- path: output/cnvkit/reference.target-tmp.bed
|
md5sum: 3d4d20f9f23b39970865d29ef239d20b
|
||||||
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
|
- path: output/cnvkit/reference_chr21.target-tmp.bed
|
||||||
|
md5sum: 657b25dbda8516624efa8cb2cf3716ca
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
|
||||||
|
md5sum: 067115082c4af4b64d58c0dc3a3642e4
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
|
||||||
|
md5sum: f6adc75a0a86b7a921eca1b79a394cb0
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
|
||||||
|
md5sum: f7caeca04aba28b125ce26b511f42afb
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
|
||||||
|
md5sum: d9bdb71ce807051369577ee7f807a40c
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
|
||||||
|
md5sum: 2b56aac606ba6183d018b30ca58afcec
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
|
||||||
|
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
|
||||||
|
|
||||||
- name: cnvkit batch test_cnvkit_tumoronly_cram
|
- name: cnvkit batch test_cnvkit_tumoronly_cram
|
||||||
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
|
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
|
||||||
|
@ -102,6 +115,44 @@
|
||||||
- cnvkit
|
- cnvkit
|
||||||
- cnvkit/batch
|
- cnvkit/batch
|
||||||
files:
|
files:
|
||||||
- path: output/cnvkit/reference.antitarget-tmp.bed
|
- path: output/cnvkit/reference_chr21.antitarget-tmp.bed
|
||||||
- path: output/cnvkit/reference.target-tmp.bed
|
md5sum: 3d4d20f9f23b39970865d29ef239d20b
|
||||||
md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
|
- path: output/cnvkit/reference_chr21.target-tmp.bed
|
||||||
|
md5sum: 657b25dbda8516624efa8cb2cf3716ca
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
|
||||||
|
md5sum: 067115082c4af4b64d58c0dc3a3642e4
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
|
||||||
|
md5sum: f6adc75a0a86b7a921eca1b79a394cb0
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
|
||||||
|
md5sum: f7caeca04aba28b125ce26b511f42afb
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
|
||||||
|
md5sum: d9bdb71ce807051369577ee7f807a40c
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
|
||||||
|
md5sum: 2b56aac606ba6183d018b30ca58afcec
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
|
||||||
|
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
|
||||||
|
|
||||||
|
- name: cnvkit batch test_cnvkit_germline_cram
|
||||||
|
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
|
||||||
|
tags:
|
||||||
|
- cnvkit
|
||||||
|
- cnvkit/batch
|
||||||
|
files:
|
||||||
|
- path: output/cnvkit/multi_intervals.antitarget.bed
|
||||||
|
md5sum: 3d4d20f9f23b39970865d29ef239d20b
|
||||||
|
- path: output/cnvkit/multi_intervals.target.bed
|
||||||
|
md5sum: 86d30493bb2e619a93f4ebc2923d29f3
|
||||||
|
- path: output/cnvkit/reference.cnn
|
||||||
|
md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
|
||||||
|
md5sum: 067115082c4af4b64d58c0dc3a3642e4
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
|
||||||
|
md5sum: 68b62b75cd91b2ffe5633686fb943490
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
|
||||||
|
md5sum: df196edd72613c59186f4d87df3dc4a4
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
|
||||||
|
md5sum: 3b4fc0cc73be78f978cfe2422470753e
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
|
||||||
|
md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4
|
||||||
|
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
|
||||||
|
md5sum: b4a49faf170e436ec32dcc21ccc3ce8f
|
||||||
|
|
Loading…
Reference in a new issue