Added fasta_fai as an input and changed the if statements

This commit is contained in:
SusiJo 2022-06-02 12:57:50 +02:00
parent 7742661784
commit 2f3ddc90fb
4 changed files with 200 additions and 49 deletions

View file

@ -10,6 +10,7 @@ process CNVKIT_BATCH {
input:
tuple val(meta), path(tumor), path(normal)
path fasta
path fasta_fai
path targets
path reference
@ -34,12 +35,15 @@ process CNVKIT_BATCH {
// execute samtools only when cram files are input: cnvkit can read cram natively but is prohibitively slow on it, so cram is converted to bam first
def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false
def normal_cram = normal_exists && normal.Extension == "cram" ? true : false
def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false
def normal_bam = normal_exists && normal.Extension == "bam" ? true : false
def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}"
// tumor_only mode does not need fasta & target
// instead it requires a pre-computed reference.cnn which is built from fasta & target
def (normal_out, normal_args, fasta_args) = ["", "", ""]
def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : ""
if (normal_exists){
def normal_prefix = normal.BaseName
@ -48,9 +52,9 @@ process CNVKIT_BATCH {
// germline mode
// normal samples must be input without a flag
// requires flag --normal to be empty
// requires flag --normal to be empty []
if(!tumor_exists){
tumor_out = normal.BaseName + ".bam"
tumor_out = "${normal_prefix}" + ".bam"
normal_args = "--normal "
}
// somatic mode
@ -62,28 +66,130 @@ process CNVKIT_BATCH {
def target_args = targets ? "--targets $targets" : ""
def reference_args = reference ? "--reference $reference" : ""
"""
if $tumor_cram; then
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out
fi
if $normal_cram; then
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
fi
// somatic_mode cram_input
if (tumor_cram && normal_cram){
"""
samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// somatic_mode bam_input
else if (tumor_bam && normal_bam){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only_mode cram_input
else if(tumor_cram && !normal_exists){
"""
samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only bam_input
else if(tumor_bam && !normal_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode cram_input
// normal_args must be --normal []
else if (normal_cram && !tumor_exists){
"""
samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode bam_input
else if (normal_bam && !tumor_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}

View file

@ -29,6 +29,10 @@ input:
type: file
description: |
Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided)
- fasta_fai:
type: file
description: |
Input reference genome fasta index (optional, but recommended for cram_input)
- targetfile:
type: file
description: |

View file

@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf'
include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf'
workflow test_cnvkit_hybrid {
workflow test_cnvkit_hybrid_somatic {
input = [
[ id:'test' ], // meta map
@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)
CNVKIT_HYBRID ( input, fasta, targets, [] )
CNVKIT_HYBRID ( input, fasta, [], targets, [] )
}
workflow test_cnvkit_wgs {
workflow test_cnvkit_wgs_somatic {
input = [
[ id:'test'], // meta map
@ -29,22 +29,24 @@ workflow test_cnvkit_wgs {
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
CNVKIT_WGS ( input, fasta, [], [] )
CNVKIT_WGS ( input, fasta, [], [], [] )
}
workflow test_cnvkit_cram {
workflow test_cnvkit_cram_wgs_somatic {
input = [
[ id:'test'], // meta map
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
CNVKIT_WGS ( input, fasta, [], [] )
CNVKIT_WGS ( input, fasta, fasta_fai, [], [] )
}
workflow test_cnvkit_tumoronly {
workflow test_cnvkit_tumoronly_hybrid_bam {
input = [
[ id:'test'], // meta map
@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly {
]
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, [], [], reference )
CNVKIT_TUMORONLY ( input, [], [], [], reference )
}
workflow test_cnvkit_tumoronly_cram {
workflow test_cnvkit_tumoronly_hybrid_cram {
input = [
[ id:'test'], // meta map
@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, fasta, [], reference )
CNVKIT_TUMORONLY ( input, fasta, [], [], reference )
}
workflow test_cnvkit_germline_cram {
workflow test_cnvkit_germline_hybrid_cram {
input = [
[ id:'test'], // meta map
@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram {
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
CNVKIT_GERMLINE ( input, fasta, targets, [])
CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, [])
}
// Test workflow: runs CNVKIT_GERMLINE with a single BAM supplied in the
// normal slot of the input tuple and an empty tumor slot.
workflow test_cnvkit_germline_hybrid_bam {
// Input tuple order follows the process declaration: meta, tumor, normal.
input = [
[ id:'test'], // meta map
[], // tumor slot left empty
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
// Positional arguments: input, fasta, fasta_fai (empty here), targets, reference (empty here).
CNVKIT_GERMLINE ( input, fasta, [], targets, [])
}

View file

@ -1,5 +1,5 @@
- name: cnvkit batch test_cnvkit_hybrid
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_hybrid_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
@ -26,8 +26,8 @@
- path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn
md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
- name: cnvkit batch test_cnvkit_wgs
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_wgs_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
@ -56,8 +56,8 @@
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_cram_wgs_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
@ -86,8 +86,8 @@
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_tumoronly
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
@ -109,8 +109,8 @@
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
- name: cnvkit batch test_cnvkit_tumoronly_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
@ -132,8 +132,33 @@
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
- name: cnvkit batch test_cnvkit_germline_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
- name: cnvkit batch test_cnvkit_germline_hybrid_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
files:
- path: output/cnvkit/multi_intervals.antitarget.bed
md5sum: 3d4d20f9f23b39970865d29ef239d20b
- path: output/cnvkit/multi_intervals.target.bed
md5sum: 86d30493bb2e619a93f4ebc2923d29f3
- path: output/cnvkit/reference.cnn
md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
md5sum: 067115082c4af4b64d58c0dc3a3642e4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
md5sum: 68b62b75cd91b2ffe5633686fb943490
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
md5sum: df196edd72613c59186f4d87df3dc4a4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
md5sum: 3b4fc0cc73be78f978cfe2422470753e
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: b4a49faf170e436ec32dcc21ccc3ce8f
- name: cnvkit batch test_cnvkit_germline_hybrid_bam
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch