Added fasta_fai as input and replaced the shell if-statements with Groovy if/else branches per input mode

This commit is contained in:
SusiJo 2022-06-02 12:57:50 +02:00
parent 7742661784
commit 2f3ddc90fb
4 changed files with 200 additions and 49 deletions

View file

@ -10,6 +10,7 @@ process CNVKIT_BATCH {
input: input:
tuple val(meta), path(tumor), path(normal) tuple val(meta), path(tumor), path(normal)
path fasta path fasta
path fasta_fai
path targets path targets
path reference path reference
@ -34,12 +35,15 @@ process CNVKIT_BATCH {
// execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow // execute samtools only when cram files are input, cnvkit runs natively on cram but is prohibitively slow
def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false
def normal_cram = normal_exists && normal.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false
def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false
def normal_bam = normal_exists && normal.Extension == "bam" ? true : false
def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}"
// tumor_only mode does not need fasta & target // tumor_only mode does not need fasta & target
// instead it requires a pre-computed reference.cnn which is built from fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target
def (normal_out, normal_args, fasta_args) = ["", "", ""] def (normal_out, normal_args, fasta_args) = ["", "", ""]
def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : ""
if (normal_exists){ if (normal_exists){
def normal_prefix = normal.BaseName def normal_prefix = normal.BaseName
@ -48,9 +52,9 @@ process CNVKIT_BATCH {
// germline mode // germline mode
// normal samples must be input without a flag // normal samples must be input without a flag
// requires flag --normal to be empty // requires flag --normal to be empty []
if(!tumor_exists){ if(!tumor_exists){
tumor_out = normal.BaseName + ".bam" tumor_out = "${normal_prefix}" + ".bam"
normal_args = "--normal " normal_args = "--normal "
} }
// somatic mode // somatic mode
@ -62,28 +66,130 @@ process CNVKIT_BATCH {
def target_args = targets ? "--targets $targets" : "" def target_args = targets ? "--targets $targets" : ""
def reference_args = reference ? "--reference $reference" : "" def reference_args = reference ? "--reference $reference" : ""
""" // somatic_mode cram_input
if $tumor_cram; then if (tumor_cram && normal_cram){
samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out """
fi samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
if $normal_cram; then samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out
samtools view -T $fasta $normal -@ $task.cpus -o $normal_out
fi
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cnvkit.py \\ cat <<-END_VERSIONS > versions.yml
batch \\ "${task.process}":
$tumor_out \\ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
$normal_args \\ cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
$fasta_args \\ END_VERSIONS
$reference_args \\ """
$target_args \\ }
--processes $task.cpus \\ // somatic_mode bam_input
$args else if (tumor_bam && normal_bam){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only_mode cram_input
else if(tumor_cram && !normal_exists){
"""
samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// tumor_only bam_input
else if(tumor_bam && !normal_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode cram_input
// normal_args must be --normal []
else if (normal_cram && !tumor_exists){
"""
samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
// germline mode bam_input
else if (normal_bam && !tumor_exists){
"""
cnvkit.py \\
batch \\
$tumor_out \\
$normal_args \\
$fasta_args \\
$reference_args \\
$target_args \\
--processes $task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g")
END_VERSIONS
"""
} }

View file

@ -29,6 +29,10 @@ input:
type: file type: file
description: | description: |
Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided)
- fasta_fai:
type: file
description: |
Input reference genome fasta index (optional, but recommended for cram_input)
- targetfile: - targetfile:
type: file type: file
description: | description: |

View file

@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba
include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf'
include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf'
workflow test_cnvkit_hybrid { workflow test_cnvkit_hybrid_somatic {
input = [ input = [
[ id:'test' ], // meta map [ id:'test' ], // meta map
@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid {
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true)
CNVKIT_HYBRID ( input, fasta, targets, [] ) CNVKIT_HYBRID ( input, fasta, [], targets, [] )
} }
workflow test_cnvkit_wgs { workflow test_cnvkit_wgs_somatic {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
@ -29,22 +29,24 @@ workflow test_cnvkit_wgs {
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
CNVKIT_WGS ( input, fasta, [], [] ) CNVKIT_WGS ( input, fasta, [], [], [] )
} }
workflow test_cnvkit_cram { workflow test_cnvkit_cram_wgs_somatic {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true)
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
CNVKIT_WGS ( input, fasta, [], [] ) CNVKIT_WGS ( input, fasta, fasta_fai, [], [] )
} }
workflow test_cnvkit_tumoronly {
workflow test_cnvkit_tumoronly_hybrid_bam {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly {
] ]
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, [], [], reference ) CNVKIT_TUMORONLY ( input, [], [], [], reference )
} }
workflow test_cnvkit_tumoronly_cram { workflow test_cnvkit_tumoronly_hybrid_cram {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true)
CNVKIT_TUMORONLY ( input, fasta, [], reference ) CNVKIT_TUMORONLY ( input, fasta, [], [], reference )
} }
workflow test_cnvkit_germline_cram { workflow test_cnvkit_germline_hybrid_cram {
input = [ input = [
[ id:'test'], // meta map [ id:'test'], // meta map
@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram {
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
CNVKIT_GERMLINE ( input, fasta, targets, []) CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, [])
}
workflow test_cnvkit_germline_hybrid_bam {
input = [
[ id:'test'], // meta map
[],
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)
CNVKIT_GERMLINE ( input, fasta, [], targets, [])
} }

View file

@ -1,5 +1,5 @@
- name: cnvkit batch test_cnvkit_hybrid - name: cnvkit batch test_cnvkit_hybrid_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
@ -26,8 +26,8 @@
- path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn
md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
- name: cnvkit batch test_cnvkit_wgs - name: cnvkit batch test_cnvkit_wgs_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
@ -56,8 +56,8 @@
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1 md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_cram - name: cnvkit batch test_cnvkit_cram_wgs_somatic
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
@ -86,8 +86,8 @@
- path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn
md5sum: 6ae6b3fce7299eedca6133d911c38fe1 md5sum: 6ae6b3fce7299eedca6133d911c38fe1
- name: cnvkit batch test_cnvkit_tumoronly - name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
@ -109,8 +109,8 @@
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
- name: cnvkit batch test_cnvkit_tumoronly_cram - name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch
@ -132,8 +132,33 @@
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3
- name: cnvkit batch test_cnvkit_germline_cram - name: cnvkit batch test_cnvkit_germline_hybrid_cram
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags:
- cnvkit
- cnvkit/batch
files:
- path: output/cnvkit/multi_intervals.antitarget.bed
md5sum: 3d4d20f9f23b39970865d29ef239d20b
- path: output/cnvkit/multi_intervals.target.bed
md5sum: 86d30493bb2e619a93f4ebc2923d29f3
- path: output/cnvkit/reference.cnn
md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn
md5sum: 067115082c4af4b64d58c0dc3a3642e4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns
md5sum: 68b62b75cd91b2ffe5633686fb943490
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns
md5sum: df196edd72613c59186f4d87df3dc4a4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr
md5sum: 3b4fc0cc73be78f978cfe2422470753e
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns
md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4
- path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn
md5sum: b4a49faf170e436ec32dcc21ccc3ce8f
- name: cnvkit batch test_cnvkit_germline_hybrid_bam
command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config
tags: tags:
- cnvkit - cnvkit
- cnvkit/batch - cnvkit/batch