diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index b467c6f2..090f7860 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -10,6 +10,7 @@ process CNVKIT_BATCH { input: tuple val(meta), path(tumor), path(normal) path fasta + path fasta_fai path targets path reference @@ -34,12 +35,15 @@ process CNVKIT_BATCH { // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" if (normal_exists){ def normal_prefix = normal.BaseName @@ -48,9 +52,9 @@ process CNVKIT_BATCH { // germline mode // normal samples must be input without a flag - // requires flag --normal to be empty + // requires flag --normal to be empty [] if(!tumor_exists){ - tumor_out = normal.BaseName + ".bam" + tumor_out = "${normal_prefix}" + ".bam" normal_args = "--normal " } // somatic mode @@ -62,28 +66,130 @@ process CNVKIT_BATCH { def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" - """ - if $tumor_cram; then - samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - fi - if $normal_cram; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi + // somatic_mode cram_input + if (tumor_cram && normal_cram){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args - cnvkit.py \\ - batch \\ - $tumor_out \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // somatic_mode bam_input + else if (tumor_bam && normal_bam){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only_mode cram_input + else if(tumor_cram && !normal_exists){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only bam_input + else if(tumor_bam && !normal_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode cram_input + // normal_args must be --normal [] + else if (normal_cram && !tumor_exists){ + """ + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode bam_input + else if (normal_bam && !tumor_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ } diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 2cd675c7..89762192 100644 --- a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -29,6 +29,10 @@ input: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) - targetfile: type: file description: | diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index c93e3a00..daf87a37 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' -workflow test_cnvkit_hybrid { +workflow test_cnvkit_hybrid_somatic { input = [ [ id:'test' ], // meta map @@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) - CNVKIT_HYBRID ( input, fasta, targets, [] ) + CNVKIT_HYBRID ( input, fasta, [], targets, [] ) } -workflow test_cnvkit_wgs { +workflow test_cnvkit_wgs_somatic { input = [ [ id:'test'], // meta map @@ -29,22 +29,24 @@ workflow test_cnvkit_wgs { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, [], [], [] ) } -workflow test_cnvkit_cram { +workflow test_cnvkit_cram_wgs_somatic { input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, fasta_fai, [], [] ) } -workflow test_cnvkit_tumoronly { + +workflow test_cnvkit_tumoronly_hybrid_bam { input = [ [ id:'test'], // meta map @@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly { ] reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, [], [], reference ) + CNVKIT_TUMORONLY ( input, [], [], [], reference ) } -workflow test_cnvkit_tumoronly_cram { +workflow test_cnvkit_tumoronly_hybrid_cram { input = [ [ id:'test'], // meta map @@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram { fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, fasta, [], reference ) + CNVKIT_TUMORONLY ( input, fasta, [], [], reference ) } -workflow test_cnvkit_germline_cram { +workflow test_cnvkit_germline_hybrid_cram { input = [ [ id:'test'], // meta map @@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram { file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) - CNVKIT_GERMLINE ( input, fasta, targets, []) + CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, []) +} + +workflow test_cnvkit_germline_hybrid_bam { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, [], targets, []) } diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 928a08de..006e142f 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,5 +1,5 @@ -- name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_hybrid_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -26,8 +26,8 @@ - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 -- name: cnvkit batch test_cnvkit_wgs - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -56,8 +56,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_cram_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -86,8 +86,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_tumoronly - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -109,8 +109,8 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_tumoronly_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -132,8 +132,33 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_germline_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_germline_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f + +- name: cnvkit batch test_cnvkit_germline_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch