diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index 7c44d9f6..c1f9ef87 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -2,10 +2,10 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9' : null) + conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9 bioconda::samtools=1.15.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0' : - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' : + 'quay.io/biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' }" input: tuple val(meta), path(tumor), path(normal) @@ -18,6 +18,8 @@ process CNVKIT_BATCH { tuple val(meta), path("*.cnn"), emit: cnn, optional: true tuple val(meta), path("*.cnr"), emit: cnr, optional: true tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true path "versions.yml" , emit: versions when: @@ -25,21 +27,39 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - def normal_args = normal ? "--normal $normal" : "" - def fasta_args = fasta ? "--fasta $fasta" : "" + + // execute samtools only when cram files are input: they are converted to bam first, presumably because cnvkit run directly on cram is prohibitively slow (TODO confirm) + // input pair is assumed to have same extension if both exist + def is_cram = tumor.Extension == "cram" ? true : false + def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // do not run samtools on normal samples in tumor_only mode + def normal_exists = normal ? 
true: false + // tumor_only mode passes neither --fasta nor --normal to cnvkit (fasta is still handed to samtools -T when the input is cram) + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" + normal_args = normal_prefix ? "--normal $normal_out" : "" + fasta_args = fasta ? "--fasta $fasta" : "" + } + + def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" - def target_args = "" - if (args.contains("--method wgs") || args.contains("-m wgs")) { - target_args = targets ? "--targets $targets" : "" - } - else { - target_args = "--targets $targets" - } """ + if $is_cram; then + samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out + if $normal_exists; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + fi + cnvkit.py \\ batch \\ - $tumor \\ + $tumor_out \\ $normal_args \\ $fasta_args \\ $reference_args \\ diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 474c55f2..2cd675c7 100644 --- a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -11,27 +11,6 @@ tools: homepage: https://cnvkit.readthedocs.io/en/stable/index.html documentation: https://cnvkit.readthedocs.io/en/stable/index.html licence: ["Apache-2.0"] -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. 
- - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. input: - meta: type: map @@ -49,7 +28,7 @@ input: - fasta: type: file description: | - Input reference genome fasta file + Input reference genome fasta file (only needed for CRAM input and/or when normal samples are provided) - targetfile: type: file description: | @@ -80,6 +59,14 @@ output: type: file description: File containing copy number segment information pattern: "*.{cns}" + - pdf: + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" - versions: type: file description: File containing software versions @@ -91,3 +78,4 @@ authors: - "@drpatelh" - "@fbdtemme" - "@lassefolkersen" + - "@SusiJo" diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index 6b40dec6..988ed3ad 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -35,8 +35,8 @@ workflow test_cnvkit_cram { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -50,8 +50,20 @@ workflow test_cnvkit_tumoronly { 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), [] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) CNVKIT_TUMORONLY ( input, [], [], reference ) } + +workflow test_cnvkit_tumoronly_cram { + + input = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + + CNVKIT_TUMORONLY ( input, fasta, [], reference ) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 57af3603..00d6a767 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,15 +1,14 @@ - name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/baits.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/baits.target.bed md5sum: 26d25ff2d6c45b6d92169b3559c6acdb - path: output/cnvkit/reference.cnn - md5sum: ac99c1ad8b917b96ae15119146c91ab9 + md5sum: 035d031f54c5f1b43b903da96559b475 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.bintest.cns @@ -28,19 +27,18 @@ md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 - name: cnvkit batch test_cnvkit_wgs - command: nextflow run 
./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/genome.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/genome.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: output/cnvkit/genome.target.bed md5sum: a13353ae9c8405e701390c069255bbd2 - path: output/cnvkit/reference.cnn - md5sum: 05c6211e0179885b8a83e44fd21d5f86 + md5sum: 1606a85410bfaa79464be6e98699aa83 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn @@ -59,19 +57,18 @@ md5sum: 6ae6b3fce7299eedca6133d911c38fe1 - name: cnvkit batch test_cnvkit_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/genome.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/genome.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: output/cnvkit/genome.target.bed md5sum: a13353ae9c8405e701390c069255bbd2 - path: output/cnvkit/reference.cnn - md5sum: 05c6211e0179885b8a83e44fd21d5f86 + md5sum: 1606a85410bfaa79464be6e98699aa83 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn @@ -90,12 +87,21 @@ md5sum: 6ae6b3fce7299eedca6133d911c38fe1 - name: cnvkit batch test_cnvkit_tumoronly 
- command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/reference.antitarget-tmp.bed + - path: output/cnvkit/reference.target-tmp.bed + md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + +- name: cnvkit batch test_cnvkit_tumoronly_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch files: - path: output/cnvkit/reference.antitarget-tmp.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/reference.target-tmp.bed md5sum: 26d25ff2d6c45b6d92169b3559c6acdb