diff --git a/modules/cnvkit/antitarget/main.nf b/modules/cnvkit/antitarget/main.nf new file mode 100644 index 00000000..bf6461bd --- /dev/null +++ b/modules/cnvkit/antitarget/main.nf @@ -0,0 +1,36 @@ +process CNVKIT_ANTITARGET { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + tuple val(meta), path(targets) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + cnvkit.py \\ + antitarget \\ + $targets \\ + --output ${prefix}.antitarget.bed \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/cnvkit/antitarget/meta.yml b/modules/cnvkit/antitarget/meta.yml new file mode 100644 index 00000000..28a2bfa4 --- /dev/null +++ b/modules/cnvkit/antitarget/meta.yml @@ -0,0 +1,44 @@ +name: cnvkit_antitarget +description: Derive a BED file of off-target (antitarget) regions from a BED file of target regions +keywords: + - cnvkit + - antitarget +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. 
+ homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: "https://github.com/etal/cnvkit" + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: File containing off-target regions + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index 7c44d9f6..c1f9ef87 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -2,10 +2,10 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9' : null) + conda (params.enable_conda ? 'bioconda::cnvkit=0.9.9 bioconda::samtools=1.15.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0' : - 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' : + 'quay.io/biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:304d1c5ab610f216e77c61420ebe85f1e7c5968a-0' }" input: tuple val(meta), path(tumor), path(normal) @@ -18,6 +18,8 @@ process CNVKIT_BATCH { tuple val(meta), path("*.cnn"), emit: cnn, optional: true tuple val(meta), path("*.cnr"), emit: cnr, optional: true tuple val(meta), path("*.cns"), emit: cns, optional: true + tuple val(meta), path("*.pdf"), emit: pdf, optional: true + tuple val(meta), path("*.png"), emit: png, optional: true path "versions.yml" , emit: versions when: @@ -25,21 +27,39 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - def normal_args = normal ? "--normal $normal" : "" - def fasta_args = fasta ? "--fasta $fasta" : "" + + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + // input pair is assumed to have same extension if both exist + def is_cram = tumor.Extension == "cram" ? true : false + def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + + // do not run samtools on normal samples in tumor_only mode + def normal_exists = normal ? true: false + // tumor_only mode does not need fasta & target + // instead it requires a pre-computed reference.cnn which is built from fasta & target + def (normal_out, normal_args, fasta_args) = ["", "", ""] + + if (normal_exists){ + def normal_prefix = normal.BaseName + normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" + normal_args = normal_prefix ? "--normal $normal_out" : "" + fasta_args = fasta ? "--fasta $fasta" : "" + } + + def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? 
"--reference $reference" : "" - def target_args = "" - if (args.contains("--method wgs") || args.contains("-m wgs")) { - target_args = targets ? "--targets $targets" : "" - } - else { - target_args = "--targets $targets" - } """ + if $is_cram; then + samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out + if $normal_exists; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + fi + cnvkit.py \\ batch \\ - $tumor \\ + $tumor_out \\ $normal_args \\ $fasta_args \\ $reference_args \\ diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 474c55f2..2cd675c7 100644 --- a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -11,27 +11,6 @@ tools: homepage: https://cnvkit.readthedocs.io/en/stable/index.html documentation: https://cnvkit.readthedocs.io/en/stable/index.html licence: ["Apache-2.0"] -params: - - outdir: - type: string - description: | - The pipeline's output directory. By default, the module will - output files into `$params.outdir/` - - publish_dir_mode: - type: string - description: | - Value for the Nextflow `publishDir` mode parameter. - Available: symlink, rellink, link, copy, copyNoFollow, move. - - enable_conda: - type: boolean - description: | - Run the module with Conda using the software specified - via the `conda` directive - - singularity_pull_docker_container: - type: boolean - description: | - Instead of directly downloading Singularity images for use with Singularity, - force the workflow to pull and convert Docker containers instead. 
input: - meta: type: map @@ -49,7 +28,7 @@ input: - fasta: type: file description: | - Input reference genome fasta file + Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) - targetfile: type: file description: | @@ -80,6 +59,14 @@ output: type: file description: File containing copy number segment information pattern: "*.{cns}" + - pdf: + type: file + description: File with plot of copy numbers or segments on chromosomes + pattern: "*.{pdf}" + - png: + type: file + description: File with plot of bin-level log2 coverages and segmentation calls + pattern: "*.{png}" - versions: type: file description: File containing software versions @@ -91,3 +78,4 @@ authors: - "@drpatelh" - "@fbdtemme" - "@lassefolkersen" + - "@SusiJo" diff --git a/modules/cnvkit/reference/main.nf b/modules/cnvkit/reference/main.nf new file mode 100644 index 00000000..10458f27 --- /dev/null +++ b/modules/cnvkit/reference/main.nf @@ -0,0 +1,40 @@ +process CNVKIT_REFERENCE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.9--pyhdfd78af_0': + 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" + + input: + path fasta + path targets + path antitargets + + output: + path "*.cnn" , emit: cnn + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: targets.BaseName + + """ + cnvkit.py \\ + reference \\ + --fasta $fasta \\ + --targets $targets \\ + --antitargets $antitargets \\ + --output ${prefix}.reference.cnn \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/cnvkit/reference/meta.yml b/modules/cnvkit/reference/meta.yml new file mode 100644 index 00000000..2e0fef1a --- /dev/null +++ b/modules/cnvkit/reference/meta.yml @@ -0,0 +1,47 @@ +name: cnvkit_reference +description: Build a copy-number reference (.cnn) from a reference genome FASTA, target regions and antitarget regions +keywords: + - cnvkit + - reference +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. + It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + tool_dev_url: https://github.com/etal/cnvkit + doi: 10.1371/journal.pcbi.1004873 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: File containing reference genome + pattern: "*.{fasta}" + - targets: + type: file + description: File containing genomic regions + pattern: "*.{bed}" + - antitargets: + type: file + description: File containing off-target genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reference: + type: file + description: File containing a copy-number reference (required for CNV calling in tumor_only mode) + pattern: "*.{cnn}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@SusiJo" diff --git a/modules/gatk4/cnnscorevariants/main.nf b/modules/gatk4/cnnscorevariants/main.nf index c02c6035..a1bb3811 100644 --- a/modules/gatk4/cnnscorevariants/main.nf +++ b/modules/gatk4/cnnscorevariants/main.nf @@ -9,7 +9,7 @@ process GATK4_CNNSCOREVARIANTS { container 'broadinstitute/gatk:4.2.6.1' //Biocontainers is missing a package input: - tuple val(meta), path(vcf), path(aligned_input), path(intervals) + tuple val(meta), path(vcf), path(tbi), path(aligned_input), path(intervals) path fasta path fai path dict @@ -17,8 +17,9 @@ process GATK4_CNNSCOREVARIANTS { path weights output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*cnn.vcf.gz") , emit: vcf + tuple val(meta), path("*cnn.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -40,7 +41,7 @@ process GATK4_CNNSCOREVARIANTS { """ gatk --java-options "-Xmx${avail_mem}g" CNNScoreVariants \\ --variant $vcf \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.cnn.vcf.gz \\ --reference $fasta \\ $interval_command \\ $aligned_input \\ diff --git a/modules/gatk4/cnnscorevariants/meta.yml b/modules/gatk4/cnnscorevariants/meta.yml index 1d47e6e7..4e59cb8e 100644 --- a/modules/gatk4/cnnscorevariants/meta.yml +++ b/modules/gatk4/cnnscorevariants/meta.yml @@ -25,6 +25,10 @@ input: type: file description: VCF file pattern: "*.vcf.gz" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" - aligned_input: type: file description: BAM/CRAM file from alignment (optional) @@ -67,6 +71,10 @@ output: type: file description: Annotated VCF file pattern: "*.vcf" + - tbi: + 
type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" authors: - "@FriederikeHanssen" diff --git a/modules/gatk4/filtervarianttranches/main.nf b/modules/gatk4/filtervarianttranches/main.nf new file mode 100644 index 00000000..98e620e7 --- /dev/null +++ b/modules/gatk4/filtervarianttranches/main.nf @@ -0,0 +1,51 @@ +process GATK4_FILTERVARIANTTRANCHES { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(intervals) + path resources + path resources_index + path fasta + path fai + path dict + + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def resources = resources.collect{"--resource $it"}.join(' ') + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK FilterVariantTranches] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" FilterVariantTranches \\ + --variant $vcf \\ + $resources \\ + --output ${prefix}.filtered.vcf.gz \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/filtervarianttranches/meta.yml b/modules/gatk4/filtervarianttranches/meta.yml new file mode 100644 index 00000000..f89063a5 --- /dev/null +++ b/modules/gatk4/filtervarianttranches/meta.yml @@ -0,0 +1,68 @@ +name: "gatk4_filtervarianttranches" +description: Apply tranche filtering +keywords: + - gatk4 + - filtervarianttranches + +tools: + - "gatk4": + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: a VCF file containing variants, must have info key:CNN_2D + pattern: "*.vcf.gz" + - tbi: + type: file + description: Tabix index (.tbi) of the input VCF + pattern: "*.vcf.gz.tbi" + - resources: + type: list + description: A resource VCF containing known SNP and/or INDEL sites. Can be supplied as many times as necessary + pattern: "*.vcf.gz" + - resources_index: + type: list + description: Index of a resource VCF containing known SNP and/or INDEL sites. Can be supplied as many times as necessary + pattern: "*.vcf.gz.tbi" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" + +authors: + - "@FriederikeHanssen" diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index ff91e06f..5f6538e7 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -10,13 +10,13 @@ process MOSDEPTH { input: tuple val(meta), path(bam), path(bai) path bed - val window_size + path fasta output: tuple val(meta), path('*.global.dist.txt') , emit: global_txt tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: d4 , optional:true + tuple val(meta), path('*.per-base.d4') , emit: per_base_d4 , optional:true tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true tuple val(meta), path('*.per-base.bed.gz.csi'), emit: per_base_csi, optional:true tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true @@ -29,19 +29,21 @@ process MOSDEPTH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (window_size) { - interval = "--by ${window_size}" - } else if ( bed ) { - interval = "--by ${bed}" - } else { - interval = "" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + exit 1, "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" } + """ mosdepth \\ + --threads ${task.cpus} \\ $interval \\ + $reference \\ $args \\ $prefix \\ $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') diff --git a/modules/mosdepth/meta.yml b/modules/mosdepth/meta.yml index 636e966b..6e9e34c9 100644 --- a/modules/mosdepth/meta.yml +++ b/modules/mosdepth/meta.yml @@ -30,10 +30,10 @@ input: type: file description: BED file with intersected intervals pattern: "*.{bed}" - - window_size: - type: integer - description: Window size - pattern: "[0-9]+" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" output: - meta: type: map @@ -60,6 +60,10 @@ output: type: file description: Index file for BED file with per-base coverage pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" - regions_bed: type: file description: BED file with per-region coverage @@ -76,3 +80,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@ramprasadn" + - "@matthdsm" diff --git a/modules/picard/collecthsmetrics/main.nf b/modules/picard/collecthsmetrics/main.nf index 317aff4b..06b6b8f3 100644 --- a/modules/picard/collecthsmetrics/main.nf +++ b/modules/picard/collecthsmetrics/main.nf @@ -24,7 +24,7 @@ process PICARD_COLLECTHSMETRICS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "-R $fasta" : "" + def reference = fasta ? 
"--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3 if (!task.memory) { diff --git a/modules/picard/collectmultiplemetrics/main.nf b/modules/picard/collectmultiplemetrics/main.nf index a653b549..3c3152df 100644 --- a/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/picard/collectmultiplemetrics/main.nf @@ -22,6 +22,7 @@ process PICARD_COLLECTMULTIPLEMETRICS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -35,7 +36,7 @@ process PICARD_COLLECTMULTIPLEMETRICS { $args \\ --INPUT $bam \\ --OUTPUT ${prefix}.CollectMultipleMetrics \\ - --REFERENCE_SEQUENCE $fasta + $reference cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/picard/collectwgsmetrics/main.nf b/modules/picard/collectwgsmetrics/main.nf index e6dd49e9..cab1fe0c 100644 --- a/modules/picard/collectwgsmetrics/main.nf +++ b/modules/picard/collectwgsmetrics/main.nf @@ -34,7 +34,7 @@ process PICARD_COLLECTWGSMETRICS { $args \\ --INPUT $bam \\ --OUTPUT ${prefix}.CollectWgsMetrics.coverage_metrics \\ - --REFERENCE_SEQUENCE $fasta + --REFERENCE_SEQUENCE ${fasta} cat <<-END_VERSIONS > versions.yml diff --git a/modules/umitools/dedup/main.nf b/modules/umitools/dedup/main.nf index dfcbcf2f..07e6061d 100644 --- a/modules/umitools/dedup/main.nf +++ b/modules/umitools/dedup/main.nf @@ -9,12 +9,13 @@ process UMITOOLS_DEDUP { input: tuple val(meta), path(bam), path(bai) + val get_output_stats output: tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*edit_distance.tsv"), emit: tsv_edit_distance - tuple val(meta), path("*per_umi.tsv") , emit: tsv_per_umi - tuple val(meta), path("*per_position.tsv") , emit: tsv_umi_per_position + tuple val(meta), path("*edit_distance.tsv"), optional:true, 
emit: tsv_edit_distance + tuple val(meta), path("*per_umi.tsv") , optional:true, emit: tsv_per_umi + tuple val(meta), path("*per_position.tsv") , optional:true, emit: tsv_umi_per_position path "versions.yml" , emit: versions when: @@ -24,12 +25,13 @@ process UMITOOLS_DEDUP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def paired = meta.single_end ? "" : "--paired" + def stats = get_output_stats ? "--output-stats $prefix" : "" """ umi_tools \\ dedup \\ -I $bam \\ -S ${prefix}.bam \\ - --output-stats $prefix \\ + $stats \\ $paired \\ $args diff --git a/modules/umitools/dedup/meta.yml b/modules/umitools/dedup/meta.yml index eee8952f..56888e5a 100644 --- a/modules/umitools/dedup/meta.yml +++ b/modules/umitools/dedup/meta.yml @@ -26,6 +26,10 @@ input: description: | BAM index files corresponding to the input BAM file. pattern: "*.{bai}" + - get_output_stats: + type: boolean + description: | + Whether or not to generate output stats. output: - meta: type: map diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index e265a10d..d8227203 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -451,10 +451,18 @@ cmseq/polymut: - modules/cmseq/polymut/** - tests/modules/cmseq/polymut/** +cnvkit/antitarget: + - modules/cnvkit/antitarget/** + - tests/modules/cnvkit/antitarget/** + cnvkit/batch: - modules/cnvkit/batch/** - tests/modules/cnvkit/batch/** +cnvkit/reference: + - modules/cnvkit/reference/** + - tests/modules/cnvkit/reference/** + controlfreec/assesssignificance: - modules/controlfreec/assesssignificance/** - tests/modules/controlfreec/assesssignificance/** @@ -763,6 +771,10 @@ gatk4/filtermutectcalls: - modules/gatk4/filtermutectcalls/** - tests/modules/gatk4/filtermutectcalls/** +gatk4/filtervarianttranches: + - modules/gatk4/filtervarianttranches/** + - tests/modules/gatk4/filtervarianttranches/** + gatk4/gatherbqsrreports: - modules/gatk4/gatherbqsrreports/** - 
tests/modules/gatk4/gatherbqsrreports/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 5937e869..39331664 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -142,6 +142,7 @@ params { genome_21_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.sizes" genome_21_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list" genome_21_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" + genome_21_multi_interval_antitarget_bed = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.antitarget.bed" genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" @@ -266,6 +267,8 @@ params { test2_haplotc_ann_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" test2_haplotc_ann_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test_haplotc_cnn_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz" + test_haplotc_cnn_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi" test2_haplotc_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" test2_haplotc_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" diff --git a/tests/modules/cnvkit/antitarget/main.nf b/tests/modules/cnvkit/antitarget/main.nf new file mode 100644 
index 00000000..44e49eb2 --- /dev/null +++ b/tests/modules/cnvkit/antitarget/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CNVKIT_ANTITARGET } from '../../../../modules/cnvkit/antitarget/main.nf' + +workflow test_cnvkit_antitarget { + + input = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + ] + + CNVKIT_ANTITARGET ( input ) +} + diff --git a/tests/modules/cnvkit/antitarget/nextflow.config b/tests/modules/cnvkit/antitarget/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/cnvkit/antitarget/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/cnvkit/antitarget/test.yml b/tests/modules/cnvkit/antitarget/test.yml new file mode 100644 index 00000000..e1c3d0c8 --- /dev/null +++ b/tests/modules/cnvkit/antitarget/test.yml @@ -0,0 +1,8 @@ +- name: cnvkit antitarget test_cnvkit_antitarget + command: nextflow run ./tests/modules/cnvkit/antitarget -entry test_cnvkit_antitarget -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/antitarget/nextflow.config + tags: + - cnvkit + - cnvkit/antitarget + files: + - path: output/cnvkit/test.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index 6b40dec6..988ed3ad 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -35,8 +35,8 @@ workflow test_cnvkit_cram { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -50,8 +50,20 @@ workflow test_cnvkit_tumoronly { file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), [] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) CNVKIT_TUMORONLY ( input, [], [], reference ) } + +workflow test_cnvkit_tumoronly_cram { + + input = [ + [ id:'test'], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + + CNVKIT_TUMORONLY ( input, fasta, [], reference ) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 57af3603..00d6a767 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,15 +1,14 @@ - name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/baits.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/baits.target.bed md5sum: 26d25ff2d6c45b6d92169b3559c6acdb - path: output/cnvkit/reference.cnn - md5sum: 
ac99c1ad8b917b96ae15119146c91ab9 + md5sum: 035d031f54c5f1b43b903da96559b475 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.bintest.cns @@ -28,19 +27,18 @@ md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 - name: cnvkit batch test_cnvkit_wgs - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/genome.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/genome.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: output/cnvkit/genome.target.bed md5sum: a13353ae9c8405e701390c069255bbd2 - path: output/cnvkit/reference.cnn - md5sum: 05c6211e0179885b8a83e44fd21d5f86 + md5sum: 1606a85410bfaa79464be6e98699aa83 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn @@ -59,19 +57,18 @@ md5sum: 6ae6b3fce7299eedca6133d911c38fe1 - name: cnvkit batch test_cnvkit_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch files: - path: output/cnvkit/genome.antitarget.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/genome.bed md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - path: output/cnvkit/genome.target.bed md5sum: a13353ae9c8405e701390c069255bbd2 - path: output/cnvkit/reference.cnn 
- md5sum: 05c6211e0179885b8a83e44fd21d5f86 + md5sum: 1606a85410bfaa79464be6e98699aa83 - path: output/cnvkit/test.paired_end.sorted.antitargetcoverage.cnn md5sum: 203caf8cef6935bb50b4138097955cb8 - path: output/cnvkit/test.paired_end.sorted.targetcoverage.cnn @@ -90,12 +87,21 @@ md5sum: 6ae6b3fce7299eedca6133d911c38fe1 - name: cnvkit batch test_cnvkit_tumoronly - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - - cnvkit/batch - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/reference.antitarget-tmp.bed + - path: output/cnvkit/reference.target-tmp.bed + md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + +- name: cnvkit batch test_cnvkit_tumoronly_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch files: - path: output/cnvkit/reference.antitarget-tmp.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/cnvkit/reference.target-tmp.bed md5sum: 26d25ff2d6c45b6d92169b3559c6acdb diff --git a/tests/modules/cnvkit/reference/main.nf b/tests/modules/cnvkit/reference/main.nf new file mode 100644 index 00000000..a72ad566 --- /dev/null +++ b/tests/modules/cnvkit/reference/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CNVKIT_REFERENCE } from '../../../../modules/cnvkit/reference/main.nf' + +workflow test_cnvkit_reference { + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + antitargets = 
file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_antitarget_bed'], checkIfExists: true) + + CNVKIT_REFERENCE ( fasta, targets, antitargets ) +} diff --git a/tests/modules/cnvkit/reference/nextflow.config b/tests/modules/cnvkit/reference/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/cnvkit/reference/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/cnvkit/reference/test.yml b/tests/modules/cnvkit/reference/test.yml new file mode 100644 index 00000000..a5baf0a2 --- /dev/null +++ b/tests/modules/cnvkit/reference/test.yml @@ -0,0 +1,8 @@ +- name: cnvkit reference test_cnvkit_reference + command: nextflow run ./tests/modules/cnvkit/reference -entry test_cnvkit_reference -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/reference/nextflow.config + tags: + - cnvkit/reference + - cnvkit + files: + - path: output/cnvkit/multi_intervals.reference.cnn + md5sum: 7c4a7902f5ab101b1f9d6038d331b3d9 diff --git a/tests/modules/gatk4/cnnscorevariants/main.nf b/tests/modules/gatk4/cnnscorevariants/main.nf index d03acb78..1654c54a 100644 --- a/tests/modules/gatk4/cnnscorevariants/main.nf +++ b/tests/modules/gatk4/cnnscorevariants/main.nf @@ -7,7 +7,8 @@ include { GATK4_CNNSCOREVARIANTS } from '../../../../modules/gatk4/cnnscorevaria workflow test_gatk4_cnnscorevariants { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), [], [] ] diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index 2cbbf5fa..eb2e681f 100644 --- 
a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -1,9 +1,9 @@ - name: gatk4 cnnscorevariants test_gatk4_cnnscorevariants command: nextflow run ./tests/modules/gatk4/cnnscorevariants -entry test_gatk4_cnnscorevariants -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/cnnscorevariants/nextflow.config tags: - - gatk4 - gatk4/cnnscorevariants + - gatk4 files: - - path: output/gatk4/test.vcf.gz - contains: - - "##ALT=