From 15fba1dd7c93650acbcabdd5ff8c0732c104dc48 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 17 May 2022 23:01:24 +0200 Subject: [PATCH 01/13] add filter variant tranches --- modules/gatk4/filtervarianttranches/main.nf | 49 ++++++++++++++++++ modules/gatk4/filtervarianttranches/meta.yml | 51 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../gatk4/filtervarianttranches/main.nf | 15 ++++++ .../filtervarianttranches/nextflow.config | 5 ++ .../gatk4/filtervarianttranches/test.yml | 14 +++++ 6 files changed, 138 insertions(+) create mode 100644 modules/gatk4/filtervarianttranches/main.nf create mode 100644 modules/gatk4/filtervarianttranches/meta.yml create mode 100644 tests/modules/gatk4/filtervarianttranches/main.nf create mode 100644 tests/modules/gatk4/filtervarianttranches/nextflow.config create mode 100644 tests/modules/gatk4/filtervarianttranches/test.yml diff --git a/modules/gatk4/filtervarianttranches/main.nf b/modules/gatk4/filtervarianttranches/main.nf new file mode 100644 index 00000000..d77fb29f --- /dev/null +++ b/modules/gatk4/filtervarianttranches/main.nf @@ -0,0 +1,49 @@ +process GATK4_FILTERVARIANTTRANCHES { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(intervals) + path ressources + path fasta + path fai + path dict + + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def ressources = ressources.collect{"--ressources $it"}.join(' ') + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK FilterVariantTranches] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" FilterVariantTranches \\ + --variant $vcf \\ + $ressources \\ + --output ${prefix}.filtered.vcf.gz \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk4/filtervarianttranches/meta.yml b/modules/gatk4/filtervarianttranches/meta.yml new file mode 100644 index 00000000..e260a649 --- /dev/null +++ b/modules/gatk4/filtervarianttranches/meta.yml @@ -0,0 +1,51 @@ +name: "gatk4_filtervarianttranches" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk4": + ## TODO nf-core: Add a description and other details for the software below + description: "Genome Analysis Toolkit (GATK4)" + homepage: "None" + documentation: "None" + tool_dev_url: "https://github.com/broadinstitute/gatk" + doi: "" + licence: "['BSD-3-clause']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@FriederikeHanssen" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3970c113..81dbf3c3 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -755,6 +755,10 @@ gatk4/filtermutectcalls: - modules/gatk4/filtermutectcalls/** - tests/modules/gatk4/filtermutectcalls/** +gatk4/filtervarianttranches: + - modules/gatk4/filtervarianttranches/** + - tests/modules/gatk4/filtervarianttranches/** + gatk4/gatherbqsrreports: - modules/gatk4/gatherbqsrreports/** - tests/modules/gatk4/gatherbqsrreports/** diff --git a/tests/modules/gatk4/filtervarianttranches/main.nf b/tests/modules/gatk4/filtervarianttranches/main.nf new file mode 100644 index 00000000..35cc5a6b --- /dev/null +++ b/tests/modules/gatk4/filtervarianttranches/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_FILTERVARIANTTRANCHES } from '../../../../modules/gatk4/filtervarianttranches/main.nf' + +workflow test_gatk4_filtervarianttranches { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK4_FILTERVARIANTTRANCHES ( input ) +} diff --git a/tests/modules/gatk4/filtervarianttranches/nextflow.config b/tests/modules/gatk4/filtervarianttranches/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/filtervarianttranches/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/filtervarianttranches/test.yml b/tests/modules/gatk4/filtervarianttranches/test.yml new file mode 100644 index 00000000..1ba2657e --- /dev/null +++ b/tests/modules/gatk4/filtervarianttranches/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk4/filtervarianttranches +- name: "gatk4 filtervarianttranches" + command: nextflow run ./tests/modules/gatk4/filtervarianttranches -entry test_gatk4_filtervarianttranches -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtervarianttranches/nextflow.config + tags: + - "gatk4" + # + - "gatk4/filtervarianttranches" + # + files: + - path: "output/gatk4/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk4/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 5ae757121f758584788191fc96fc32f162669c37 Mon Sep 17 00:00:00 2001 From: Rike Date: Tue, 17 May 2022 23:38:26 +0200 Subject: [PATCH 02/13] Add indices and meta --- modules/gatk4/filtervarianttranches/main.nf | 13 ++-- modules/gatk4/filtervarianttranches/meta.yml | 61 +++++++++++-------- .../gatk4/filtervarianttranches/main.nf | 35 +++++++++-- 3 files changed, 73 insertions(+), 36 deletions(-) diff --git a/modules/gatk4/filtervarianttranches/main.nf b/modules/gatk4/filtervarianttranches/main.nf index d77fb29f..2887324b 100644 --- a/modules/gatk4/filtervarianttranches/main.nf +++ b/modules/gatk4/filtervarianttranches/main.nf @@ -8,16 +8,17 @@ process GATK4_FILTERVARIANTTRANCHES { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(intervals) - path ressources + tuple val(meta), path(vcf), path(tbi),path(intervals) + path resources + path resources_index path fasta path fai path dict output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,7 +27,7 @@ process GATK4_FILTERVARIANTTRANCHES { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def ressources = ressources.collect{"--ressources $it"}.join(' ') + def resources = resources.collect{"--resource $it"}.join(' ') def avail_mem = 3 if (!task.memory) { log.info '[GATK FilterVariantTranches] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -36,7 +37,7 @@ process GATK4_FILTERVARIANTTRANCHES { """ gatk --java-options "-Xmx${avail_mem}g" FilterVariantTranches \\ --variant $vcf \\ - $ressources \\ + $resources \\ --output ${prefix}.filtered.vcf.gz \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/filtervarianttranches/meta.yml b/modules/gatk4/filtervarianttranches/meta.yml index e260a649..7682b534 100644 --- a/modules/gatk4/filtervarianttranches/meta.yml +++ b/modules/gatk4/filtervarianttranches/meta.yml @@ -1,51 +1,64 @@ name: "gatk4_filtervarianttranches" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Apply tranche filtering keywords: - - sort + - gatk4 + - filtervarianttranches + tools: - "gatk4": - ## TODO nf-core: Add a description and other details for the software below - description: "Genome Analysis Toolkit (GATK4)" - homepage: "None" - documentation: "None" - tool_dev_url: "https://github.com/broadinstitute/gatk" - doi: "" - licence: "['BSD-3-clause']" + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - vcf: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: a VCF file containing variants, must have info key:CNN_2D + pattern: "*.vcf.gz" + - tbi: + type: file + description: tbi file matching with -vcf + pattern: "*.vcf.gz.tbi" + - resources: + type: list + description: resource A VCF containing known SNP and or INDEL sites. Can be supplied as many times as necessary + pattern: "*.vcf.gz" + - resources_index: + type: list + description: Index of resource VCF containing known SNP and or INDEL sites. Can be supplied as many times as necessary + pattern: "*.vcf.gz" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - vcf: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: VCF file + pattern: "*.vcf.gz" authors: - "@FriederikeHanssen" diff --git a/tests/modules/gatk4/filtervarianttranches/main.nf b/tests/modules/gatk4/filtervarianttranches/main.nf index 35cc5a6b..650e78a8 100644 --- a/tests/modules/gatk4/filtervarianttranches/main.nf +++ b/tests/modules/gatk4/filtervarianttranches/main.nf @@ -5,11 +5,34 @@ nextflow.enable.dsl = 2 include { GATK4_FILTERVARIANTTRANCHES } from '../../../../modules/gatk4/filtervarianttranches/main.nf' workflow test_gatk4_filtervarianttranches { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - GATK4_FILTERVARIANTTRANCHES ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + resources = [ + file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + ] + resources_index =[ file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), + //file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + ] + // , [ + // 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + // 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + // '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + // 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + // ]] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + + GATK4_FILTERVARIANTTRANCHES ( input, resources, resources_index, fasta, fai, dict) } From 520dd06ca35f9723a4da7ff4876c5a87176f9467 Mon Sep 17 00:00:00 2001 From: Rike Date: Sun, 22 May 2022 13:25:39 +0200 Subject: [PATCH 03/13] add new testdata --- tests/config/test_data.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 5937e869..cf7d45f6 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -266,6 +266,8 @@ params { test2_haplotc_ann_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" test2_haplotc_ann_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test_haplotc_cnn_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz" + test_haplotc_cnn_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi" test2_haplotc_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" test2_haplotc_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" From 5d72500d601432f5396e9022c3a709854197db1a Mon Sep 17 00:00:00 2001 From: Rike Date: Sun, 22 May 2022 13:34:56 +0200 Subject: [PATCH 04/13] remove variables used for test data generation, add tbi --- modules/gatk4/cnnscorevariants/main.nf | 9 +++++---- modules/gatk4/cnnscorevariants/meta.yml | 8 ++++++++ modules/gatk4/filtervarianttranches/main.nf | 7 ++++--- modules/gatk4/filtervarianttranches/meta.yml | 4 ++++ 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/modules/gatk4/cnnscorevariants/main.nf b/modules/gatk4/cnnscorevariants/main.nf index c02c6035..a1bb3811 100644 --- a/modules/gatk4/cnnscorevariants/main.nf +++ b/modules/gatk4/cnnscorevariants/main.nf @@ -9,7 +9,7 @@ process GATK4_CNNSCOREVARIANTS { container 'broadinstitute/gatk:4.2.6.1' //Biocontainers is missing a package input: - tuple val(meta), path(vcf), path(aligned_input), path(intervals) + tuple val(meta), path(vcf), path(tbi), path(aligned_input), path(intervals) path fasta path fai path dict @@ -17,8 +17,9 @@ process GATK4_CNNSCOREVARIANTS { path weights output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*cnn.vcf.gz") , emit: vcf + tuple val(meta), path("*cnn.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -40,7 +41,7 @@ process GATK4_CNNSCOREVARIANTS { """ gatk --java-options "-Xmx${avail_mem}g" CNNScoreVariants \\ --variant $vcf \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.cnn.vcf.gz \\ --reference $fasta \\ $interval_command \\ $aligned_input \\ diff --git a/modules/gatk4/cnnscorevariants/meta.yml b/modules/gatk4/cnnscorevariants/meta.yml index 1d47e6e7..4e59cb8e 100644 --- a/modules/gatk4/cnnscorevariants/meta.yml +++ b/modules/gatk4/cnnscorevariants/meta.yml @@ -25,6 +25,10 @@ input: type: file description: VCF file pattern: "*.vcf.gz" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" - aligned_input: type: file description: BAM/CRAM file from alignment (optional) @@ -67,6 +71,10 @@ output: type: file description: Annotated VCF file pattern: "*.vcf" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" authors: - "@FriederikeHanssen" diff --git a/modules/gatk4/filtervarianttranches/main.nf b/modules/gatk4/filtervarianttranches/main.nf index 2887324b..03323744 100644 --- a/modules/gatk4/filtervarianttranches/main.nf +++ b/modules/gatk4/filtervarianttranches/main.nf @@ -8,7 +8,7 @@ process GATK4_FILTERVARIANTTRANCHES { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(tbi),path(intervals) + tuple val(meta), path(vcf), path(tbi), path(intervals) path resources path resources_index path fasta @@ -17,8 +17,9 @@ process GATK4_FILTERVARIANTTRANCHES { output: - tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/gatk4/filtervarianttranches/meta.yml b/modules/gatk4/filtervarianttranches/meta.yml index 7682b534..f89063a5 100644 --- a/modules/gatk4/filtervarianttranches/meta.yml +++ b/modules/gatk4/filtervarianttranches/meta.yml @@ -59,6 +59,10 @@ output: type: file description: VCF file pattern: "*.vcf.gz" + - tbi: + type: file + description: VCF index file + pattern: "*.vcf.gz.tbi" authors: - "@FriederikeHanssen" From cd60b4c6eb0ecee7dbcaa2e404336545b4b6ecee Mon Sep 17 00:00:00 2001 From: Rike Date: Sun, 22 May 2022 13:35:35 +0200 Subject: [PATCH 05/13] add in testdata paths --- .../gatk4/filtervarianttranches/main.nf | 38 +++++++------------ .../filtervarianttranches/nextflow.config | 5 ++- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/tests/modules/gatk4/filtervarianttranches/main.nf b/tests/modules/gatk4/filtervarianttranches/main.nf index 650e78a8..91dde3d4 100644 --- a/tests/modules/gatk4/filtervarianttranches/main.nf +++ b/tests/modules/gatk4/filtervarianttranches/main.nf @@ -3,36 +3,24 @@ nextflow.enable.dsl = 2 include { GATK4_FILTERVARIANTTRANCHES } from '../../../../modules/gatk4/filtervarianttranches/main.nf' - +include { GATK4_CNNSCOREVARIANTS } from '../../../../modules/gatk4/cnnscorevariants/main.nf' +include { GATK4_HAPLOTYPECALLER } from '../../../../modules/gatk4/haplotypecaller/main.nf' workflow test_gatk4_filtervarianttranches { + resources = [ file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) ] + resources_index = [ + file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true), + ] + input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), [] ] - resources = [ - file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) - ] - resources_index =[ file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), - //file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) - ] - // , [ - // 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', - // 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', - // '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', - // 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' - // ]] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) - - GATK4_FILTERVARIANTTRANCHES ( input, resources, resources_index, fasta, fai, dict) + GATK4_FILTERVARIANTTRANCHES (input , resources, resources_index, fasta, fai, dict) } diff --git a/tests/modules/gatk4/filtervarianttranches/nextflow.config b/tests/modules/gatk4/filtervarianttranches/nextflow.config index 50f50a7a..7a8f0a06 100644 --- a/tests/modules/gatk4/filtervarianttranches/nextflow.config +++ b/tests/modules/gatk4/filtervarianttranches/nextflow.config @@ -1,5 +1,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file + + ext.args = "--info-key CNN_1D" +} From 2f36a44f61471ecf6bd6b047a2977d8dfe600ea8 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 23 May 2022 12:00:31 +0200 Subject: [PATCH 06/13] nf-core create-test-yml --- .../gatk4/filtervarianttranches/test.yml | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/tests/modules/gatk4/filtervarianttranches/test.yml b/tests/modules/gatk4/filtervarianttranches/test.yml index 1ba2657e..a4739643 100644 --- a/tests/modules/gatk4/filtervarianttranches/test.yml +++ b/tests/modules/gatk4/filtervarianttranches/test.yml @@ -1,14 +1,9 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk4/filtervarianttranches -- name: "gatk4 filtervarianttranches" - command: nextflow run ./tests/modules/gatk4/filtervarianttranches -entry test_gatk4_filtervarianttranches -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtervarianttranches/nextflow.config +- name: gatk4 filtervarianttranches test_gatk4_filtervarianttranches + command: nextflow run ./tests/modules/gatk4/filtervarianttranches -entry test_gatk4_filtervarianttranches -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtervarianttranches/nextflow.config tags: - - "gatk4" - # - - "gatk4/filtervarianttranches" - # + - gatk4/filtervarianttranches + - gatk4 files: - - path: "output/gatk4/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk4/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk4/test.filtered.vcf.gz + - path: output/gatk4/test.filtered.vcf.gz.tbi + md5sum: a7736ea2cea26420b1a86413adeefda8 From a8dac59c50fd805fe55035a3e8fb2133df66e6e1 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 23 May 2022 12:10:50 +0200 Subject: [PATCH 07/13] md5sum not consistent --- tests/modules/gatk4/filtervarianttranches/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/gatk4/filtervarianttranches/test.yml b/tests/modules/gatk4/filtervarianttranches/test.yml index a4739643..97cc1f0f 100644 --- a/tests/modules/gatk4/filtervarianttranches/test.yml +++ b/tests/modules/gatk4/filtervarianttranches/test.yml @@ -6,4 +6,3 @@ files: - path: output/gatk4/test.filtered.vcf.gz - path: output/gatk4/test.filtered.vcf.gz.tbi - md5sum: a7736ea2cea26420b1a86413adeefda8 From 175ea9b7f95a0e2fd3679f7a052c6dcb60b61a6e Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 23 May 2022 12:15:33 +0200 Subject: [PATCH 08/13] fix version --- modules/gatk4/filtervarianttranches/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/filtervarianttranches/main.nf b/modules/gatk4/filtervarianttranches/main.nf index 03323744..98e620e7 100644 --- a/modules/gatk4/filtervarianttranches/main.nf +++ b/modules/gatk4/filtervarianttranches/main.nf @@ -45,7 +45,7 @@ process GATK4_FILTERVARIANTTRANCHES { cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk4: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') END_VERSIONS """ } From 3af61cfbbc26c4af59016f2d80c74ecd186d765d Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 23 May 2022 12:26:12 +0200 Subject: [PATCH 09/13] fix input cardinality for index --- tests/modules/gatk4/cnnscorevariants/main.nf | 3 ++- tests/modules/gatk4/cnnscorevariants/test.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/modules/gatk4/cnnscorevariants/main.nf b/tests/modules/gatk4/cnnscorevariants/main.nf index d03acb78..1654c54a 100644 --- a/tests/modules/gatk4/cnnscorevariants/main.nf +++ b/tests/modules/gatk4/cnnscorevariants/main.nf @@ -7,7 +7,8 @@ include { GATK4_CNNSCOREVARIANTS } from '../../../../modules/gatk4/cnnscorevaria workflow test_gatk4_cnnscorevariants { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), [], [] ] diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index 2cbbf5fa..fd3c2b3a 100644 --- a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -7,3 +7,4 @@ - path: output/gatk4/test.vcf.gz contains: - "##ALT= Date: Mon, 23 May 2022 12:53:36 +0200 Subject: [PATCH 10/13] test path update --- tests/modules/gatk4/cnnscorevariants/test.yml | 10 +++++----- tests/modules/gatk4/filtervarianttranches/test.yml | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index fd3c2b3a..c19f7ac1 100644 --- a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -1,10 +1,10 @@ - name: gatk4 cnnscorevariants test_gatk4_cnnscorevariants command: nextflow run ./tests/modules/gatk4/cnnscorevariants -entry test_gatk4_cnnscorevariants -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/cnnscorevariants/nextflow.config tags: - - gatk4 - gatk4/cnnscorevariants + - gatk4 files: - - path: output/gatk4/test.vcf.gz - contains: - - "##ALT= Date: Mon, 23 May 2022 13:04:49 +0200 Subject: [PATCH 11/13] contains --- tests/modules/gatk4/cnnscorevariants/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index c19f7ac1..cef82b42 100644 --- a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -5,6 +5,6 @@ - gatk4 files: - path: output/gatk4/test.cnn.vcf.gz - contains: "##ALT= Date: Mon, 23 May 2022 13:07:08 +0200 Subject: [PATCH 12/13] add brackets to contains --- tests/modules/gatk4/cnnscorevariants/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index cef82b42..b3ab2517 100644 --- a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -5,6 +5,6 @@ - gatk4 files: - path: output/gatk4/test.cnn.vcf.gz - contains: "CNN_1D=-11.836;END=1951 GT:DP:GQ:MIN_DP:PL" + contains: ["CNN_1D=-11.836;END=1951 GT:DP:GQ:MIN_DP:PL"] - path: output/gatk4/test.cnn.vcf.gz.tbi md5sum: 76eda8249fa285c061e1a32da2ad95d7 From e34dcdd8bc5874b78e8af6bb375289be90be25f1 Mon Sep 17 00:00:00 2001 From: Rike Date: Mon, 23 May 2022 13:19:42 +0200 Subject: [PATCH 13/13] add brackets to contains --- tests/modules/gatk4/cnnscorevariants/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/modules/gatk4/cnnscorevariants/test.yml b/tests/modules/gatk4/cnnscorevariants/test.yml index b3ab2517..eb2e681f 100644 --- a/tests/modules/gatk4/cnnscorevariants/test.yml +++ b/tests/modules/gatk4/cnnscorevariants/test.yml @@ -5,6 +5,5 @@ - gatk4 files: - path: output/gatk4/test.cnn.vcf.gz - contains: ["CNN_1D=-11.836;END=1951 GT:DP:GQ:MIN_DP:PL"] + contains: ["##ALT=