From 3c076024435704be6ed13a0fa715a4c11d1c8825 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 4 May 2022 16:51:09 +0200 Subject: [PATCH 01/26] Added the version.yml --- modules/rtgtools/vcfeval/main.nf | 67 +++++++++++++++++++ modules/rtgtools/vcfeval/meta.yml | 51 ++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/rtgtools/vcfeval/main.nf | 15 +++++ .../modules/rtgtools/vcfeval/nextflow.config | 5 ++ tests/modules/rtgtools/vcfeval/test.yml | 14 ++++ 6 files changed, 156 insertions(+) create mode 100644 modules/rtgtools/vcfeval/main.nf create mode 100644 modules/rtgtools/vcfeval/meta.yml create mode 100644 tests/modules/rtgtools/vcfeval/main.nf create mode 100644 tests/modules/rtgtools/vcfeval/nextflow.config create mode 100644 tests/modules/rtgtools/vcfeval/test.yml diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf new file mode 100644 index 00000000..4fd8aa4b --- /dev/null +++ b/modules/rtgtools/vcfeval/main.nf @@ -0,0 +1,67 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process RTGTOOLS_VCFEVAL { + tag "$meta.id" + label 'process_medium' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::rtg-tools=3.12.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0': + 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rtgtools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + END_VERSIONS + """ +} diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml new file mode 100644 index 00000000..512a7734 --- /dev/null +++ b/modules/rtgtools/vcfeval/meta.yml @@ -0,0 +1,51 @@ +name: "rtgtools_vcfeval" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "rtgtools": + ## TODO nf-core: Add a description and other details for the software below + description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation" + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['BSD']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 32a28477..f702deda 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1591,6 +1591,10 @@ rseqc/tin: - modules/rseqc/tin/** - tests/modules/rseqc/tin/** +rtgtools/vcfeval: + - modules/rtgtools/vcfeval/** + - tests/modules/rtgtools/vcfeval/** + salmon/index: - modules/salmon/index/** - tests/modules/salmon/index/** diff --git a/tests/modules/rtgtools/vcfeval/main.nf b/tests/modules/rtgtools/vcfeval/main.nf new file mode 100644 index 00000000..1a4297fc --- /dev/null +++ b/tests/modules/rtgtools/vcfeval/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RTGTOOLS_VCFEVAL } from '../../../../modules/rtgtools/vcfeval/main.nf' + +workflow test_rtgtools_vcfeval { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + RTGTOOLS_VCFEVAL ( input ) +} diff --git a/tests/modules/rtgtools/vcfeval/nextflow.config b/tests/modules/rtgtools/vcfeval/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/rtgtools/vcfeval/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml new file mode 100644 index 00000000..862f4acd --- /dev/null +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build 
this file: +# nf-core modules create-test-yml rtgtools/vcfeval +- name: "rtgtools vcfeval" + command: nextflow run ./tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval -c ./tests/config/nextflow.config -c ./tests/modules/rtgtools/vcfeval/nextflow.config + tags: + - "rtgtools" + # + - "rtgtools/vcfeval" + # + files: + - path: "output/rtgtools/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/rtgtools/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 84b354ab6a110861db68e0d3eb6b0e746d014845 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 6 May 2022 14:23:40 +0200 Subject: [PATCH 02/26] Added the rtgtools/vcfeval module --- modules/rtgtools/vcfeval/main.nf | 70 ++++++++++--------------- modules/rtgtools/vcfeval/meta.yml | 54 +++++++++++-------- tests/config/test_data.config | 4 ++ tests/modules/rtgtools/vcfeval/main.nf | 32 +++++++++-- tests/modules/rtgtools/vcfeval/test.yml | 29 ++++++---- 5 files changed, 113 insertions(+), 76 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 4fd8aa4b..0b927a22 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -1,46 +1,18 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. 
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process RTGTOOLS_VCFEVAL { tag "$meta.id" label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda (params.enable_conda ? "bioconda::rtg-tools=3.12.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0': 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
- tuple val(meta), path(bam) + tuple val(meta), path(truth_vcf), path(truth_vcf_tbi), path(query_vcf), path(query_vcf_tbi), path(bed) + path(sdf) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here + tuple val(meta), path("*.txt"), emit: results path "versions.yml" , emit: versions when: @@ -49,19 +21,35 @@ process RTGTOOLS_VCFEVAL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def regions = bed ? "--bed-regions=$bed" : "" + def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" + def query_index = query_vcf_tbi ? 
"" : "rtg index $query_vcf" + + sdf_basename = sdf.getBaseName().replace(".tar","") + tar_decomp = "" + if((sdf =~ /.tar.gz\b/).find() == true) { + tar_decomp = "tar -xzf $sdf" + } + """ + $tar_decomp + + $truth_index + $query_index + + rtg vcfeval \\ + $args \\ + --baseline=$truth_vcf \\ + $regions \\ + --calls=$query_vcf \\ + --output=$prefix \\ + --template=$sdf_basename \\ + --threads=$task.cpus \\ + > ${prefix}_results.txt + cat <<-END_VERSIONS > versions.yml "${task.process}": - rtgtools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) END_VERSIONS """ } diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index 512a7734..061ea876 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -1,51 +1,63 @@ name: "rtgtools_vcfeval" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: The VCFeval tool of RTG tools. It is used to evaluate called variants for agreement with a baseline variant set keywords: - - sort + - benchmarking + - vcf + - rtg-tools tools: - "rtgtools": - ## TODO nf-core: Add a description and other details for the software below description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation" - homepage: "None" - documentation: "None" - tool_dev_url: "None" + homepage: "https://www.realtimegenomics.com/products/rtg-tools" + documentation: "https://github.com/RealTimeGenomics/rtg-tools" + tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools" doi: "" licence: "['BSD']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - truth_vcf: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: A standard VCF to compare against + pattern: "*.{vcf,vcf.gz}" + - truth_vcf_index: + type: file + description: The index of the standard VCF (optional) + pattern: "*.tbi" + - query_vcf: + type: file + description: A VCF with called variants to benchmark against the standard + pattern: "*.{vcf,vcf.gz}" + - query_vcf_index: + type: file + description: The index of the called VCF (optional) + pattern: "*.tbi" + - bed: + type: file + description: The BED file of the called VCF + pattern: "*.bed" + - sdf: + type: folder/file + description: The SDF (RTG Sequence Data File) of the reference genome. Can be a folder or a tar-zipped folder. + pattern: "*.{,tar.gz}" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - results: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: A text file containing the results of the benchmark + pattern: "*.txt" authors: - "@nvnieuwk" diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 62e38c4d..5a204ae1 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -132,6 +132,7 @@ params { transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta" genome2_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta" genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" + genome_21_sdf = "${test_data_dir}/genomics/homo_sapiens/genome/genome_sdf.tar.gz" genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" @@ -263,6 +264,9 @@ params { test2_haplotc_ann_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz" test2_haplotc_ann_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.ann.vcf.gz.tbi" + test2_haplotc_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz" + test2_haplotc_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test2_haplotc.vcf.gz.tbi" + test2_recal = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal" test2_recal_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.recal.idx" 
test2_tranches = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/variantrecalibrator/test2.tranches" diff --git a/tests/modules/rtgtools/vcfeval/main.nf b/tests/modules/rtgtools/vcfeval/main.nf index 1a4297fc..baf3c54d 100644 --- a/tests/modules/rtgtools/vcfeval/main.nf +++ b/tests/modules/rtgtools/vcfeval/main.nf @@ -7,9 +7,35 @@ include { RTGTOOLS_VCFEVAL } from '../../../../modules/rtgtools/vcfeval/main.nf' workflow test_rtgtools_vcfeval { input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) ] - RTGTOOLS_VCFEVAL ( input ) + sdf = Channel.value( + file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) + ) + + RTGTOOLS_VCFEVAL ( input, sdf ) +} + +workflow test_rtgtools_vcfeval_no_index { + + input = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + ] + + sdf = Channel.value( + file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) + ) + + RTGTOOLS_VCFEVAL ( input, sdf ) } diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 862f4acd..73ed8cc6 100644 --- 
a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -1,14 +1,21 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml rtgtools/vcfeval -- name: "rtgtools vcfeval" - command: nextflow run ./tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval -c ./tests/config/nextflow.config -c ./tests/modules/rtgtools/vcfeval/nextflow.config +- name: rtgtools vcfeval test_rtgtools_vcfeval + command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval -c tests/config/nextflow.config tags: - - "rtgtools" - # - - "rtgtools/vcfeval" - # + - rtgtools + - rtgtools/vcfeval files: - - path: "output/rtgtools/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/rtgtools/test_results.txt + md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 - path: output/rtgtools/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + md5sum: 270ed7a5a8e347b251eb4aa2198f98e8 + +- name: rtgtools vcfeval test_rtgtools_vcfeval_no_index + command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_index -c tests/config/nextflow.config + tags: + - rtgtools + - rtgtools/vcfeval + files: + - path: output/rtgtools/test_results.txt + md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 + - path: output/rtgtools/versions.yml + md5sum: 8d0407000988c78fa43fe5cfe3d4449d From 8c857dee3a3ea50a41e212ac2e8dda24b50c90f6 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 6 May 2022 16:16:57 +0200 Subject: [PATCH 03/26] Fixed the sdf folder fetching --- tests/config/test_data.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 5a204ae1..f8d6b948 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -132,7 +132,7 @@ params { transcriptome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/transcriptome.fasta" genome2_fasta = 
"${test_data_dir}/genomics/homo_sapiens/genome/genome2.fasta" genome_chain_gz = "${test_data_dir}/genomics/homo_sapiens/genome/genome.chain.gz" - genome_21_sdf = "${test_data_dir}/genomics/homo_sapiens/genome/genome_sdf.tar.gz" + genome_21_sdf = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome_sdf.tar.gz" genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" From e271410017c52b6ada3008db313a702c8e005263 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Fri, 6 May 2022 16:21:32 +0200 Subject: [PATCH 04/26] Removed some whitespaces --- modules/rtgtools/vcfeval/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 0b927a22..9daadd05 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -24,11 +24,11 @@ process RTGTOOLS_VCFEVAL { def regions = bed ? "--bed-regions=$bed" : "" def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" def query_index = query_vcf_tbi ? 
"" : "rtg index $query_vcf" - + sdf_basename = sdf.getBaseName().replace(".tar","") tar_decomp = "" if((sdf =~ /.tar.gz\b/).find() == true) { - tar_decomp = "tar -xzf $sdf" + tar_decomp = "tar -xzf $sdf" } """ From 6fd9246aef346ba9975db049fcdbc9de7d6074f9 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Mon, 9 May 2022 08:07:33 +0200 Subject: [PATCH 05/26] Update modules/rtgtools/vcfeval/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/rtgtools/vcfeval/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 9daadd05..97d96d89 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -8,7 +8,8 @@ process RTGTOOLS_VCFEVAL { 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" input: - tuple val(meta), path(truth_vcf), path(truth_vcf_tbi), path(query_vcf), path(query_vcf_tbi), path(bed) + tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(bed) + tuple path(truth_vcf), path(truth_vcf_tbi), path(sdf) output: From d18af7358a7b3cebb59a81a5b71414e40fc39256 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 9 May 2022 09:20:42 +0200 Subject: [PATCH 06/26] Moved untarring the genome sdf folder to the test itself --- modules/rtgtools/vcfeval/main.nf | 14 ++------ tests/modules/rtgtools/vcfeval/main.nf | 46 +++++++++++++++++++------- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 97d96d89..83895b64 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -8,8 +8,8 @@ process RTGTOOLS_VCFEVAL { 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" input: - tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(bed) - tuple path(truth_vcf), path(truth_vcf_tbi), + tuple val(meta), path(query_vcf), 
path(query_vcf_tbi), path(bed) + tuple path(truth_vcf), path(truth_vcf_tbi) path(sdf) output: @@ -26,15 +26,7 @@ process RTGTOOLS_VCFEVAL { def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" def query_index = query_vcf_tbi ? "" : "rtg index $query_vcf" - sdf_basename = sdf.getBaseName().replace(".tar","") - tar_decomp = "" - if((sdf =~ /.tar.gz\b/).find() == true) { - tar_decomp = "tar -xzf $sdf" - } - """ - $tar_decomp - $truth_index $query_index @@ -44,7 +36,7 @@ process RTGTOOLS_VCFEVAL { $regions \\ --calls=$query_vcf \\ --output=$prefix \\ - --template=$sdf_basename \\ + --template=$sdf \\ --threads=$task.cpus \\ > ${prefix}_results.txt diff --git a/tests/modules/rtgtools/vcfeval/main.nf b/tests/modules/rtgtools/vcfeval/main.nf index baf3c54d..35a2206b 100644 --- a/tests/modules/rtgtools/vcfeval/main.nf +++ b/tests/modules/rtgtools/vcfeval/main.nf @@ -3,39 +3,61 @@ nextflow.enable.dsl = 2 include { RTGTOOLS_VCFEVAL } from '../../../../modules/rtgtools/vcfeval/main.nf' +include { UNTAR } from '../../../modules/untar/main.nf' workflow test_rtgtools_vcfeval { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz_tbi'], checkIfExists: true), file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) ] - sdf = Channel.value( - file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) - ) + truth = [ + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) + ] - RTGTOOLS_VCFEVAL ( input, sdf ) + 
compressed_sdf = [ + [], + file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) + ] + + sdf = UNTAR( compressed_sdf ).untar + .map({ + meta, folder -> + folder + }) + + + RTGTOOLS_VCFEVAL ( input, truth, sdf ) } workflow test_rtgtools_vcfeval_no_index { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), - [], file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), [], file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) ] - sdf = Channel.value( - file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) - ) + truth = [ + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + [] + ] - RTGTOOLS_VCFEVAL ( input, sdf ) + compressed_sdf = [ + [], + file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) + ] + + sdf = UNTAR( compressed_sdf ).untar + .map({ + meta, folder -> + [folder] + }) + + RTGTOOLS_VCFEVAL ( input, truth, sdf ) } From d6dd4c2e2d094b18c2027bc636eaa88358381fb4 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Mon, 9 May 2022 15:35:16 +0200 Subject: [PATCH 07/26] Update tests/modules/rtgtools/vcfeval/test.yml Co-authored-by: FriederikeHanssen --- tests/modules/rtgtools/vcfeval/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 73ed8cc6..025581a6 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -18,4 +18,3 @@ - path: output/rtgtools/test_results.txt md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 - path: output/rtgtools/versions.yml - md5sum: 8d0407000988c78fa43fe5cfe3d4449d From f6262b4a103124c6ebf1bd650a297e6942f437ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 9 May 2022 15:47:07 +0200 Subject: 
[PATCH 08/26] fixed some issues --- modules/rtgtools/vcfeval/main.nf | 3 ++- modules/rtgtools/vcfeval/meta.yml | 6 +++--- tests/modules/rtgtools/vcfeval/main.nf | 12 +++++++----- tests/modules/rtgtools/vcfeval/test.yml | 1 - 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 83895b64..c7e8a12e 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -8,8 +8,9 @@ process RTGTOOLS_VCFEVAL { 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" input: - tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(bed) + tuple val(meta), path(query_vcf), path(query_vcf_tbi) tuple path(truth_vcf), path(truth_vcf_tbi) + path(bed) path(sdf) output: diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index 061ea876..36e1b290 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -40,9 +40,9 @@ input: description: The BED file of the called VCF pattern: "*.bed" - sdf: - type: folder/file - description: The SDF (RTG Sequence Data File) of the reference genome. Can be a folder or a tar-zipped folder. 
- pattern: "*.{,tar.gz}" + type: file + description: The SDF (RTG Sequence Data File) folder of the reference genome + pattern: "*" output: - meta: diff --git a/tests/modules/rtgtools/vcfeval/main.nf b/tests/modules/rtgtools/vcfeval/main.nf index 35a2206b..9a28e2d7 100644 --- a/tests/modules/rtgtools/vcfeval/main.nf +++ b/tests/modules/rtgtools/vcfeval/main.nf @@ -10,8 +10,7 @@ workflow test_rtgtools_vcfeval { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz_tbi'], checkIfExists: true), ] truth = [ @@ -19,6 +18,8 @@ workflow test_rtgtools_vcfeval { file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + compressed_sdf = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) @@ -31,7 +32,7 @@ workflow test_rtgtools_vcfeval { }) - RTGTOOLS_VCFEVAL ( input, truth, sdf ) + RTGTOOLS_VCFEVAL ( input, truth, bed, sdf ) } workflow test_rtgtools_vcfeval_no_index { @@ -40,7 +41,6 @@ workflow test_rtgtools_vcfeval_no_index { [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_vcf_gz'], checkIfExists: true), [], - file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) ] truth = [ @@ -48,6 +48,8 @@ workflow test_rtgtools_vcfeval_no_index { [] ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + compressed_sdf = [ [], file(params.test_data['homo_sapiens']['genome']['genome_21_sdf']) @@ -59,5 +61,5 
@@ workflow test_rtgtools_vcfeval_no_index { [folder] }) - RTGTOOLS_VCFEVAL ( input, truth, sdf ) + RTGTOOLS_VCFEVAL ( input, truth, bed, sdf ) } diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 025581a6..86252c68 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -7,7 +7,6 @@ - path: output/rtgtools/test_results.txt md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 - path: output/rtgtools/versions.yml - md5sum: 270ed7a5a8e347b251eb4aa2198f98e8 - name: rtgtools vcfeval test_rtgtools_vcfeval_no_index command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_index -c tests/config/nextflow.config From 1b63d03f8e6d5b964c3f5215cb3c99f5604098ed Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 9 May 2022 15:51:19 +0200 Subject: [PATCH 09/26] adjusted the bed variable name --- modules/rtgtools/vcfeval/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index c7e8a12e..a2f5244f 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -10,7 +10,7 @@ process RTGTOOLS_VCFEVAL { input: tuple val(meta), path(query_vcf), path(query_vcf_tbi) tuple path(truth_vcf), path(truth_vcf_tbi) - path(bed) + path(truth_regions) path(sdf) output: @@ -23,7 +23,7 @@ process RTGTOOLS_VCFEVAL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def regions = bed ? "--bed-regions=$bed" : "" + def regions = truth_regions ? "--bed-regions=$truth_regions" : "" def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" def query_index = query_vcf_tbi ? 
"" : "rtg index $query_vcf" From bd7e1414a453af7605a67dcc166e3a98aeec3689 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Mon, 9 May 2022 15:55:48 +0200 Subject: [PATCH 10/26] Fixed meta.yml --- modules/rtgtools/vcfeval/main.nf | 2 +- modules/rtgtools/vcfeval/meta.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index a2f5244f..1d981204 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -8,7 +8,7 @@ process RTGTOOLS_VCFEVAL { 'quay.io/biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" input: - tuple val(meta), path(query_vcf), path(query_vcf_tbi) + tuple val(meta), path(query_vcf), path(query_vcf_tbi) tuple path(truth_vcf), path(truth_vcf_tbi) path(truth_regions) path(sdf) diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index 36e1b290..baf9e87a 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -35,9 +35,9 @@ input: type: file description: The index of the called VCF (optional) pattern: "*.tbi" - - bed: + - truth_regions: type: file - description: The BED file of the called VCF + description: The BED file containing the truth regions pattern: "*.bed" - sdf: type: file From c69d5cc23e76d5560b763111a167a15d8cd08424 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 09:35:42 +0200 Subject: [PATCH 11/26] Added evaluation-regions and adjusted the output to now contain all output (and not only the summary) --- modules/rtgtools/vcfeval/main.nf | 14 ++--- modules/rtgtools/vcfeval/meta.yml | 10 ++-- tests/modules/rtgtools/vcfeval/main.nf | 14 +++-- tests/modules/rtgtools/vcfeval/test.yml | 69 ++++++++++++++++++++++--- 4 files changed, 87 insertions(+), 20 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 1d981204..4e27ed6b 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ 
b/modules/rtgtools/vcfeval/main.nf @@ -11,19 +11,21 @@ process RTGTOOLS_VCFEVAL { tuple val(meta), path(query_vcf), path(query_vcf_tbi) tuple path(truth_vcf), path(truth_vcf_tbi) path(truth_regions) + path(evaluation_regions) path(sdf) output: - tuple val(meta), path("*.txt"), emit: results - path "versions.yml" , emit: versions + tuple val(meta), path("${task.ext.prefix ?: meta.id}/*") , emit: results + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - def regions = truth_regions ? "--bed-regions=$truth_regions" : "" + def bed_regions = truth_regions ? "--bed-regions=$truth_regions" : "" + def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : "" def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" def query_index = query_vcf_tbi ? "" : "rtg index $query_vcf" @@ -34,12 +36,12 @@ process RTGTOOLS_VCFEVAL { rtg vcfeval \\ $args \\ --baseline=$truth_vcf \\ - $regions \\ + $bed_regions \\ + $eval_regions \\ --calls=$query_vcf \\ --output=$prefix \\ --template=$sdf \\ --threads=$task.cpus \\ - > ${prefix}_results.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index baf9e87a..1f448fdc 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -37,7 +37,11 @@ input: pattern: "*.tbi" - truth_regions: type: file - description: The BED file containing the truth regions + description: A BED file containing the strict regions where VCFeval should only evaluate the fully overlapping variants (optional) + pattern: "*.bed" + - evaluation_regions: + type: file + description: A BED file containing the regions where VCFeval will evaluate every fully and partially overlapping variant (optional) pattern: "*.bed" - sdf: type: file @@ -56,8 +60,8 @@ output: pattern:
"versions.yml" - results: type: file - description: A text file containing the results of the benchmark - pattern: "*.txt" + description: A folder containing all results of the evaluation + pattern: "*" authors: - "@nvnieuwk" diff --git a/tests/modules/rtgtools/vcfeval/main.nf b/tests/modules/rtgtools/vcfeval/main.nf index 9a28e2d7..9a1c3c71 100644 --- a/tests/modules/rtgtools/vcfeval/main.nf +++ b/tests/modules/rtgtools/vcfeval/main.nf @@ -18,7 +18,9 @@ workflow test_rtgtools_vcfeval { file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) ] - bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + truth_regions = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + evaluation_regions = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) compressed_sdf = [ [], @@ -32,10 +34,10 @@ workflow test_rtgtools_vcfeval { }) - RTGTOOLS_VCFEVAL ( input, truth, bed, sdf ) + RTGTOOLS_VCFEVAL ( input, truth, truth_regions, evaluation_regions, sdf ) } -workflow test_rtgtools_vcfeval_no_index { +workflow test_rtgtools_vcfeval_no_optional_inputs { input = [ [ id:'test' ], // meta map @@ -48,7 +50,9 @@ workflow test_rtgtools_vcfeval_no_index { [] ] - bed = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + truth_regions = [] + + evaluation_regions = [] compressed_sdf = [ [], @@ -61,5 +65,5 @@ workflow test_rtgtools_vcfeval_no_index { [folder] }) - RTGTOOLS_VCFEVAL ( input, truth, bed, sdf ) + RTGTOOLS_VCFEVAL ( input, truth, truth_regions, evaluation_regions, sdf ) } diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 86252c68..06d39701 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -4,16 +4,73 @@ - rtgtools - rtgtools/vcfeval files: - - path: 
output/rtgtools/test_results.txt - md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 + - path: output/rtgtools/test/done + md5sum: 8b5623b26ee9b8722816afbec270bff0 + - path: output/rtgtools/test/fn.vcf.gz + md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 + - path: output/rtgtools/test/fn.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/fp.vcf.gz + md5sum: 1417bb8ac7a0e202df660291a74de0db + - path: output/rtgtools/test/fp.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/non_snp_roc.tsv.gz + md5sum: 8fd51a1e5084d15d43880cb1e31a0180 + - path: output/rtgtools/test/phasing.txt + md5sum: 133677dbd8be657439ea2b03fdfb8795 + - path: output/rtgtools/test/progress + - path: output/rtgtools/test/snp_roc.tsv.gz + md5sum: ff2ece544adfcefaa06da054876a9ae3 + - path: output/rtgtools/test/summary.txt + md5sum: f4c8df93c8bdab603036bbc27b4a28c3 + - path: output/rtgtools/test/tp-baseline.vcf.gz + md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 + - path: output/rtgtools/test/tp-baseline.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/tp.vcf.gz + md5sum: 1417bb8ac7a0e202df660291a74de0db + - path: output/rtgtools/test/tp.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/vcfeval.log + - path: output/rtgtools/test/weighted_roc.tsv.gz + md5sum: 5209f1bdeb03704714ae92b183d08e0f - path: output/rtgtools/versions.yml -- name: rtgtools vcfeval test_rtgtools_vcfeval_no_index - command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_index -c tests/config/nextflow.config +- name: rtgtools vcfeval test_rtgtools_vcfeval_no_optional_inputs + command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_optional_inputs -c tests/config/nextflow.config tags: - rtgtools - rtgtools/vcfeval files: - - path: output/rtgtools/test_results.txt - md5sum: 2e011aa6e54d258fcc3b45b2dda02ae4 + - path: output/rtgtools/test/done + md5sum: 
8b5623b26ee9b8722816afbec270bff0 + - path: output/rtgtools/test/fn.vcf.gz + md5sum: e51420e4be520ae309a2384830bf4c15 + - path: output/rtgtools/test/fn.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/fp.vcf.gz + md5sum: 7d818cf526983de6cbb5cf517c44a8f7 + - path: output/rtgtools/test/fp.vcf.gz.tbi + md5sum: 092a7a3162e7cff25d273525751eb284 + - path: output/rtgtools/test/non_snp_roc.tsv.gz + md5sum: a535fb80081b43b19788347152b6b8b4 + - path: output/rtgtools/test/phasing.txt + md5sum: 133677dbd8be657439ea2b03fdfb8795 + - path: output/rtgtools/test/progress + - path: output/rtgtools/test/snp_roc.tsv.gz + md5sum: 6006656c4a534935c7873398287bc110 + - path: output/rtgtools/test/summary.txt + md5sum: f33feb32f84958fb931063044fba369b + - path: output/rtgtools/test/tp-baseline.vcf.gz + md5sum: ed68ea567a26d3b864ada79e9253bc97 + - path: output/rtgtools/test/tp-baseline.vcf.gz.tbi + md5sum: 3518deff814eed340b0f5386294b5879 + - path: output/rtgtools/test/tp.vcf.gz + md5sum: 92fd51021d101c99da066324655d24c9 + - path: output/rtgtools/test/tp.vcf.gz.tbi + md5sum: 169063c1f570f0055059f3cb3518a8b4 + - path: output/rtgtools/test/vcfeval.log + - path: output/rtgtools/test/weighted_roc.tsv.gz + md5sum: 0ba825084bd6b94accdf54fff23ea18c - path: output/rtgtools/versions.yml + From f0e8f6ce4d748c3eacceedc1f2821d885df47f70 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 09:36:54 +0200 Subject: [PATCH 12/26] Linting --- tests/modules/rtgtools/vcfeval/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 06d39701..c1cbf236 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -73,4 +73,3 @@ - path: output/rtgtools/test/weighted_roc.tsv.gz md5sum: 0ba825084bd6b94accdf54fff23ea18c - path: output/rtgtools/versions.yml - From 42fbf80c7273050a1c6de3e1d179bf1c8704ad15 Mon Sep 17 00:00:00 2001 From: 
Nicolas Vannieuwkerke Date: Tue, 10 May 2022 09:57:49 +0200 Subject: [PATCH 13/26] Fixed a test issue with wrong md5sum --- tests/modules/rtgtools/vcfeval/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index c1cbf236..ea76cb15 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -5,7 +5,7 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - md5sum: 8b5623b26ee9b8722816afbec270bff0 + contains: - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - path: output/rtgtools/test/fn.vcf.gz.tbi @@ -43,7 +43,7 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - md5sum: 8b5623b26ee9b8722816afbec270bff0 + contains: - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: e51420e4be520ae309a2384830bf4c15 - path: output/rtgtools/test/fn.vcf.gz.tbi From 96c3f895b9c8bba0ea629bb31869a14aaf2e9c83 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 10:19:13 +0200 Subject: [PATCH 14/26] test issue fix? 
--- tests/modules/rtgtools/vcfeval/test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index ea76cb15..299fe91c 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -5,7 +5,8 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - contains: - Finished Succesfully in + contains: + - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - path: output/rtgtools/test/fn.vcf.gz.tbi @@ -43,7 +44,8 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - contains: - Finished Succesfully in + contains: + - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: e51420e4be520ae309a2384830bf4c15 - path: output/rtgtools/test/fn.vcf.gz.tbi From 60086196038fd5e0cf9006a9aa6d797cb9c4380a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 10:20:54 +0200 Subject: [PATCH 15/26] test issue fix? 
--- tests/modules/rtgtools/vcfeval/test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 299fe91c..194e0cac 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -5,8 +5,6 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - contains: - - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - path: output/rtgtools/test/fn.vcf.gz.tbi @@ -44,8 +42,6 @@ - rtgtools/vcfeval files: - path: output/rtgtools/test/done - contains: - - Finished Succesfully in - path: output/rtgtools/test/fn.vcf.gz md5sum: e51420e4be520ae309a2384830bf4c15 - path: output/rtgtools/test/fn.vcf.gz.tbi From 3a064b0a9952e6e0f00cc8570d2fd72b11aff449 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:05:42 +0200 Subject: [PATCH 16/26] Split the output to different channels --- modules/rtgtools/vcfeval/main.nf | 10 +++- modules/rtgtools/vcfeval/meta.yml | 66 ++++++++++++++++++++++-- tests/modules/rtgtools/vcfeval/test.yml | 68 ++++++++++++------------- 3 files changed, 105 insertions(+), 39 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 4e27ed6b..1963bfe0 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -15,8 +15,12 @@ process RTGTOOLS_VCFEVAL { path(sdf) output: - tuple val(meta), path("${task.ext.prefix ?: meta.id}/*") , emit: results - path "versions.yml" , emit: versions + tuple val(meta), path("done"), path("progress"), path("*.log") , emit: logs + tuple val(meta), path("*{tp,fn,fp,baseline}.vcf.gz.tbi"), path("*{tp,fn,fp,baseline}.vcf.gz") , emit: vcf + tuple val(meta), path("*.tsv.gz") , emit: roc + tuple val(meta), path("summary.txt") , emit: summary + tuple val(meta), path("phasing.txt") , emit: phasing + path "versions.yml" , emit: versions when: task.ext.when == null || 
task.ext.when @@ -43,6 +47,8 @@ process RTGTOOLS_VCFEVAL { --template=$sdf \\ --threads=$task.cpus \\ + mv ${prefix}/* . + cat <<-END_VERSIONS > versions.yml "${task.process}": rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index 1f448fdc..e568852c 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -58,10 +58,70 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - results: + - done: type: file - description: A folder containing all results of the evaluation - pattern: "*" + description: A file containing the message on succesful completion + pattern: "done" + - progress: + type: file + description: A file containing the simplified logging of the process + pattern: "progress" + - vcfeval_log: + type: file + description: A file containing the extended logging of the process + pattern: "*.log" + - false_negatives_vcf: + type: file + description: A VCF file containing the false negative variants + pattern: "fn.vcf.gz" + - false_negatives_index: + type: file + description: The index file for the false negatives' VCF file + pattern: "fn.vcf.gz.tbi" + - false_positive_vcf: + type: file + description: A VCF file containing the false positive variants + pattern: "fp.vcf.gz" + - false_positives_index: + type: file + description: The index file for the false positives' VCF file + pattern: "fp.vcf.gz.tbi" + - true_positive_vcf: + type: file + description: A VCF file containing the true positive variants + pattern: "tp.vcf.gz" + - true_positives_index: + type: file + description: The index file for the true positives' VCF file + pattern: "tp.vcf.gz.tbi" + - true_positive_baseline_vcf: + type: file + description: A VCF file containing the true positive baseline variants + pattern: "tp-baseline.vcf.gz" + - true_positives_baseline_index: + type: file + description: The index file for the baseline 
true positives' VCF file + pattern: "tp-baseline.vcf.gz.tbi" + - non_snp_roc: + type: file + description: A TSV file containing the ROC data for the non-SNP variants + pattern: "non_snp_roc.tsv.gz" + - snp_roc: + type: file + description: A TSV file containing the ROC data for the SNP variants + pattern: "snp_roc.tsv.gz" + - weighted_roc: + type: file + description: A TSV file containing the weighted ROC data + pattern: "weighted_roc.tsv.gz" + - summary: + type: file + description: A TXT file containing the summary of the evaluation + pattern: "summary.txt" + - phasing: + type: file + description: A TXT file containing the data on the phasing + pattern: "phasing.txt" authors: - "@nvnieuwk" diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 194e0cac..70d014bf 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -4,36 +4,36 @@ - rtgtools - rtgtools/vcfeval files: - - path: output/rtgtools/test/done - - path: output/rtgtools/test/fn.vcf.gz + - path: output/rtgtools/done + - path: output/rtgtools/fn.vcf.gz md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - - path: output/rtgtools/test/fn.vcf.gz.tbi + - path: output/rtgtools/fn.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/fp.vcf.gz + - path: output/rtgtools/fp.vcf.gz md5sum: 1417bb8ac7a0e202df660291a74de0db - - path: output/rtgtools/test/fp.vcf.gz.tbi + - path: output/rtgtools/fp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/non_snp_roc.tsv.gz + - path: output/rtgtools/non_snp_roc.tsv.gz md5sum: 8fd51a1e5084d15d43880cb1e31a0180 - - path: output/rtgtools/test/phasing.txt + - path: output/rtgtools/phasing.txt md5sum: 133677dbd8be657439ea2b03fdfb8795 - - path: output/rtgtools/test/progress - - path: output/rtgtools/test/snp_roc.tsv.gz + - path: output/rtgtools/progress + - path: output/rtgtools/snp_roc.tsv.gz md5sum: ff2ece544adfcefaa06da054876a9ae3 - - path: 
output/rtgtools/test/summary.txt + - path: output/rtgtools/summary.txt md5sum: f4c8df93c8bdab603036bbc27b4a28c3 - - path: output/rtgtools/test/tp-baseline.vcf.gz + - path: output/rtgtools/tp-baseline.vcf.gz md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - - path: output/rtgtools/test/tp-baseline.vcf.gz.tbi + - path: output/rtgtools/tp-baseline.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/tp.vcf.gz + - path: output/rtgtools/tp.vcf.gz md5sum: 1417bb8ac7a0e202df660291a74de0db - - path: output/rtgtools/test/tp.vcf.gz.tbi + - path: output/rtgtools/tp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/vcfeval.log - - path: output/rtgtools/test/weighted_roc.tsv.gz - md5sum: 5209f1bdeb03704714ae92b183d08e0f + - path: output/rtgtools/vcfeval.log - path: output/rtgtools/versions.yml + - path: output/rtgtools/weighted_roc.tsv.gz + md5sum: 5209f1bdeb03704714ae92b183d08e0f - name: rtgtools vcfeval test_rtgtools_vcfeval_no_optional_inputs command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_optional_inputs -c tests/config/nextflow.config @@ -41,33 +41,33 @@ - rtgtools - rtgtools/vcfeval files: - - path: output/rtgtools/test/done - - path: output/rtgtools/test/fn.vcf.gz + - path: output/rtgtools/done + - path: output/rtgtools/fn.vcf.gz md5sum: e51420e4be520ae309a2384830bf4c15 - - path: output/rtgtools/test/fn.vcf.gz.tbi + - path: output/rtgtools/fn.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/fp.vcf.gz + - path: output/rtgtools/fp.vcf.gz md5sum: 7d818cf526983de6cbb5cf517c44a8f7 - - path: output/rtgtools/test/fp.vcf.gz.tbi + - path: output/rtgtools/fp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/test/non_snp_roc.tsv.gz + - path: output/rtgtools/non_snp_roc.tsv.gz md5sum: a535fb80081b43b19788347152b6b8b4 - - path: output/rtgtools/test/phasing.txt + - path: output/rtgtools/phasing.txt md5sum: 
133677dbd8be657439ea2b03fdfb8795 - - path: output/rtgtools/test/progress - - path: output/rtgtools/test/snp_roc.tsv.gz + - path: output/rtgtools/progress + - path: output/rtgtools/snp_roc.tsv.gz md5sum: 6006656c4a534935c7873398287bc110 - - path: output/rtgtools/test/summary.txt + - path: output/rtgtools/summary.txt md5sum: f33feb32f84958fb931063044fba369b - - path: output/rtgtools/test/tp-baseline.vcf.gz + - path: output/rtgtools/tp-baseline.vcf.gz md5sum: ed68ea567a26d3b864ada79e9253bc97 - - path: output/rtgtools/test/tp-baseline.vcf.gz.tbi + - path: output/rtgtools/tp-baseline.vcf.gz.tbi md5sum: 3518deff814eed340b0f5386294b5879 - - path: output/rtgtools/test/tp.vcf.gz + - path: output/rtgtools/tp.vcf.gz md5sum: 92fd51021d101c99da066324655d24c9 - - path: output/rtgtools/test/tp.vcf.gz.tbi + - path: output/rtgtools/tp.vcf.gz.tbi md5sum: 169063c1f570f0055059f3cb3518a8b4 - - path: output/rtgtools/test/vcfeval.log - - path: output/rtgtools/test/weighted_roc.tsv.gz - md5sum: 0ba825084bd6b94accdf54fff23ea18c + - path: output/rtgtools/vcfeval.log - path: output/rtgtools/versions.yml + - path: output/rtgtools/weighted_roc.tsv.gz + md5sum: 0ba825084bd6b94accdf54fff23ea18c From 5db26f9f3588a1934d64690c0627a22adee57d76 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:07:11 +0200 Subject: [PATCH 17/26] Linting --- modules/rtgtools/vcfeval/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 1963bfe0..b32d544b 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -17,7 +17,7 @@ process RTGTOOLS_VCFEVAL { output: tuple val(meta), path("done"), path("progress"), path("*.log") , emit: logs tuple val(meta), path("*{tp,fn,fp,baseline}.vcf.gz.tbi"), path("*{tp,fn,fp,baseline}.vcf.gz") , emit: vcf - tuple val(meta), path("*.tsv.gz") , emit: roc + tuple val(meta), path("*.tsv.gz") , emit: roc tuple val(meta), path("summary.txt") 
, emit: summary tuple val(meta), path("phasing.txt") , emit: phasing path "versions.yml" , emit: versions From f3d5584ab0e277310a147ff5ed165c3a9203c6f4 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:08:23 +0200 Subject: [PATCH 18/26] Linting --- modules/rtgtools/vcfeval/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index b32d544b..826fec43 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -17,7 +17,7 @@ process RTGTOOLS_VCFEVAL { output: tuple val(meta), path("done"), path("progress"), path("*.log") , emit: logs tuple val(meta), path("*{tp,fn,fp,baseline}.vcf.gz.tbi"), path("*{tp,fn,fp,baseline}.vcf.gz") , emit: vcf - tuple val(meta), path("*.tsv.gz") , emit: roc + tuple val(meta), path("*.tsv.gz") , emit: roc tuple val(meta), path("summary.txt") , emit: summary tuple val(meta), path("phasing.txt") , emit: phasing path "versions.yml" , emit: versions From ef4857ec541864f41d9068a2a31b20e9b24953fb Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:20:01 +0200 Subject: [PATCH 19/26] Updated meta.yml --- modules/rtgtools/vcfeval/meta.yml | 62 +++++-------------------------- 1 file changed, 9 insertions(+), 53 deletions(-) diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index e568852c..587c2d3e 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -58,62 +58,18 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - done: + - logging: type: file - description: A file containing the message on succesful completion - pattern: "done" - - progress: + description: Files containing logging from vcfeval + pattern: "*{done,progress,.log}" + - vcf: type: file - description: A file containing the simplified logging of the process - pattern: "progress" - - vcfeval_log: + 
description: VCF files containing the false negatives, false positives and true positives with their index files + pattern: "*.vcf{.gz,.gz.tbi}" + - roc: type: file - description: A file containing the extended logging of the process - pattern: "*.log" - - false_negatives_vcf: - type: file - description: A VCF file containing the false negative variants - pattern: "fn.vcf.gz" - - false_negatives_index: - type: file - description: The index file for the false negatives' VCF file - pattern: "fn.vcf.gz.tbi" - - false_positive_vcf: - type: file - description: A VCF file containing the false positive variants - pattern: "fp.vcf.gz" - - false_positives_index: - type: file - description: The index file for the false positives' VCF file - pattern: "fp.vcf.gz.tbi" - - true_positive_vcf: - type: file - description: A VCF file containing the true positive variants - pattern: "tp.vcf.gz" - - true_positives_index: - type: file - description: The index file for the true positives' VCF file - pattern: "tp.vcf.gz.tbi" - - true_positive_baseline_vcf: - type: file - description: A VCF file containing the true positive baseline variants - pattern: "tp-baseline.vcf.gz" - - true_positives_baseline_index: - type: file - description: The index file for the baseline true positives' VCF file - pattern: "tp-baseline.vcf.gz.tbi" - - non_snp_roc: - type: file - description: A TSV file containing the ROC data for the non-SNP variants - pattern: "non_snp_roc.tsv.gz" - - snp_roc: - type: file - description: A TSV file containing the ROC data for the SNP variants - pattern: "snp_roc.tsv.gz" - - weighted_roc: - type: file - description: A TSV file containing the weighted ROC data - pattern: "weighted_roc.tsv.gz" + description: TSV files containing ROC data for the evaluated variants + pattern: "*.tsv.gz" - summary: type: file description: A TXT file containing the summary of the evaluation From 29f2d3c28417f935a2688bf5ea9ad8f0e25da679 Mon Sep 17 00:00:00 2001 From: nvnieuwk 
<101190534+nvnieuwk@users.noreply.github.com> Date: Tue, 10 May 2022 12:22:49 +0200 Subject: [PATCH 20/26] Update modules/rtgtools/vcfeval/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/rtgtools/vcfeval/main.nf | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 826fec43..a34f3514 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -15,11 +15,14 @@ process RTGTOOLS_VCFEVAL { path(sdf) output: - tuple val(meta), path("done"), path("progress"), path("*.log") , emit: logs - tuple val(meta), path("*{tp,fn,fp,baseline}.vcf.gz.tbi"), path("*{tp,fn,fp,baseline}.vcf.gz") , emit: vcf - tuple val(meta), path("*.tsv.gz") , emit: roc - tuple val(meta), path("summary.txt") , emit: summary - tuple val(meta), path("phasing.txt") , emit: phasing + tuple val(meta), path("**{done,progress,.log}") , emit: logs + tuple val(meta), path("**tp.vcf.gz"), path("**tp.vcf.gz.tbi") , emit: tp + tuple val(meta), path("**fn.vcf.gz"), path("**fn.vcf.gz.tbi") , emit: fn + tuple val(meta), path("**fp.vcf.gz"), path("**fp.vcf.gz.tbi") , emit: fp + tuple val(meta), path("**baseline.vcf.gz"), path("**baseline.vcf.gz.tbi") , emit: baseline + tuple val(meta), path("**.tsv.gz") , emit: roc + tuple val(meta), path("**summary.txt") , emit: summary + tuple val(meta), path("**phasing.txt") , emit: phasing path "versions.yml" , emit: versions when: From 2eb77bd64e62f6c050fb95cc7bbc402240c3e7d8 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Tue, 10 May 2022 12:22:56 +0200 Subject: [PATCH 21/26] Update modules/rtgtools/vcfeval/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/rtgtools/vcfeval/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 
a34f3514..8ab73dec 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -50,7 +50,6 @@ process RTGTOOLS_VCFEVAL { --template=$sdf \\ --threads=$task.cpus \\ - mv ${prefix}/* . cat <<-END_VERSIONS > versions.yml "${task.process}": From c18391aa4983cd801211c34b4493f330ffab1a07 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:26:13 +0200 Subject: [PATCH 22/26] Updated meta.yml --- modules/rtgtools/vcfeval/meta.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index 587c2d3e..c2d286c8 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -62,10 +62,22 @@ output: type: file description: Files containing logging from vcfeval pattern: "*{done,progress,.log}" - - vcf: + - true_positives: type: file - description: VCF files containing the false negatives, false positives and true positives with their index files - pattern: "*.vcf{.gz,.gz.tbi}" + description: VCF files containing true positives with their index files + pattern: "tp.vcf{.gz,.gz.tbi}" + - true_positives_baseline: + type: file + description: VCF files containing baseline true positives with their index files + pattern: "tp-baseline.vcf{.gz,.gz.tbi}" + - false_positives: + type: file + description: VCF files containing false positives with their index files + pattern: "fp.vcf{.gz,.gz.tbi}" + - false_negatives: + type: file + description: VCF files containing false negatives with their index files + pattern: "fn.vcf{.gz,.gz.tbi}" - roc: type: file description: TSV files containing ROC data for the evaluated variants From b7134855e41d77b0fee9349c9b5eb43687e74925 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 12:34:52 +0200 Subject: [PATCH 23/26] Small update on the output channels --- modules/rtgtools/vcfeval/main.nf | 8 +- tests/modules/rtgtools/vcfeval/test.yml | 98 
+++++++++++++------------ 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 8ab73dec..dd50c54c 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -15,14 +15,14 @@ process RTGTOOLS_VCFEVAL { path(sdf) output: - tuple val(meta), path("**{done,progress,.log}") , emit: logs + tuple val(meta), path("**results/{done,progress,*.log}") , emit: logs tuple val(meta), path("**tp.vcf.gz"), path("**tp.vcf.gz.tbi") , emit: tp tuple val(meta), path("**fn.vcf.gz"), path("**fn.vcf.gz.tbi") , emit: fn tuple val(meta), path("**fp.vcf.gz"), path("**fp.vcf.gz.tbi") , emit: fp tuple val(meta), path("**baseline.vcf.gz"), path("**baseline.vcf.gz.tbi") , emit: baseline tuple val(meta), path("**.tsv.gz") , emit: roc - tuple val(meta), path("**summary.txt") , emit: summary - tuple val(meta), path("**phasing.txt") , emit: phasing + tuple val(meta), path("**results/summary.txt") , emit: summary + tuple val(meta), path("**results/phasing.txt") , emit: phasing path "versions.yml" , emit: versions when: @@ -46,7 +46,7 @@ process RTGTOOLS_VCFEVAL { $bed_regions \\ $eval_regions \\ --calls=$query_vcf \\ - --output=$prefix \\ + --output=${prefix}_results \\ --template=$sdf \\ --threads=$task.cpus \\ diff --git a/tests/modules/rtgtools/vcfeval/test.yml b/tests/modules/rtgtools/vcfeval/test.yml index 70d014bf..33720f66 100644 --- a/tests/modules/rtgtools/vcfeval/test.yml +++ b/tests/modules/rtgtools/vcfeval/test.yml @@ -4,36 +4,37 @@ - rtgtools - rtgtools/vcfeval files: - - path: output/rtgtools/done - - path: output/rtgtools/fn.vcf.gz - md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - - path: output/rtgtools/fn.vcf.gz.tbi + - path: output/rtgtools/test_results/done + - path: output/rtgtools/test_results/fn.vcf.gz + md5sum: be9c9106055bfad4c5985bc0d33efd56 + - path: output/rtgtools/test_results/fn.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/fp.vcf.gz 
- md5sum: 1417bb8ac7a0e202df660291a74de0db - - path: output/rtgtools/fp.vcf.gz.tbi + - path: output/rtgtools/test_results/fp.vcf.gz + md5sum: e0f0ff841dc63e9fb61fd3a5db137ced + - path: output/rtgtools/test_results/fp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/non_snp_roc.tsv.gz - md5sum: 8fd51a1e5084d15d43880cb1e31a0180 - - path: output/rtgtools/phasing.txt + - path: output/rtgtools/test_results/non_snp_roc.tsv.gz + md5sum: ad5bad32c48f05aef232e2c0e708877a + - path: output/rtgtools/test_results/phasing.txt md5sum: 133677dbd8be657439ea2b03fdfb8795 - - path: output/rtgtools/progress - - path: output/rtgtools/snp_roc.tsv.gz - md5sum: ff2ece544adfcefaa06da054876a9ae3 - - path: output/rtgtools/summary.txt + - path: output/rtgtools/test_results/progress + - path: output/rtgtools/test_results/snp_roc.tsv.gz + md5sum: 6785b83d66486e7e6c75c5a5b1574c09 + - path: output/rtgtools/test_results/summary.txt md5sum: f4c8df93c8bdab603036bbc27b4a28c3 - - path: output/rtgtools/tp-baseline.vcf.gz - md5sum: 4577a8c3226b9f8ed9e260c3bd4b1259 - - path: output/rtgtools/tp-baseline.vcf.gz.tbi + - path: output/rtgtools/test_results/tp-baseline.vcf.gz + md5sum: be9c9106055bfad4c5985bc0d33efd56 + - path: output/rtgtools/test_results/tp-baseline.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/tp.vcf.gz - md5sum: 1417bb8ac7a0e202df660291a74de0db - - path: output/rtgtools/tp.vcf.gz.tbi + - path: output/rtgtools/test_results/tp.vcf.gz + md5sum: e0f0ff841dc63e9fb61fd3a5db137ced + - path: output/rtgtools/test_results/tp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/vcfeval.log + - path: output/rtgtools/test_results/vcfeval.log + - path: output/rtgtools/test_results/weighted_roc.tsv.gz + md5sum: fa7c046ea0084172f1ef91f19de07b2b - path: output/rtgtools/versions.yml - - path: output/rtgtools/weighted_roc.tsv.gz - md5sum: 5209f1bdeb03704714ae92b183d08e0f + md5sum: 270ed7a5a8e347b251eb4aa2198f98e8 - name: 
rtgtools vcfeval test_rtgtools_vcfeval_no_optional_inputs command: nextflow run tests/modules/rtgtools/vcfeval -entry test_rtgtools_vcfeval_no_optional_inputs -c tests/config/nextflow.config @@ -41,33 +42,34 @@ - rtgtools - rtgtools/vcfeval files: - - path: output/rtgtools/done - - path: output/rtgtools/fn.vcf.gz - md5sum: e51420e4be520ae309a2384830bf4c15 - - path: output/rtgtools/fn.vcf.gz.tbi + - path: output/rtgtools/test_results/done + - path: output/rtgtools/test_results/fn.vcf.gz + md5sum: c11c889a4f42c8ea325748bd768ea34d + - path: output/rtgtools/test_results/fn.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/fp.vcf.gz - md5sum: 7d818cf526983de6cbb5cf517c44a8f7 - - path: output/rtgtools/fp.vcf.gz.tbi + - path: output/rtgtools/test_results/fp.vcf.gz + md5sum: 138e85c1cd79f8fea9a33e81ce0c734c + - path: output/rtgtools/test_results/fp.vcf.gz.tbi md5sum: 092a7a3162e7cff25d273525751eb284 - - path: output/rtgtools/non_snp_roc.tsv.gz - md5sum: a535fb80081b43b19788347152b6b8b4 - - path: output/rtgtools/phasing.txt + - path: output/rtgtools/test_results/non_snp_roc.tsv.gz + md5sum: 34fb78a008dfc0bef02807b8a7012b07 + - path: output/rtgtools/test_results/phasing.txt md5sum: 133677dbd8be657439ea2b03fdfb8795 - - path: output/rtgtools/progress - - path: output/rtgtools/snp_roc.tsv.gz - md5sum: 6006656c4a534935c7873398287bc110 - - path: output/rtgtools/summary.txt + - path: output/rtgtools/test_results/progress + - path: output/rtgtools/test_results/snp_roc.tsv.gz + md5sum: a4c5761c2653e2d04fc84c1cea13b1f0 + - path: output/rtgtools/test_results/summary.txt md5sum: f33feb32f84958fb931063044fba369b - - path: output/rtgtools/tp-baseline.vcf.gz - md5sum: ed68ea567a26d3b864ada79e9253bc97 - - path: output/rtgtools/tp-baseline.vcf.gz.tbi - md5sum: 3518deff814eed340b0f5386294b5879 - - path: output/rtgtools/tp.vcf.gz - md5sum: 92fd51021d101c99da066324655d24c9 - - path: output/rtgtools/tp.vcf.gz.tbi - md5sum: 169063c1f570f0055059f3cb3518a8b4 - - path: 
output/rtgtools/vcfeval.log + - path: output/rtgtools/test_results/tp-baseline.vcf.gz + md5sum: d1c2d990899edf127ea5fcca8866fcb0 + - path: output/rtgtools/test_results/tp-baseline.vcf.gz.tbi + md5sum: 3307008fea47adb75c46d395c5567bc0 + - path: output/rtgtools/test_results/tp.vcf.gz + md5sum: e35b4dab82894eee9b77c81f9bc89cca + - path: output/rtgtools/test_results/tp.vcf.gz.tbi + md5sum: 45d8f8793140944f129e728299918c88 + - path: output/rtgtools/test_results/vcfeval.log + - path: output/rtgtools/test_results/weighted_roc.tsv.gz + md5sum: 5b8efc9e9381f604880412800f58e4e9 - path: output/rtgtools/versions.yml - - path: output/rtgtools/weighted_roc.tsv.gz - md5sum: 0ba825084bd6b94accdf54fff23ea18c + md5sum: 55568e4bbe5ab7e634a1f392abb89cc4 From 7fc4cc5a382214c86ec3577da0cb59823d9becb7 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Tue, 10 May 2022 13:23:22 +0200 Subject: [PATCH 24/26] Update modules/rtgtools/vcfeval/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/rtgtools/vcfeval/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index dd50c54c..67640e87 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -33,8 +33,8 @@ process RTGTOOLS_VCFEVAL { def prefix = task.ext.prefix ?: "${meta.id}" def bed_regions = truth_regions ? "--bed-regions=$truth_regions" : "" def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : "" - def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" - def query_index = query_vcf_tbi ? 
"" : "rtg index $query_vcf" + def truth_index = truth_vcf_tbi ?: "rtg index $truth_vcf" + def query_index = query_vcf_tbi ?: "rtg index $query_vcf" """ $truth_index From d3927aae995dbccc4c55d93d96cb4d3fd9e5c6f2 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Tue, 10 May 2022 13:26:14 +0200 Subject: [PATCH 25/26] Update modules/rtgtools/vcfeval/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/rtgtools/vcfeval/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index 67640e87..dd50c54c 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -33,8 +33,8 @@ process RTGTOOLS_VCFEVAL { def prefix = task.ext.prefix ?: "${meta.id}" def bed_regions = truth_regions ? "--bed-regions=$truth_regions" : "" def eval_regions = evaluation_regions ? "--evaluation-regions=$evaluation_regions" : "" - def truth_index = truth_vcf_tbi ?: "rtg index $truth_vcf" - def query_index = query_vcf_tbi ?: "rtg index $query_vcf" + def truth_index = truth_vcf_tbi ? "" : "rtg index $truth_vcf" + def query_index = query_vcf_tbi ? 
"" : "rtg index $query_vcf" """ $truth_index From fa7c947c35b153ce58ca4806b208b382ecb9a712 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 10 May 2022 13:33:37 +0200 Subject: [PATCH 26/26] Adjusted meta --- modules/rtgtools/vcfeval/main.nf | 4 ++-- modules/rtgtools/vcfeval/meta.yml | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/rtgtools/vcfeval/main.nf b/modules/rtgtools/vcfeval/main.nf index dd50c54c..1bad4231 100644 --- a/modules/rtgtools/vcfeval/main.nf +++ b/modules/rtgtools/vcfeval/main.nf @@ -20,10 +20,10 @@ process RTGTOOLS_VCFEVAL { tuple val(meta), path("**fn.vcf.gz"), path("**fn.vcf.gz.tbi") , emit: fn tuple val(meta), path("**fp.vcf.gz"), path("**fp.vcf.gz.tbi") , emit: fp tuple val(meta), path("**baseline.vcf.gz"), path("**baseline.vcf.gz.tbi") , emit: baseline - tuple val(meta), path("**.tsv.gz") , emit: roc + tuple val(meta), path("**.tsv.gz") , emit: roc tuple val(meta), path("**results/summary.txt") , emit: summary tuple val(meta), path("**results/phasing.txt") , emit: phasing - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/rtgtools/vcfeval/meta.yml b/modules/rtgtools/vcfeval/meta.yml index c2d286c8..5a5f452e 100644 --- a/modules/rtgtools/vcfeval/meta.yml +++ b/modules/rtgtools/vcfeval/meta.yml @@ -62,21 +62,21 @@ output: type: file description: Files containing logging from vcfeval pattern: "*{done,progress,.log}" - - true_positives: + - tp: type: file - description: VCF files containing true positives with their index files + description: A tuple containing the VCF and TBI file for the true positive variants pattern: "tp.vcf{.gz,.gz.tbi}" - - true_positives_baseline: + - baseline: type: file - description: VCF files containing baseline true positives with their index files + description: A tuple containing the VCF and TBI file for the baseline true positive variants pattern: 
"tp-baseline.vcf{.gz,.gz.tbi}" - - false_positives: + - fp: type: file - description: VCF files containing false positives with their index files + description: A tuple containing the VCF and TBI file for the false positive variants pattern: "fp.vcf{.gz,.gz.tbi}" - - false_negatives: + - fn: type: file - description: VCF files containing false negatives with their index files + description: A tuple containing the VCF and TBI file for the false negative variants pattern: "fn.vcf{.gz,.gz.tbi}" - roc: type: file