refactored vcfanno (#1266)

* final refactor: using tarball instead
2024-12-22 02:58:17 +00:00 · 2022-02-21 11:49:21 +01:00 · 2022-02-21 11:49:21 +01:00 · 2597c31d6b
commit 2597c31d6b
parent 15d39f841a
6 changed files with 71 additions and 24 deletions
--- a/modules/vcfanno/main.nf
+++ b/modules/vcfanno/main.nf
@ -9,11 +9,13 @@ process VCFANNO {
    input:
    tuple val(meta), path(vcf), path(tbi)
-    path vcfanno_config
+    tuple val(meta), path(vcf_uncompressed)
    path toml
    path resource_dir
    output:
-    tuple val(meta), path("*.vcf"), emit: vcf
+    tuple val(meta), path("*_annotated.vcf"), emit: vcf
-    path "versions.yml"           , emit: versions
+    path "versions.yml"                     , emit: versions
    when:
    task.ext.when == null || task.ext.when
@ -21,12 +23,15 @@ process VCFANNO {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def input_vcf = vcf_uncompressed ?: vcf
    """
    ln -sf $resource_dir/* \$(pwd)
    vcfanno \\
        -p $task.cpus \\
        $args \\
-        $vcfanno_config \\
+        $toml \\
-        $vcf \\
+        $input_vcf \\
        > ${prefix}_annotated.vcf
    cat <<-END_VERSIONS > versions.yml
--- a/modules/vcfanno/meta.yml
+++ b/modules/vcfanno/meta.yml
@ -5,6 +5,8 @@ keywords:
  - bed
  - annotate
  - variant
  - lua
  - toml
 tools:
  - vcfanno:
      description: annotate a VCF with other VCFs/BEDs/tabixed files
@ -23,19 +25,21 @@ input:
  - vcf:
      type: file
      description: query VCF file
-      pattern: "*.{vcf.gz}"
+      pattern: "*.{vcf,vcf.gz}"
-  - vcf.tbi:
+  - vcf_tabix:
      type: file
-      description: query VCF file index
+      description: tabix index of query VCF - only needed if vcf is compressed
-      pattern: "*.{vcf.gz.tbi}"
+      pattern: "*.vcf.gz.tbi"
-  - vcfanno_config:
+  - toml:
      type: file
      description: configuration file
      pattern: "*.toml"
  - resource_dir:
      type: file
      description: |
-        A simple configuration file is used to specify both the source files
+        This directory contains referenced files in the TOML config,
-        and the set of attributes (in the case of VCF)
+        and the corresponding indices, e.g. exac.vcf.gz + exac.vcf.gz.tbi,
-        or columns (in the case of BED or other tab-delimited formats)
+        with the exception of the lua file.
        that should be added to the query file.
      pattern: "*.{toml}"
 output:
  - meta:
@ -50,7 +54,7 @@ output:
  - vcf:
      type: file
      description: Annotated VCF file
-      pattern: "*.{vcf}"
+      pattern: "*.vcf"
 authors:
  - "@projectoriented"
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@ -154,7 +154,9 @@ params {
                justhusky_ped                                  = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped"
                justhusky_minimal_vcf_gz                       = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz"
                justhusky_minimal_vcf_gz_tbi                   = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi"
-
+                
                vcfanno_tar_gz                                 = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz"
                vcfanno_toml                                   = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml"
            }
            'pangenome' {
                pangenome_fa                                  = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa"
--- a/tests/modules/vcfanno/main.nf
+++ b/tests/modules/vcfanno/main.nf
@ -2,18 +2,40 @@
 nextflow.enable.dsl = 2
 include { UNTAR } from '../../../modules/untar/main.nf'
 include { VCFANNO } from '../../../modules/vcfanno/main.nf'
 workflow test_vcfanno {
    input = [ 
-        [ id:'test', single_end:false ], // meta map
+        [ id:'test_compressed', single_end:false ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
    ]
    input_2 = [ [ id:'test_compressed', single_end:false ], // meta map 
                [] ]
-    toml = file("https://raw.githubusercontent.com/nf-core/test-datasets/8fbd9f99a2feb3f9e39cd3bcdc4a9176a5835673/data/delete_me/vcfanno.toml", 
+    toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
-                checkIfExists: true)
+    resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
-    VCFANNO ( input, toml )
+    UNTAR ( resource_dir )
    VCFANNO ( input, input_2, toml, UNTAR.out.untar )
 }
 workflow test_vcfanno_uncompressed {
    input = [ [ id:'test_uncompressed', single_end:false ], // meta map
                [] ,[] ]
    input_2 = [ 
        [ id:'test_uncompressed', single_end:false ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
    ]
    toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
    resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
    UNTAR ( resource_dir )
    VCFANNO ( input, input_2, toml, UNTAR.out.untar )
 }
--- a/tests/modules/vcfanno/nextflow.config
+++ b/tests/modules/vcfanno/nextflow.config
@ -1,5 +1,5 @@
 process {
-
+    
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 }
--- a/tests/modules/vcfanno/test.yml
+++ b/tests/modules/vcfanno/test.yml
@ -3,7 +3,21 @@
  tags:
    - vcfanno
  files:
-    - path: output/vcfanno/test_annotated.vcf
+    - path: output/untar/versions.yml
-      md5sum: 34259cf6b0a4698a2917ad3554b50c0f
+      md5sum: 6de038155bccbe4d264d09529bf9f4d8
    - path: output/vcfanno/test_compressed_annotated.vcf
      md5sum: 90ceb2fd2e06e781846d69c3981db665
    - path: output/vcfanno/versions.yml
      md5sum: 62d13540503b22f04a2280c91942cb03
 - name: vcfanno test_vcfanno_uncompressed
  command: nextflow run tests/modules/vcfanno -entry test_vcfanno_uncompressed -c tests/config/nextflow.config
  tags:
    - vcfanno
  files:
    - path: output/untar/versions.yml
      md5sum: a0de70274caac0cca31b077a1e7172d9
    - path: output/vcfanno/test_uncompressed_annotated.vcf
      md5sum: 90ceb2fd2e06e781846d69c3981db665
    - path: output/vcfanno/versions.yml
      md5sum: 300218dad68c3ffcc4783daa4f7c5a43