refactored vcfanno (#1266)

* final refactor: using tarball instead
This commit is contained in:
Mei Wu 2022-02-21 11:49:21 +01:00 committed by GitHub
parent 15d39f841a
commit 2597c31d6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 71 additions and 24 deletions

View file

@ -9,11 +9,13 @@ process VCFANNO {
input:
tuple val(meta), path(vcf), path(tbi)
path vcfanno_config
tuple val(meta), path(vcf_uncompressed)
path toml
path resource_dir
output:
tuple val(meta), path("*.vcf"), emit: vcf
path "versions.yml" , emit: versions
tuple val(meta), path("*_annotated.vcf"), emit: vcf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@ -21,12 +23,15 @@ process VCFANNO {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_vcf = vcf_uncompressed ?: vcf
"""
ln -sf $resource_dir/* \$(pwd)
vcfanno \\
-p $task.cpus \\
$args \\
$vcfanno_config \\
$vcf \\
$toml \\
$input_vcf \\
> ${prefix}_annotated.vcf
cat <<-END_VERSIONS > versions.yml

View file

@ -5,6 +5,8 @@ keywords:
- bed
- annotate
- variant
- lua
- toml
tools:
- vcfanno:
description: annotate a VCF with other VCFs/BEDs/tabixed files
@ -23,19 +25,21 @@ input:
- vcf:
type: file
description: query VCF file
pattern: "*.{vcf.gz}"
- vcf.tbi:
pattern: "*.{vcf,vcf.gz}"
- vcf_tabix:
type: file
description: query VCF file index
pattern: "*.{vcf.gz.tbi}"
- vcfanno_config:
description: tabix index of query VCF - only needed if vcf is compressed
pattern: "*.vcf.gz.tbi"
- toml:
type: file
description: configuration file
pattern: "*.toml"
- resource_dir:
type: file
description: |
A simple configuration file is used to specify both the source files
and the set of attributes (in the case of VCF)
or columns (in the case of BED or other tab-delimited formats)
that should be added to the query file.
pattern: "*.{toml}"
This directory contains the files referenced in the TOML config,
and the corresponding indices, e.g. exac.vcf.gz + exac.vcf.gz.tbi,
with the exception of the Lua file.
output:
- meta:
@ -50,7 +54,7 @@ output:
- vcf:
type: file
description: Annotated VCF file
pattern: "*.{vcf}"
pattern: "*.vcf"
authors:
- "@projectoriented"

View file

@ -154,7 +154,9 @@ params {
justhusky_ped = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped"
justhusky_minimal_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz"
justhusky_minimal_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi"
vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz"
vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml"
}
'pangenome' {
pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa"

View file

@ -2,18 +2,40 @@
nextflow.enable.dsl = 2
include { UNTAR } from '../../../modules/untar/main.nf'
include { VCFANNO } from '../../../modules/vcfanno/main.nf'
workflow test_vcfanno {
input = [
[ id:'test', single_end:false ], // meta map
[ id:'test_compressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
]
input_2 = [ [ id:'test_compressed', single_end:false ], // meta map
[] ]
toml = file("https://raw.githubusercontent.com/nf-core/test-datasets/8fbd9f99a2feb3f9e39cd3bcdc4a9176a5835673/data/delete_me/vcfanno.toml",
checkIfExists: true)
toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
VCFANNO ( input, toml )
UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar )
}
// Test entry point: runs VCFANNO with an uncompressed VCF as the query.
workflow test_vcfanno_uncompressed {
// First VCFANNO input (meta, vcf, tbi): both file slots are left empty here
// because the query is supplied through the second input instead.
input = [ [ id:'test_uncompressed', single_end:false ], // meta map
[] ,[] ]
// Second VCFANNO input (meta, vcf_uncompressed): carries the actual query VCF.
input_2 = [
[ id:'test_uncompressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]
// vcfanno TOML configuration taken from the shared test-data map.
toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
// Tarball from the test-data map; it is handed to UNTAR below.
resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
// Unpack the tarball and pass UNTAR's output as VCFANNO's resource directory.
UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar )
}

View file

@ -1,5 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -3,7 +3,21 @@
tags:
- vcfanno
files:
- path: output/vcfanno/test_annotated.vcf
md5sum: 34259cf6b0a4698a2917ad3554b50c0f
- path: output/untar/versions.yml
md5sum: 6de038155bccbe4d264d09529bf9f4d8
- path: output/vcfanno/test_compressed_annotated.vcf
md5sum: 90ceb2fd2e06e781846d69c3981db665
- path: output/vcfanno/versions.yml
md5sum: 62d13540503b22f04a2280c91942cb03
- name: vcfanno test_vcfanno_uncompressed
command: nextflow run tests/modules/vcfanno -entry test_vcfanno_uncompressed -c tests/config/nextflow.config
tags:
- vcfanno
files:
- path: output/untar/versions.yml
md5sum: a0de70274caac0cca31b077a1e7172d9
- path: output/vcfanno/test_uncompressed_annotated.vcf
md5sum: 90ceb2fd2e06e781846d69c3981db665
- path: output/vcfanno/versions.yml
md5sum: 300218dad68c3ffcc4783daa4f7c5a43