refactored vcfanno (#1266)

* final refactor: using tarball instead
This commit is contained in:
Mei Wu 2022-02-21 11:49:21 +01:00 committed by GitHub
parent 15d39f841a
commit 2597c31d6b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 71 additions and 24 deletions

View file

@ -9,11 +9,13 @@ process VCFANNO {
input: input:
tuple val(meta), path(vcf), path(tbi) tuple val(meta), path(vcf), path(tbi)
path vcfanno_config tuple val(meta), path(vcf_uncompressed)
path toml
path resource_dir
output: output:
tuple val(meta), path("*.vcf"), emit: vcf tuple val(meta), path("*_annotated.vcf"), emit: vcf
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -21,12 +23,15 @@ process VCFANNO {
script: script:
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def input_vcf = vcf_uncompressed ?: vcf
""" """
ln -sf $resource_dir/* \$(pwd)
vcfanno \\ vcfanno \\
-p $task.cpus \\ -p $task.cpus \\
$args \\ $args \\
$vcfanno_config \\ $toml \\
$vcf \\ $input_vcf \\
> ${prefix}_annotated.vcf > ${prefix}_annotated.vcf
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml

View file

@ -5,6 +5,8 @@ keywords:
- bed - bed
- annotate - annotate
- variant - variant
- lua
- toml
tools: tools:
- vcfanno: - vcfanno:
description: annotate a VCF with other VCFs/BEDs/tabixed files description: annotate a VCF with other VCFs/BEDs/tabixed files
@ -23,19 +25,21 @@ input:
- vcf: - vcf:
type: file type: file
description: query VCF file description: query VCF file
pattern: "*.{vcf.gz}" pattern: "*.{vcf, vcf.gz}"
- vcf.tbi: - vcf_tabix:
type: file type: file
description: query VCF file index description: tabix index of query VCF - only needed if vcf is compressed
pattern: "*.{vcf.gz.tbi}" pattern: "*.vcf.gz.tbi"
- vcfanno_config: - toml:
type: file
description: configuration file
pattern: "*.toml"
- resource_dir:
type: file type: file
description: | description: |
A simple configuration file is used to specify both the source files This directory contains referenced files in the TOML config,
and the set of attributes (in the case of VCF) and the corresponding indices e.g. exac.vcf.gz + exac.vcf.gz.tbi,
or columns (in the case of BED or other tab-delimited formats) with the exception of the lua file.
that should be added to the query file.
pattern: "*.{toml}"
output: output:
- meta: - meta:
@ -50,7 +54,7 @@ output:
- vcf: - vcf:
type: file type: file
description: Annotated VCF file description: Annotated VCF file
pattern: "*.{vcf}" pattern: "*.vcf"
authors: authors:
- "@projectoriented" - "@projectoriented"

View file

@ -154,7 +154,9 @@ params {
justhusky_ped = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped" justhusky_ped = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky.ped"
justhusky_minimal_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz" justhusky_minimal_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz"
justhusky_minimal_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi" justhusky_minimal_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz.tbi"
vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz"
vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml"
} }
'pangenome' { 'pangenome' {
pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa" pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa"

View file

@ -2,18 +2,40 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { UNTAR } from '../../../modules/untar/main.nf'
include { VCFANNO } from '../../../modules/vcfanno/main.nf' include { VCFANNO } from '../../../modules/vcfanno/main.nf'
workflow test_vcfanno { workflow test_vcfanno {
input = [ input = [
[ id:'test', single_end:false ], // meta map [ id:'test_compressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
] ]
input_2 = [ [ id:'test_compressed', single_end:false ], // meta map
[] ]
toml = file("https://raw.githubusercontent.com/nf-core/test-datasets/8fbd9f99a2feb3f9e39cd3bcdc4a9176a5835673/data/delete_me/vcfanno.toml", toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
checkIfExists: true) resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
VCFANNO ( input, toml ) UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar )
} }
workflow test_vcfanno_uncompressed {
input = [ [ id:'test_uncompressed', single_end:false ], // meta map
[] ,[] ]
input_2 = [
[ id:'test_uncompressed', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]
toml = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true)
resource_dir = file(params.test_data['homo_sapiens']['genome']['vcfanno_tar_gz'], checkIfExists: true)
UNTAR ( resource_dir )
VCFANNO ( input, input_2, toml, UNTAR.out.untar )
}

View file

@ -1,5 +1,5 @@
process { process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
} }

View file

@ -3,7 +3,21 @@
tags: tags:
- vcfanno - vcfanno
files: files:
- path: output/vcfanno/test_annotated.vcf - path: output/untar/versions.yml
md5sum: 34259cf6b0a4698a2917ad3554b50c0f md5sum: 6de038155bccbe4d264d09529bf9f4d8
- path: output/vcfanno/test_compressed_annotated.vcf
md5sum: 90ceb2fd2e06e781846d69c3981db665
- path: output/vcfanno/versions.yml - path: output/vcfanno/versions.yml
md5sum: 62d13540503b22f04a2280c91942cb03 md5sum: 62d13540503b22f04a2280c91942cb03
- name: vcfanno test_vcfanno_uncompressed
command: nextflow run tests/modules/vcfanno -entry test_vcfanno_uncompressed -c tests/config/nextflow.config
tags:
- vcfanno
files:
- path: output/untar/versions.yml
md5sum: a0de70274caac0cca31b077a1e7172d9
- path: output/vcfanno/test_uncompressed_annotated.vcf
md5sum: 90ceb2fd2e06e781846d69c3981db665
- path: output/vcfanno/versions.yml
md5sum: 300218dad68c3ffcc4783daa4f7c5a43