Add svdb/query (#1249)

* first commit

* add test data
This commit is contained in:
Ramprasad Neethiraj 2022-02-01 20:46:10 +01:00 committed by GitHub
parent 53b324281f
commit fc4bd3a2d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 117 additions and 0 deletions

View file

@ -0,0 +1,34 @@
// SVDB_QUERY: runs `svdb --query` with a query VCF against a database VCF and
// captures the tool's stdout as `<prefix>_ann_svdbq.vcf`.
//
// Inputs:
//   meta   - Groovy map with sample information; `meta.id` is used as the task
//            tag and as the default output prefix.
//   vcf    - query VCF, passed to `--query_vcf`.
//   vcf_db - database VCF, passed to `--db`.
// Outputs:
//   vcf      - tuple of meta and the annotated VCF matching `*_ann_svdbq.vcf`.
//   versions - `versions.yml` recording the svdb version.
// Extra command-line options come from `task.ext.args`; the output prefix can
// be overridden with `task.ext.prefix`.
process SVDB_QUERY {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::svdb=2.5.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/svdb:2.5.0--py39hcbe4a3b_0':
        'quay.io/biocontainers/svdb:2.5.0--py39hcbe4a3b_0' }"

    input:
    tuple val(meta), path(vcf)
    path (vcf_db)

    output:
    tuple val(meta), path("*_ann_svdbq.vcf"), emit: vcf
    path "versions.yml"                     , emit: versions

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The version is parsed from the `usage: SVDB-x.y.z` text printed when svdb
    // is invoked without arguments. Each component is matched as one or more
    // digits so releases such as 2.10.0 are captured too; a single-digit-only
    // pattern would fail to match and leave the whole usage text in versions.yml.
    """
    svdb \\
        --query \\
        $args \\
        --db $vcf_db \\
        --query_vcf $vcf \\
        >${prefix}_ann_svdbq.vcf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        svdb: \$( echo \$(svdb) | head -1 | sed 's/usage: SVDB-\\([0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\).*/\\1/' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,43 @@
# Module metadata for svdb/query: describes the wrapped tool and the
# input/output channels of the SVDB_QUERY process.
name: svdb_query
description: Query a structural variant database, using a vcf file as query
keywords:
  - structural variants

# Wrapped software
tools:
  - svdb:
      description: structural variant database software
      homepage: https://github.com/J35P312/SVDB
      documentation: https://github.com/J35P312/SVDB/blob/master/README.md
      licence: ["MIT"]

# Input channels
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: query vcf file
      pattern: "*.{vcf,vcf.gz}"
  - vcf_db:
      type: file
      description: database vcf file
      pattern: "*.{vcf,vcf.gz}"

# Output channels
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - vcf:
      type: file
      description: Annotated output VCF file
      pattern: "*_ann_svdbq.vcf"

authors:
  - "@ramprasadn"

View file

@ -1437,6 +1437,10 @@ subread/featurecounts:
- modules/subread/featurecounts/**
- tests/modules/subread/featurecounts/**
svdb/query:
- modules/svdb/query/**
- tests/modules/svdb/query/**
tabix/bgzip:
- modules/tabix/bgzip/**
- tests/modules/tabix/bgzip/**

View file

@ -131,6 +131,7 @@ params {
mills_and_1000g_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi"
syntheticvcf_short_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz"
syntheticvcf_short_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/syntheticvcf_short.vcf.gz.tbi"
gnomad_r2_1_1_sv_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1-sv.vcf.gz"
hapmap_3_3_hg38_21_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz"
hapmap_3_3_hg38_21_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/germlineresources/hapmap_3.3.hg38.vcf.gz.tbi"
@ -277,6 +278,7 @@ params {
cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph"
test_rnaseq_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf"
test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz"
}
'pacbio' {
primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta"

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SVDB_QUERY } from '../../../../modules/svdb/query/main.nf'
// Test entry point: feeds one query VCF and one database VCF from the shared
// test-data map into SVDB_QUERY.
workflow test_svdb_query {

    // Resolve both test files up front; checkIfExists is set so a missing
    // file is reported when the handle is created.
    query_vcf = file(params.test_data['homo_sapiens']['illumina']['test_sv_vcf'], checkIfExists: true)
    db_vcf    = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_sv_vcf_gz'], checkIfExists: true)

    // First argument: [ meta map, [ query vcf ] ]; second: [ database vcf ]
    SVDB_QUERY ( [ [ id:'test' ], [ query_vcf ] ], [ db_vcf ] )
}

View file

@ -0,0 +1,9 @@
// Test-only process configuration for the svdb/query module.
process {

    // Publish directory is derived from the process name: take the last
    // ':'-separated segment, keep the part before the first '_', lowercase it,
    // and place it under params.outdir.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Extra command-line options handed to SVDB_QUERY through ext.args.
    withName: 'SVDB_QUERY' {
        ext.args = '--in_occ AC --out_occ gnomad_svAC --in_frq AF --out_frq gnomad_svAF'
    }
}

View file

@ -0,0 +1,7 @@
# Test case for the svdb/query module: runs the test workflow and checks
# that the annotated VCF is produced.
- name: svdb query
  command: nextflow run ./tests/modules/svdb/query -entry test_svdb_query -c ./tests/config/nextflow.config -c ./tests/modules/svdb/query/nextflow.config
  tags:
    - svdb
    - svdb/query
  # Expected outputs, relative to the test working directory
  files:
    - path: output/svdb/test_ann_svdbq.vcf