vcf2maf module with built in VEP (#1768)

vcf2maf module created

Additions:
 - vcf2maf module created
 - If VEP is present in PATH, its location is passed to the vcf2maf command via --vep-path
 - If VEP is missing, it is ignored (recommended to skip with --inhibit-vep)
 - VEP version automatically added to versions.yml
 - Uses VEP cache during testing which is added to test-datasets in https://github.com/nf-core/test-datasets/pull/563
 - Default Docker image includes VEP and vcf2maf
 - Tests include a run without VEP.

Relates to #490
This commit is contained in:
Adam Talbot 2022-06-20 17:27:12 +01:00 committed by GitHub
parent be8d7b3293
commit f19ec865aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 168 additions and 1 deletions

55
modules/vcf2maf/main.nf Normal file
View file

@ -0,0 +1,55 @@
// Converts an uncompressed VCF into a MAF file with vcf2maf.pl.
//
// If a `vep` executable is found on PATH in the task environment, its directory is
// handed to vcf2maf through --vep-path and the VEP version is appended to
// versions.yml. If it is not found, neither is added (callers are expected to put
// `--inhibit-vep` in ext.args in that case).
process VCF2MAF {
    tag "$meta.id"
    label 'process_low'

    // NOTE: the vcf2maf version reported in versions.yml is the hard-coded VERSION
    // string in the script section below; keep it in sync with the pins used here.
    conda (params.enable_conda ? "bioconda::vcf2maf=1.6.21 bioconda::ensembl-vep=106.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-b6fc09bed47d0dc4d8384ce9e04af5806f2cc91b:305092c6f8420acd17377d2cc8b96e1c3ccb7d26-0':
        'quay.io/biocontainers/mulled-v2-b6fc09bed47d0dc4d8384ce9e04af5806f2cc91b:305092c6f8420acd17377d2cc8b96e1c3ccb7d26-0' }"

    input:
    tuple val(meta), path(vcf) // Use an uncompressed VCF file!
    path fasta                 // Required. Passed to vcf2maf as --ref-fasta.
    path vep_cache             // VEP cache directory. When empty, --vep-data is not passed and vcf2maf uses its own default.

    output:
    tuple val(meta), path("*.maf"), emit: maf
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args          = task.ext.args   ?: ''
    def prefix        = task.ext.prefix ?: "${meta.id}"
    // Only emit --vep-data when a cache path was actually supplied.
    def vep_cache_cmd = vep_cache       ? "--vep-data $vep_cache" : ""
    // Hard-coded because the script does not query vcf2maf for its version.
    // Update this string whenever the conda/container pins above are bumped.
    def VERSION = '1.6.21'
    // In the shell script below:
    //  - If VEP is present, VEP_CMD carries --vep-path and VEP_VERSION carries an extra
    //    "ensemblvep" line (leading newline + indent) for the versions.yml heredoc.
    //  - If VEP is not present, both are left blank.
    """
    if command -v vep &> /dev/null
    then
        VEP_CMD="--vep-path \$(dirname "\$(type -p vep)")"
        VEP_VERSION=\$(echo -e "\\n    ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')")
    else
        VEP_CMD=""
        VEP_VERSION=""
    fi

    vcf2maf.pl \\
        $args \\
        \$VEP_CMD \\
        $vep_cache_cmd \\
        --ref-fasta $fasta \\
        --input-vcf $vcf \\
        --output-maf ${prefix}.maf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        vcf2maf: $VERSION\$VEP_VERSION
    END_VERSIONS
    """
}

47
modules/vcf2maf/meta.yml Normal file
View file

@ -0,0 +1,47 @@
# Module metadata for VCF2MAF: describes the wrapped tool and every input/output
# channel declared by the process.
name: "vcf2maf"
description: Convert a VCF into a MAF file with vcf2maf, optionally annotating variants with VEP
keywords:
  - "vcf2maf"
  - annotation
tools:
  - "vcf2maf":
      description: |
        Convert a VCF into a MAF where each variant is annotated to only one of all possible gene isoforms using vcf2maf. vcf2maf is designed to work with VEP, so it is recommended to have VEP and vcf2maf installed when running this module. Running VEP requires a VEP cache to be present. It is recommended to set the --species and --ncbi-build in ext.args (use the module config). If you wish to skip VEP, add `--inhibit-vep` to ext.args. It may also be necessary to set --tumor-id and --normal-id for correct parsing of the VCF.
      homepage: "https://github.com/mskcc/vcf2maf"
      documentation: "https://github.com/mskcc/vcf2maf"
      tool_dev_url: "https://github.com/mskcc/vcf2maf"
      doi: "10.5281/zenodo.593251"
      licence: ["Apache-2.0"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: |
        vcf to convert to MAF format. Must be uncompressed.
  # The process takes the reference FASTA as its second input and passes it to
  # vcf2maf as --ref-fasta.
  - fasta:
      type: file
      description: |
        Reference genome FASTA file. Required.
  - vep_cache:
      type: file
      description: |
        Path to VEP cache dir. Required for correct running of VEP.
        Pass an empty list ([]) to leave --vep-data unset.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - maf:
      type: file
      description: MAF file produced from VCF
      pattern: "*.maf"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@adamrtalbot"

View file

@ -2144,6 +2144,10 @@ vcf2db:
- modules/vcf2db/** - modules/vcf2db/**
- tests/modules/vcf2db/** - tests/modules/vcf2db/**
vcf2maf:
- modules/vcf2maf/**
- tests/modules/vcf2maf/**
vcfanno: vcfanno:
- modules/vcfanno/** - modules/vcfanno/**
- tests/modules/vcfanno/** - tests/modules/vcfanno/**

View file

@ -186,6 +186,8 @@ params {
vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz" vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz"
vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml" vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml"
vep_cache = "${test_data_dir}/genomics/homo_sapiens/genome/vep.tar.gz"
} }
'pangenome' { 'pangenome' {
pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa" pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa"
@ -234,7 +236,7 @@ params {
mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam"
mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai"
test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam" test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam"
read_group_settings_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/read_group_settings.txt" read_group_settings_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/read_group_settings.txt"

View file

@ -0,0 +1,30 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { VCF2MAF } from '../../../modules/vcf2maf/main.nf'
include { UNTAR } from '../../../modules/untar/main.nf'
// Exercises VCF2MAF with no VEP cache: the third input is an empty list, so the
// module does not add --vep-data to the command line.
workflow test_vcf2maf_no_vep {

    vcf_file   = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
    genome_fa  = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // [ meta, vcf ] tuple expected by the module's first input
    sample_vcf = [ [ id:'test' ], vcf_file ]

    VCF2MAF ( sample_vcf, [ genome_fa ], [] )
}
// Exercises VCF2MAF with a VEP cache: the cache tarball from the test data is
// unpacked with UNTAR and the unpacked path is fed to the module's third input.
workflow test_vcf2maf_vep {

    vcf_file      = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
    genome_fa     = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    cache_tarball = file(params.test_data['homo_sapiens']['genome']['vep_cache'], checkIfExists: true)

    // [ meta, vcf ] tuple expected by the module's first input
    sample_vcf = [ [ id:'test' ], vcf_file ]

    UNTAR ( [ [ id:'test' ], cache_tarball ] )

    // Keep only the second element of each emitted item (drops the leading meta entry)
    unpacked_cache = UNTAR.out.untar.map { unpacked -> unpacked[1] }

    VCF2MAF ( sample_vcf, [ genome_fa ], unpacked_cache )
}

View file

@ -0,0 +1,14 @@
// Test configuration for the vcf2maf module tests.
// `species` and `build` are interpolated into ext.args for both test workflows.
params {
    species = "homo_sapiens"
    build   = "GRCh38"
}

process {

    // Publish every process' outputs under <outdir>/<lower-cased first token of the process name>
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // VEP-enabled run: uses the module's default container.
    withName: 'test_vcf2maf_vep:VCF2MAF' {
        ext.args = "--ncbi-build ${params.build} --species ${params.species}"
    }

    // VEP-free run: VEP is skipped via --inhibit-vep and the container is
    // overridden with a vcf2maf-only image.
    withName: 'test_vcf2maf_no_vep:VCF2MAF' {
        ext.args  = "--ncbi-build ${params.build} --species ${params.species} --inhibit-vep"
        container = "quay.io/biocontainers/vcf2maf:1.6.21--hdfd78af_0"
    }
}

View file

@ -0,0 +1,15 @@
# pytest-workflow definitions for the vcf2maf module.
# Each test checks the MAF checksum and that versions.yml was published; the
# versions file gets an existence check only (no md5sum).
- name: vcf2maf test_vcf2maf_no_vep
  command: nextflow run ./tests/modules/vcf2maf -entry test_vcf2maf_no_vep -c ./tests/config/nextflow.config -c ./tests/modules/vcf2maf/nextflow.config
  tags:
    - vcf2maf
  files:
    - path: output/vcf2maf/test.maf
      md5sum: c7f357efa774fd5c4003d84f05212ed0
    - path: output/vcf2maf/versions.yml

- name: vcf2maf test_vcf2maf_vep
  command: nextflow run ./tests/modules/vcf2maf -entry test_vcf2maf_vep -c ./tests/config/nextflow.config -c ./tests/modules/vcf2maf/nextflow.config
  tags:
    - vcf2maf
  files:
    - path: output/vcf2maf/test.maf
      md5sum: bf114692a2f421225926c9b651eff161
    - path: output/vcf2maf/versions.yml