From f19ec865aa5dc48029d3b84770c5c484f44e8aad Mon Sep 17 00:00:00 2001
From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com>
Date: Mon, 20 Jun 2022 17:27:12 +0100
Subject: [PATCH] vcf2maf module with built in VEP (#1768)

vcf2maf module created

Additions:
 - vcf2maf module created
 - If VEP is present in PATH, it is added to command
 - If VEP is missing, it is ignored (recommended to skip with --inhibit-vep)
 - VEP version automatically added to versions.yml
 - Uses VEP cache during testing which is added to test-datasets in https://github.com/nf-core/test-datasets/pull/563
 - Default Docker image includes VEP and vcf2maf
 - Tests include a run without VEP.

Relates to #490
---
 modules/vcf2maf/main.nf               | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 modules/vcf2maf/meta.yml              | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/config/pytest_modules.yml       |  4 ++++
 tests/config/test_data.config         |  4 +++-
 tests/modules/vcf2maf/main.nf         | 33 +++++++++++++++++++++++++++++++++
 tests/modules/vcf2maf/nextflow.config | 15 +++++++++++++++
 tests/modules/vcf2maf/test.yml        | 15 +++++++++++++++
 7 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 modules/vcf2maf/main.nf
 create mode 100644 modules/vcf2maf/meta.yml
 create mode 100644 tests/modules/vcf2maf/main.nf
 create mode 100644 tests/modules/vcf2maf/nextflow.config
 create mode 100644 tests/modules/vcf2maf/test.yml

diff --git a/modules/vcf2maf/main.nf b/modules/vcf2maf/main.nf
new file mode 100644
index 00000000..286ca531
--- /dev/null
+++ b/modules/vcf2maf/main.nf
@@ -0,0 +1,61 @@
+// The vcf2maf version written to versions.yml is hard-coded here; keep it in sync with the
+// conda and container pins below when bumping the tool.
+def VERSION = '1.6.21'
+
+// Convert a VCF to MAF with vcf2maf.pl. If a `vep` executable is on PATH it is handed to
+// vcf2maf.pl via --vep-path and its version is appended to versions.yml; otherwise both are
+// left out (add --inhibit-vep to ext.args to skip VEP explicitly).
+process VCF2MAF {
+
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::vcf2maf=1.6.21 bioconda::ensembl-vep=106.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-b6fc09bed47d0dc4d8384ce9e04af5806f2cc91b:305092c6f8420acd17377d2cc8b96e1c3ccb7d26-0':
+        'quay.io/biocontainers/mulled-v2-b6fc09bed47d0dc4d8384ce9e04af5806f2cc91b:305092c6f8420acd17377d2cc8b96e1c3ccb7d26-0' }"
+
+    input:
+    tuple val(meta), path(vcf) // Use an uncompressed VCF file!
+    path fasta                 // Required
+    path vep_cache             // VEP cache dir; pass [] to leave --vep-data unset
+
+    output:
+    tuple val(meta), path("*.maf"), emit: maf
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args          = task.ext.args   ?: ''
+    def prefix        = task.ext.prefix ?: "${meta.id}"
+    // Only emit --vep-data when a cache was supplied (an empty [] input is falsy).
+    def vep_cache_cmd = vep_cache       ? "--vep-data $vep_cache" : ""
+
+    // Detect VEP at runtime: when found, VEP_CMD carries --vep-path and VEP_VERSION an extra
+    // versions.yml entry; when it is missing both stay empty.
+    """
+    if command -v vep &> /dev/null
+    then
+        VEP_CMD="--vep-path \$(dirname \$(type -p vep))"
+        VEP_VERSION=\$(echo -e "\\n    ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//')")
+    else
+        VEP_CMD=""
+        VEP_VERSION=""
+    fi
+
+    vcf2maf.pl \\
+        $args \\
+        \$VEP_CMD \\
+        $vep_cache_cmd \\
+        --ref-fasta $fasta \\
+        --input-vcf $vcf \\
+        --output-maf ${prefix}.maf
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        vcf2maf: $VERSION\$VEP_VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/vcf2maf/meta.yml b/modules/vcf2maf/meta.yml
new file mode 100644
index 00000000..d9387530
--- /dev/null
+++ b/modules/vcf2maf/meta.yml
@@ -0,0 +1,51 @@
+name: "vcf2maf"
+description: Convert a VCF file to MAF format with vcf2maf, optionally annotating variants with VEP
+keywords:
+  - "vcf2maf"
+  - annotation
+tools:
+  - "vcf2maf":
+      description: |
+        Convert a VCF into a MAF where each variant is annotated to only one of all possible gene isoforms using vcf2maf. vcf2maf is designed to work with VEP, so it is recommended to have VEP and vcf2maf installed when running this module. Running VEP requires a VEP cache to be present. It is recommended to set the --species and --ncbi-build in ext.args (use the module config). If you wish to skip VEP, add `--inhibit-vep` to ext.args. It may also be necessary to set --tumor-id and --normal-id for correct parsing of the VCF.
+      homepage: "https://github.com/mskcc/vcf2maf"
+      documentation: "https://github.com/mskcc/vcf2maf"
+      tool_dev_url: "https://github.com/mskcc/vcf2maf"
+      doi: "10.5281/zenodo.593251"
+      licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file
+      description: |
+        vcf to convert to MAF format. Must be uncompressed.
+  - fasta:
+      type: file
+      description: |
+        Reference genome FASTA file. Required; passed to vcf2maf.pl as --ref-fasta.
+  - vep_cache:
+      type: directory
+      description: |
+        Path to VEP cache dir. Required for correct running of VEP.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - maf:
+      type: file
+      description: MAF file produced from VCF
+      pattern: "*.maf"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@adamrtalbot"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 63cd0f39..f4d32f8c 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2144,6 +2144,10 @@ vcf2db:
   - modules/vcf2db/**
   - tests/modules/vcf2db/**
 
+vcf2maf:
+  - modules/vcf2maf/**
+  - tests/modules/vcf2maf/**
+
 vcfanno:
   - modules/vcfanno/**
   - tests/modules/vcfanno/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 47605e19..ebf3b063 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -186,6 +188,8 @@ params {
 
                 vcfanno_tar_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno_grch38_module_test.tar.gz"
                 vcfanno_toml = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/vcfanno/vcfanno.toml"
+
+                vep_cache = "${test_data_dir}/genomics/homo_sapiens/genome/vep.tar.gz"
             }
             'pangenome' {
                 pangenome_fa = "${test_data_dir}/pangenomics/homo_sapiens/pangenome.fa"
@@ -234,7 +236,7 @@ params {
 
                 mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam"
                 mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai"
-                
+
                 test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam"
 
                 read_group_settings_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/read_group_settings.txt"
diff --git a/tests/modules/vcf2maf/main.nf b/tests/modules/vcf2maf/main.nf
new file mode 100644
index 00000000..57f3a6cb
--- /dev/null
+++ b/tests/modules/vcf2maf/main.nf
@@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { VCF2MAF } from '../../../modules/vcf2maf/main.nf'
+include { UNTAR   } from '../../../modules/untar/main.nf'
+
+// Runs VCF2MAF without a VEP cache: the third input is an empty list.
+workflow test_vcf2maf_no_vep {
+
+    input_vcf = [
+        [ id:'test' ],
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
+    ]
+    fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ]
+
+    VCF2MAF ( input_vcf, fasta, [] )
+}
+
+// Runs VCF2MAF with the test VEP cache tarball unpacked by UNTAR.
+workflow test_vcf2maf_vep {
+
+    input_vcf = [
+        [ id:'test' ],
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
+    ]
+    fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ]
+    vep_cache = [ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['vep_cache'], checkIfExists: true) ]
+
+    // Keep only element 1 of each UNTAR tuple (presumably the unpacked directory, after the meta map).
+    vep_cache_unpacked = UNTAR(vep_cache).untar.map { it[1] }
+    VCF2MAF ( input_vcf, fasta, vep_cache_unpacked)
+}
diff --git a/tests/modules/vcf2maf/nextflow.config b/tests/modules/vcf2maf/nextflow.config
new file mode 100644
index 00000000..0903de1b
--- /dev/null
+++ b/tests/modules/vcf2maf/nextflow.config
@@ -0,0 +1,15 @@
+params.species = "homo_sapiens"
+params.build   = "GRCh38"
+
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    withName: 'test_vcf2maf_no_vep:VCF2MAF' {
+        ext.args = "--ncbi-build ${params.build} --species ${params.species} --inhibit-vep"
+        // NOTE(review): vcf2maf-only image, presumably without VEP, so this run exercises the no-VEP path.
+        container = "quay.io/biocontainers/vcf2maf:1.6.21--hdfd78af_0"
+    }
+    withName: 'test_vcf2maf_vep:VCF2MAF' {
+        ext.args = "--ncbi-build ${params.build} --species ${params.species}"
+    }
+}
diff --git a/tests/modules/vcf2maf/test.yml b/tests/modules/vcf2maf/test.yml
new file mode 100644
index 00000000..c0a7b4eb
--- /dev/null
+++ b/tests/modules/vcf2maf/test.yml
@@ -0,0 +1,15 @@
+- name: vcf2maf test_vcf2maf_no_vep
+  command: nextflow run ./tests/modules/vcf2maf -entry test_vcf2maf_no_vep -c ./tests/config/nextflow.config -c ./tests/modules/vcf2maf/nextflow.config
+  tags:
+    - vcf2maf
+  files:
+    - path: output/vcf2maf/test.maf
+      md5sum: c7f357efa774fd5c4003d84f05212ed0
+
+- name: vcf2maf test_vcf2maf_vep
+  command: nextflow run ./tests/modules/vcf2maf -entry test_vcf2maf_vep -c ./tests/config/nextflow.config -c ./tests/modules/vcf2maf/nextflow.config
+  tags:
+    - vcf2maf
+  files:
+    - path: output/vcf2maf/test.maf
+      md5sum: bf114692a2f421225926c9b651eff161