module: deepvariant (#572)

* Add stubs for deepvariant [ci skip]

* Update the stubs for deepvariant [ci skip]

* functional with google docker image

* cleanup

* consume docker container within singularity executor

* update the meta.yml file and ask for review

* tweak the input channel shape and test data

* tweak input data [ci skip]

* update for the new syntax

* remove the functions and rename meta vars

* Update the arguments mechanism

* update chr, region and checksum

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
Co-authored-by: Maxime U. Garcia <max.u.garcia@gmail.com>
Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
This commit is contained in:
Abhinav Sharma 2022-01-17 12:25:36 +02:00 committed by GitHub
parent c9587d76e2
commit 6243c3792c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 142 additions and 0 deletions

View file

@ -0,0 +1,43 @@
// Runs DeepVariant's `run_deepvariant` wrapper on one alignment file and emits
// a compressed VCF, a compressed gVCF and a versions.yml file.
//
// Inputs:
//   meta   - sample metadata map; `meta.id` is used for the tag and as the default prefix
//   bam    - alignment file passed to --reads
//   bai    - staged next to the BAM, not referenced on the command line
//            (presumably located by the tool alongside the BAM - confirm)
//   fasta  - reference passed to --ref
//   fai    - staged next to the reference, not referenced on the command line
// Outputs:
//   vcf      - [ meta, <prefix>.vcf.gz ]
//   gvcf     - [ meta, <prefix>.g.vcf.gz ]
//   versions - versions.yml
process DEEPVARIANT {
    tag "$meta.id"
    label 'process_medium'

    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the DeepVariant tool. Please use docker or singularity containers."
    }

    // The same Docker Hub image is used regardless of the container engine,
    // so no engine-specific branching is required.
    container 'google/deepvariant:1.2.0'

    input:
    tuple val(meta), path(bam), path(bai)
    path(fasta)
    path(fai)

    output:
    // Exact file names are used instead of globs: "*.vcf.gz" would also match
    // "<prefix>.g.vcf.gz" and leak the gVCF into the `vcf` channel.
    tuple val(meta), path("${prefix}.vcf.gz")  , emit: vcf
    tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf
    path "versions.yml"                        , emit: versions

    script:
    def args = task.ext.args ?: ''
    // Deliberately declared without `def` so that the output block can resolve it.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    /opt/deepvariant/bin/run_deepvariant \\
        --ref=${fasta} \\
        --reads=${bam} \\
        --output_vcf=${prefix}.vcf.gz \\
        --output_gvcf=${prefix}.g.vcf.gz \\
        ${args} \\
        --num_shards=${task.cpus}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,58 @@
# Module description for the DEEPVARIANT process: tool metadata plus the
# documented input and output channels.
name: deepvariant
description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data
keywords:
  - variant calling
  - machine learning
tools:
  - deepvariant:
      description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data
      homepage: https://github.com/google/deepvariant
      documentation: https://github.com/google/deepvariant
      tool_dev_url: https://github.com/google/deepvariant
      doi: "https://doi.org/10.1038/nbt.4235"
      licence: ['BSD-3-clause']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file
      pattern: "*.bam"
  - bai:
      type: file
      description: Index of BAM file
      pattern: "*.bai"
  - fasta:
      type: file
      description: The reference fasta file
      pattern: "*.fasta"
  - fai:
      type: file
      description: Index of reference fasta file
      pattern: "*.fai"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: Compressed VCF file
      pattern: "*.vcf.gz"
  - gvcf:
      type: file
      description: Compressed GVCF file
      pattern: "*.g.vcf.gz"
  # Matches the process, which emits `versions` as a file named versions.yml.
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@abhi18av"

View file

@ -396,6 +396,10 @@ deeptools/plotprofile:
- modules/deeptools/plotprofile/**
- tests/modules/deeptools/plotprofile/**
# Path globs registered for deepvariant: the module sources and its tests.
deepvariant:
- modules/deepvariant/**
- tests/modules/deepvariant/**
delly/call:
- modules/delly/call/**
- tests/modules/delly/call/**

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { DEEPVARIANT } from '../../../modules/deepvariant/main.nf'
// Test entry point: feeds one paired-end human BAM (with index) and the
// matching reference genome (with index) from `params.test_data` into DEEPVARIANT.
workflow test_deepvariant {

    sample_meta = [ id:'test', single_end:false ] // meta map
    illumina    = params.test_data['homo_sapiens']['illumina']
    genome      = params.test_data['homo_sapiens']['genome']

    // A single [ meta, bam, bai ] tuple, as expected by the first input of the process
    bam_file = file(illumina['test_paired_end_sorted_bam'], checkIfExists: true)
    bai_file = file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    reads_ch = Channel.of([ sample_meta, bam_file, bai_file ])

    reference       = file(genome['genome_fasta'], checkIfExists: true)
    reference_index = file(genome['genome_fasta_fai'], checkIfExists: true)

    DEEPVARIANT ( reads_ch, reference, reference_index )
}

View file

@ -0,0 +1,10 @@
// Process-scope settings for the deepvariant module test.
process {

    // Publish under <outdir>/<name>, where <name> is the unqualified process
    // name cut at its first '_' and lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    withName: 'DEEPVARIANT' {
        // Output files are named "<meta.id>_out.*"
        ext.prefix = { "${meta.id}_out" }
        // Extra command-line options appended to run_deepvariant
        ext.args   = ' --regions=\"chr22:0-40001\" --model_type=WGS '
    }

}

View file

@ -0,0 +1,9 @@
# Test case: runs the test_deepvariant entry point and checks the two
# published VCF outputs against their expected MD5 checksums.
- name: deepvariant
  command: nextflow run ./tests/modules/deepvariant -entry test_deepvariant -c tests/config/nextflow.config
  tags:
    - deepvariant
  files:
    - path: output/deepvariant/test_out.vcf.gz
      md5sum: '66d86be7a9bafe1f5c6304ebee18ee20'
    - path: output/deepvariant/test_out.g.vcf.gz
      md5sum: '4ca868f0a4fdb17a280c3ed083d228e0'