Add picard/sortvcf (#1370)

* sortvcf

* add files

* update meta

* update java mem

* update documentation link

* remove todo

* review suggestions

* fix test.yml

* fix conda error

* fix version code
This commit is contained in:
Ramprasad Neethiraj 2022-03-03 17:40:16 +01:00 committed by GitHub
parent 76cdd46f3f
commit d8028dc1c3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 123 additions and 0 deletions

View file

@ -0,0 +1,49 @@
// Sorts a VCF file with Picard SortVcf.
//
// Inputs:
//   meta, vcf      - sample metadata map and the VCF file to sort
//   reference      - reference sequence file; the flag is omitted when this evaluates to false
//   sequence_dict  - sequence dictionary file; the flag is omitted when this evaluates to false
// Outputs:
//   vcf       - tuple of meta and the sorted file "<prefix>_sorted.vcf.gz"
//   versions  - versions.yml recording the Picard version that was run
process PICARD_SORTVCF {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::picard=2.26.10" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' :
        'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }"

    input:
    tuple val(meta), path(vcf)
    path reference
    path sequence_dict

    output:
    tuple val(meta), path("*_sorted.vcf.gz"), emit: vcf
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Optional arguments use the same double-dash long-option syntax as
    // --INPUT / --OUTPUT below. Distinct local names avoid shadowing the
    // `reference` and `sequence_dict` process inputs.
    def seq_dict_arg  = sequence_dict ? "--SEQUENCE_DICTIONARY $sequence_dict" : ""
    def reference_arg = reference     ? "--REFERENCE_SEQUENCE $reference"      : ""

    // JVM heap size in GB: falls back to 3 when the task has no memory directive.
    def avail_mem = 3
    if (!task.memory) {
        log.info '[Picard SortVcf] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }
    """
    picard \\
        SortVcf \\
        -Xmx${avail_mem}g \\
        --INPUT $vcf \\
        $args \\
        $seq_dict_arg \\
        $reference_arg \\
        --OUTPUT ${prefix}_sorted.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        picard: \$(picard SortVcf --version 2>&1 | grep -o 'Version:.*' | cut -f2- -d:)
    END_VERSIONS
    """
}

View file

@ -0,0 +1,40 @@
# Module metadata for picard/sortvcf: describes the tool and the
# inputs/outputs of the PICARD_SORTVCF process.
name: picard_sortvcf
description: Sorts vcf files
keywords:
  - sort
  - vcf
tools:
  - picard:
      description: Java tools for working with NGS data in the BAM/CRAM/SAM and VCF format
      homepage: https://broadinstitute.github.io/picard/
      documentation: https://broadinstitute.github.io/picard/command-line-overview.html#SortVcf
      licence: ['MIT']
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: VCF file
      pattern: "*.{vcf,vcf.gz}"
  # Optional: the process only passes the reference sequence flag when a file is given.
  - reference:
      type: file
      description: Reference sequence file (optional)
      pattern: "*.{fasta,fa}"
  # Optional: the process only passes the sequence dictionary flag when a file is given.
  - sequence_dict:
      type: file
      description: Sequence dictionary file (optional)
      pattern: "*.dict"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Matches the process output glob, which always writes <prefix>_sorted.vcf.gz.
  - vcf:
      type: file
      description: Sorted VCF file
      pattern: "*_sorted.vcf.gz"
authors:
  - "@ramprasadn"

View file

@ -1249,6 +1249,10 @@ picard/sortsam:
- modules/picard/sortsam/**
- tests/modules/picard/sortsam/**
# Path globs covering the picard/sortvcf module sources and its tests.
picard/sortvcf:
- modules/picard/sortvcf/**
- tests/modules/picard/sortvcf/**
pirate:
- modules/pirate/**
- tests/modules/pirate/**

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { PICARD_SORTVCF } from '../../../../modules/picard/sortvcf/main.nf'
// Runs PICARD_SORTVCF once on the sarscov2 test VCF, supplying the sarscov2
// genome FASTA and sequence dictionary as the two additional inputs.
workflow test_picard_sortvcf {

    // First input: [ meta map, VCF file ]
    sample_meta = [ id:'test' ]
    sample_vcf  = file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)

    // Second and third inputs: each file wrapped in a single-element list
    reference_fasta = [
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]
    reference_dict = [
        file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
    ]

    PICARD_SORTVCF ( [ sample_meta, sample_vcf ], reference_fasta, reference_dict )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into "<outdir>/<tool>", where <tool> is derived from the process
    // name: take the last ':'-separated component, then the part before the
    // first '_', lower-cased (e.g. 'WF:PICARD_SORTVCF' -> 'picard').
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

}

View file

@ -0,0 +1,7 @@
# Test case: runs the test_picard_sortvcf workflow entry and expects the
# sorted VCF to be present under output/picard/.
- name: picard sortvcf
  # Folded scalar: the lines below are joined with single spaces into one command.
  command: >-
    nextflow run ./tests/modules/picard/sortvcf
    -entry test_picard_sortvcf
    -c ./tests/config/nextflow.config
    -c ./tests/modules/picard/sortvcf/nextflow.config
  tags:
    - picard
    - picard/sortvcf
  files:
    - path: output/picard/test_sorted.vcf.gz