new module: svtk/standardize (#2031)

* new module: svtk/standardize

* linting

* fixed tests
This commit is contained in:
nvnieuwk 2022-09-08 15:03:08 +02:00 committed by GitHub
parent 3eb99152ce
commit bf09fdf892
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 217 additions and 4 deletions

View file

@ -0,0 +1,45 @@
// Runs `svtk standardize` on one VCF and emits the standardized, gzipped VCF.
//
// Inputs:
//   meta, vcf  - sample meta map and the VCF to standardize
//   fasta_fai  - optional FASTA index passed as `--contigs`; pass `[]` to omit it
//
// Configuration (task.ext.args):
//   Either a map `[ args: '<extra CLI options>', caller: '<source caller>' ]`
//   or a plain string of extra CLI options. When no caller is given it
//   defaults to 'delly'.
process SVTK_STANDARDIZE {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::svtk=0.0.20190615" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/svtk:0.0.20190615--py37h73a75cf_2':
        'quay.io/biocontainers/svtk:0.0.20190615--py37h73a75cf_2' }"

    input:
    tuple val(meta), path(vcf)
    path fasta_fai

    output:
    tuple val(meta), path("*.std.vcf.gz"), emit: standardized_vcf
    path "versions.yml"                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Fall back to an empty map (not an empty string): property access such as
    // `args.caller` on a String throws MissingPropertyException.
    def args = task.ext.args ?: [:]
    def prefix = task.ext.prefix ?: "${meta.id}"
    // A non-map ext.args is treated as the raw option string, so a plain
    // string configuration does not crash the process.
    def is_map = args instanceof Map
    def arguments = is_map ? (args.args ?: '') : args
    def caller = is_map ? (args.caller ?: 'delly') : 'delly'
    def VERSION = '0.0.20190615' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // An empty/falsy fasta_fai (e.g. `[]`) drops the --contigs option entirely.
    def contigs = fasta_fai ? "--contigs ${fasta_fai}" : ""
    """
    svtk standardize \\
        ${arguments} \\
        ${contigs} \\
        ${vcf} \\
        ${prefix}.std.vcf.gz \\
        ${caller}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        svtk: ${VERSION}
    END_VERSIONS
    """
}

View file

@ -0,0 +1,61 @@
# Module metadata for svtk/standardize: tool references plus the documented
# inputs, outputs and authors of the module.
name: "svtk_standardize"
description: Convert SV calls to a standardized format.
keywords:
  - svtk
  - structural variants
  - SV
  - vcf
  - standardization
tools:
  - "svtk":
      description: "Utilities for consolidating, filtering, resolving, and annotating structural variants."
      homepage: "https://github.com/broadinstitute/gatk-sv/tree/master/src/svtk"
      documentation: "https://github.com/broadinstitute/gatk-sv/tree/master/src/svtk"
      tool_dev_url: "https://github.com/broadinstitute/gatk-sv/tree/master/src/svtk"
      doi: ""
      # A real YAML sequence; the previous quoted form "['MIT']" was a plain
      # string that merely looked like a list.
      licence: ["MIT"]

input:
  # Not an input channel: this documents the expected shape of `ext.args`.
  - args:
      type: map
      description: |
        Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes.
        ```
        {
            [
                "args": "",
                "caller": "delly" // Should be either delly, lumpy, manta, wham or melt
            ]
        }
        ```
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: A gzipped VCF file to be standardized
      pattern: "*.vcf.gz"
  - fasta_fai:
      type: file
      description: Optional fasta index file that specifies the contigs to be used in the VCF header (defaults to all contigs of GRCh37)
      pattern: "*.fai"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - standardized_vcf:
      type: file
      description: A gzipped version of the standardized VCF file
      pattern: "*.std.vcf.gz"

authors:
  - "@nvnieuwk"

View file

@ -1538,14 +1538,14 @@ metabat2/metabat2:
- modules/metabat2/metabat2/**
- tests/modules/metabat2/metabat2/**
metaphlan3/metaphlan3:
- modules/metaphlan3/metaphlan3/**
- tests/modules/metaphlan3/metaphlan3/**
metaphlan3/mergemetaphlantables:
- modules/metaphlan3/mergemetaphlantables/**
- tests/modules/metaphlan3/mergemetaphlantables/**
metaphlan3/metaphlan3:
- modules/metaphlan3/metaphlan3/**
- tests/modules/metaphlan3/metaphlan3/**
methyldackel/extract:
- modules/methyldackel/extract/**
- tests/modules/methyldackel/extract/**
@ -2263,6 +2263,10 @@ svdb/query:
- modules/svdb/query/**
- tests/modules/svdb/query/**
# Path globs registered for the svtk/standardize module: its implementation
# directory and its test directory.
svtk/standardize:
- modules/svtk/standardize/**
- tests/modules/svtk/standardize/**
tabix/bgzip:
- modules/tabix/bgzip/**
- tests/modules/tabix/bgzip/**

View file

@ -0,0 +1,56 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SVTK_STANDARDIZE } from '../../../../modules/svtk/standardize/main.nf'
include { MANTA_GERMLINE } from '../../../../modules/manta/germline/main.nf'
// Calls structural variants with MANTA_GERMLINE, then standardizes the
// resulting diploid SV VCF with SVTK_STANDARDIZE, passing the FASTA index
// as the contig list.
workflow test_svtk_standardize {

    human_data = params.test_data['homo_sapiens']

    // Manta input tuple: meta map, CRAM and its index, gzipped BED and its index
    manta_input = [
        [ id:'test', single_end:false ],
        file(human_data['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
        file(human_data['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
        file(human_data['genome']['genome_bed_gz'], checkIfExists: true),
        file(human_data['genome']['genome_bed_gz_tbi'], checkIfExists: true)
    ]

    reference_fasta = file(human_data['genome']['genome_fasta'], checkIfExists: true)
    reference_fai   = file(human_data['genome']['genome_fasta_fai'], checkIfExists: true)

    MANTA_GERMLINE ( manta_input, reference_fasta, reference_fai )

    // Second argument is the FASTA index, so the module adds --contigs
    SVTK_STANDARDIZE ( MANTA_GERMLINE.out.diploid_sv_vcf, reference_fai )
}
// Same pipeline as the contig-aware test, but SVTK_STANDARDIZE receives an
// empty list instead of a FASTA index, exercising the branch where no
// --contigs option is passed.
workflow test_svtk_standardize_no_contigs {

    human_data = params.test_data['homo_sapiens']

    // Manta input tuple: meta map, CRAM and its index, gzipped BED and its index
    manta_input = [
        [ id:'test', single_end:false ],
        file(human_data['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
        file(human_data['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
        file(human_data['genome']['genome_bed_gz'], checkIfExists: true),
        file(human_data['genome']['genome_bed_gz_tbi'], checkIfExists: true)
    ]

    reference_fasta = file(human_data['genome']['genome_fasta'], checkIfExists: true)
    // Still required by Manta even though svtk does not get it here
    reference_fai   = file(human_data['genome']['genome_fasta_fai'], checkIfExists: true)

    MANTA_GERMLINE ( manta_input, reference_fasta, reference_fai )

    // Empty list stands in for the optional FASTA index input
    SVTK_STANDARDIZE ( MANTA_GERMLINE.out.diploid_sv_vcf, [] )
}

View file

@ -0,0 +1,12 @@
// Test configuration for the svtk/standardize module.
process {

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased text
    // before the first underscore of the unqualified process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // The module reads ext.args as a map: extra CLI options plus the source caller
    withName: SVTK_STANDARDIZE {
        ext.args = [
            args   : '',
            caller : 'manta'
        ]
    }

}

View file

@ -0,0 +1,35 @@
# Test entry for the workflow that passes a FASTA index to svtk standardize.
- name: svtk standardize test_svtk_standardize
  command: nextflow run ./tests/modules/svtk/standardize -entry test_svtk_standardize -c ./tests/config/nextflow.config -c ./tests/modules/svtk/standardize/nextflow.config
  tags:
    - svtk
    - svtk/standardize
  files:
    # Manta outputs: VCFs are listed by path only, indexes carry a checksum
    - path: output/manta/test.candidate_small_indels.vcf.gz
    - path: output/manta/test.candidate_small_indels.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    - path: output/manta/test.candidate_sv.vcf.gz
    - path: output/manta/test.candidate_sv.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    - path: output/manta/test.diploid_sv.vcf.gz
    - path: output/manta/test.diploid_sv.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    # Standardized VCF produced by svtk
    - path: output/svtk/test.std.vcf.gz
      md5sum: f7530f3bc7e6020e758cc996adc8ae35
# Test entry for the workflow that runs svtk standardize without a FASTA index.
- name: svtk standardize test_svtk_standardize_no_contigs
  command: nextflow run ./tests/modules/svtk/standardize -entry test_svtk_standardize_no_contigs -c ./tests/config/nextflow.config -c ./tests/modules/svtk/standardize/nextflow.config
  tags:
    - svtk
    - svtk/standardize
  files:
    # Manta outputs: VCFs are listed by path only, indexes carry a checksum
    - path: output/manta/test.candidate_small_indels.vcf.gz
    - path: output/manta/test.candidate_small_indels.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    - path: output/manta/test.candidate_sv.vcf.gz
    - path: output/manta/test.candidate_sv.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    - path: output/manta/test.diploid_sv.vcf.gz
    - path: output/manta/test.diploid_sv.vcf.gz.tbi
      md5sum: 4cb176febbc8c26d717a6c6e67b9c905
    # Standardized VCF produced by svtk; checksum differs from the contig-aware run
    - path: output/svtk/test.std.vcf.gz
      md5sum: ea67be7438a0fb72db8096e1bc1bb957