added bcftools convert (#1906)

* added bcftools convert

* Update modules/bcftools/convert/main.nf

Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>

* updated test.yml

* Update modules/bcftools/convert/main.nf

Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>

* added automatic output type detection

* linting

* removed a trailing whitespace

Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com>
This commit is contained in:
nvnieuwk 2022-07-19 15:09:40 +02:00 committed by GitHub
parent 6b9a6a5b29
commit 8d4373b4e8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 204 additions and 0 deletions

View file

@ -0,0 +1,51 @@
// Runs `bcftools convert` on one input file per sample.
//
// Inputs:
//   meta, input, input_index - sample map, the file to convert and its (possibly empty) index
//   bed                      - optional regions file, passed as `--regions-file` when non-empty
//   fasta                    - optional reference, passed as `--fasta-ref` when non-empty
//
// Outputs: exactly one of vcf_gz / vcf / bcf_gz / bcf is expected, depending on
// the output-type flag found in `task.ext.args`, plus `versions.yml`.
process BCFTOOLS_CONVERT {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
        'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"

    input:
    tuple val(meta), path(input), path(input_index)
    path bed
    path fasta

    output:
    tuple val(meta), path("*.vcf.gz"), optional:true , emit: vcf_gz
    tuple val(meta), path("*.vcf")   , optional:true , emit: vcf
    tuple val(meta), path("*.bcf.gz"), optional:true , emit: bcf_gz
    tuple val(meta), path("*.bcf")   , optional:true , emit: bcf
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Optional inputs arrive as empty values when not supplied; emit the flag only when present.
    def regions   = bed   ? "--regions-file ${bed}" : ""
    def reference = fasta ? "--fasta-ref ${fasta}"  : ""

    // Ordered lookup table: the first entry whose long or short flag occurs in
    // `args` decides the output file extension.
    def output_types = [
        [ long_flag: "--output-type b", short_flag: "-Ob", suffix: "bcf.gz" ],
        [ long_flag: "--output-type u", short_flag: "-Ou", suffix: "bcf"    ],
        [ long_flag: "--output-type z", short_flag: "-Oz", suffix: "vcf.gz" ],
        [ long_flag: "--output-type v", short_flag: "-Ov", suffix: "vcf"    ]
    ]
    def requested = output_types.find { candidate ->
        args.contains(candidate.long_flag) || args.contains(candidate.short_flag)
    }
    // No recognised output-type flag in `args`: fall back to a "vcf.gz" file name.
    def extension = requested ? requested.suffix : "vcf.gz"

    """
    bcftools convert \\
        $args \\
        $regions \\
        --output ${prefix}.${extension} \\
        --threads $task.cpus \\
        $reference \\
        $input

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,62 @@
# Module metadata for BCFTOOLS_CONVERT (modules/bcftools/convert/main.nf).
# Every channel declared in the process `input:`/`output:` blocks is described here.
name: "bcftools_convert"
description: Converts certain input formats (GEN/SAMPLE, gVCF, HAP/SAMPLE, HAP/LEGEND/SAMPLE, TSV) to VCF or BCF
keywords:
  - bcftools
  - convert
  - vcf
  - gvcf
tools:
  - "bcftools":
      description: "BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF. All commands work transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically even when streaming from a pipe. Indexed VCF and BCF will work in all situations. Un-indexed VCF and BCF and streams will work in most, but not all situations."
      homepage: "https://samtools.github.io/bcftools/bcftools.html"
      documentation: "https://samtools.github.io/bcftools/bcftools.html#convert"
      tool_dev_url: "https://github.com/samtools/bcftools"
      doi: "https://doi.org/10.1093/gigascience/giab008"
      # A real YAML list, not a stringified one.
      licence: ["GPL"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: |
        The input format. Each format needs a separate parameter to be specified in the `args`:
        - GEN/SAMPLE file: `--gensample2vcf`
        - gVCF file: `--gvcf2vcf`
        - HAP/SAMPLE file: `--hapsample2vcf`
        - HAP/LEGEND/SAMPLE file: `--haplegendsample2vcf`
        - TSV file: `--tsv2vcf`
      pattern: "*.{gen,sample,g.vcf,hap,legend,tsv}{.gz,}"
  - input_index:
      type: file
      description: (Optional) The index for the input files, if needed
      pattern: "*.{tbi,csi}"
  - bed:
      type: file
      description: (Optional) The BED file containing the regions for the VCF file
      pattern: "*.bed"
  - fasta:
      type: file
      description: (Optional) The reference fasta, only needed for gVCF conversion
      pattern: "*.{fa,fasta}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # Which of the four outputs below is produced depends on the output type
  # requested through `args`; without one the file is named `*.vcf.gz`.
  - vcf_gz:
      type: file
      description: (Optional) The converted compressed VCF file (`--output-type z`, also the default file name)
      pattern: "*.vcf.gz"
  - vcf:
      type: file
      description: (Optional) The converted uncompressed VCF file (`--output-type v`)
      pattern: "*.vcf"
  - bcf_gz:
      type: file
      description: (Optional) The converted compressed BCF file (`--output-type b`)
      pattern: "*.bcf.gz"
  - bcf:
      type: file
      description: (Optional) The converted uncompressed BCF file (`--output-type u`)
      pattern: "*.bcf"

authors:
  - "@nvnieuwk"

View file

@ -162,6 +162,10 @@ bcftools/consensus:
- modules/bcftools/consensus/** - modules/bcftools/consensus/**
- tests/modules/bcftools/consensus/** - tests/modules/bcftools/consensus/**
# Paths whose changes trigger the bcftools/convert module tests.
bcftools/convert:
  - modules/bcftools/convert/**
  - tests/modules/bcftools/convert/**
# Paths whose changes trigger the bcftools/filter module tests.
bcftools/filter:
  - modules/bcftools/filter/**
  - tests/modules/bcftools/filter/**

View file

@ -0,0 +1,51 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BCFTOOLS_CONVERT as BCFTOOLS_CONVERT_GVCF } from '../../../../modules/bcftools/convert/main.nf'
include { BCFTOOLS_CONVERT as BCFTOOLS_CONVERT_BCF } from '../../../../modules/bcftools/convert/main.nf'
// gVCF conversion using the `test_genome_vcf` test file, with no index and no BED regions.
workflow test_bcftools_convert_gvcf {

    gvcf      = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
    reference = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // [ meta map, input file, input index (empty: none supplied) ]
    sample = [ [ id:'test', single_end:false ], gvcf, [] ]

    // The second argument is the BED file; an empty list means "not provided".
    BCFTOOLS_CONVERT_GVCF ( sample, [], reference )
}
// gVCF conversion using the `test_genome_vcf_gz` test file and its `.tbi` index,
// restricted to the regions in the genome BED file.
workflow test_bcftools_convert_gvcf_bed {

    gvcf_gz     = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true)
    gvcf_gz_tbi = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true)
    regions     = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
    reference   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // [ meta map, input file, input index ]
    sample = [ [ id:'test', single_end:false ], gvcf_gz, gvcf_gz_tbi ]

    BCFTOOLS_CONVERT_GVCF ( sample, regions, reference )
}
// Same inputs as the plain gVCF test, but run through the BCFTOOLS_CONVERT_BCF
// alias of the process.
workflow test_bcftools_convert_gvcf_to_bcf {

    gvcf      = file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true)
    reference = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)

    // [ meta map, input file, input index (empty: none supplied) ]
    sample = [ [ id:'test', single_end:false ], gvcf, [] ]

    // The second argument is the BED file; an empty list means "not provided".
    BCFTOOLS_CONVERT_BCF ( sample, [], reference )
}

View file

@ -0,0 +1,13 @@
process {

    // Publish into a directory named after the first underscore-separated token
    // of the unqualified process name, lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // gVCF -> VCF; no output type given.
    withName: 'BCFTOOLS_CONVERT_GVCF' {
        ext.args = '--gvcf2vcf'
    }

    // gVCF -> BCF, requested through `--output-type b5`.
    withName: 'BCFTOOLS_CONVERT_BCF' {
        ext.args = '--gvcf2vcf --output-type b5'
    }
}

View file

@ -0,0 +1,23 @@
# Each entry runs one test workflow and checks that the expected output file exists.

# gVCF input, no output type given: expects test.vcf.gz
- name: bcftools convert test_bcftools_convert_gvcf
  command: nextflow run ./tests/modules/bcftools/convert -entry test_bcftools_convert_gvcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/convert/nextflow.config
  tags:
    - bcftools/convert
    - bcftools
  files:
    - path: output/bcftools/test.vcf.gz

# gVCF input restricted by a BED file: expects test.vcf.gz
- name: bcftools convert test_bcftools_convert_gvcf_bed
  command: nextflow run ./tests/modules/bcftools/convert -entry test_bcftools_convert_gvcf_bed -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/convert/nextflow.config
  tags:
    - bcftools/convert
    - bcftools
  files:
    - path: output/bcftools/test.vcf.gz

# gVCF input with `--output-type b5`: expects test.bcf.gz
- name: bcftools convert test_bcftools_convert_gvcf_to_bcf
  command: nextflow run ./tests/modules/bcftools/convert -entry test_bcftools_convert_gvcf_to_bcf -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/convert/nextflow.config
  tags:
    - bcftools/convert
    - bcftools
  files:
    - path: output/bcftools/test.bcf.gz