Add bcftools/index (#812)

* feat: add bcftools index

* Extend tests to also test gen for TBI

* Update meta.yml

* Update meta.yml
This commit is contained in:
James A. Fellows Yates 2021-10-13 10:40:04 +02:00 committed by GitHub
parent 359f721cc9
commit 7b1e84f7be
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 215 additions and 0 deletions

View file

@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Derive the lower-cased software tool name from a process name ($task.process):
// the last ':'-separated element is taken, then its first '_'-separated token,
// e.g. 'WORKFLOW:BCFTOOLS_INDEX' -> 'bcftools'
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_token   = process_name.tokenize('_')[0]
    return tool_token.toLowerCase()
}
//
// Return the unqualified process name, i.e. the last ':'-separated element
// of $task.process, e.g. 'WORKFLOW:BCFTOOLS_INDEX' -> 'BCFTOOLS_INDEX'
//
def getProcessName(task_process) {
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
// args    : Map of user-supplied options; may be null or empty, in which case only defaults are returned
// returns : Map with the keys args, args2, args3, publish_by_meta, publish_dir, publish_files and suffix
//
def initOptions(Map args) {
    // A missing options map means "no overrides" rather than a NullPointerException
    def provided = args ?: [:]

    Map options = [:]
    options.args            = provided.args ?: ''
    options.args2           = provided.args2 ?: ''
    options.args3           = provided.args3 ?: ''
    options.publish_by_meta = provided.publish_by_meta ?: []
    options.publish_dir     = provided.publish_dir ?: ''
    // Deliberately no default here: saveFiles distinguishes null (publish everything)
    // from a Map (publish matching files only) and from any other value (publish nothing)
    options.publish_files   = provided.publish_files
    options.suffix          = provided.suffix ?: ''
    return options
}
//
// Tidy up and join elements of a list to return a path string
//
// path_list : List of path fragments; null, empty and whitespace-only entries are ignored
// returns   : Remaining fragments joined with '/', e.g. ['bcftools', '', '/test/'] -> 'bcftools/test'
//
def getPathFromList(path_list) {
    // Groovy truth drops both null and blank entries. The previous form
    // '!item?.trim().isEmpty()' still failed on null because '?.' does not
    // short-circuit the rest of the call chain.
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim surrounding whitespace, then strip leading and trailing slashes
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
//
// Function to save/publish module results
//
// Builds the relative publish path for args.filename, or returns null when the
// file must not be published. The path starts with the publish directory
// (options.publish_dir, falling back to args.publish_dir), optionally followed
// by one sub-directory per publish_by_meta key.
//
def saveFiles(Map args) {
    def opts     = initOptions(args.options)
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless running from pytest workflow
    def is_versions_file = args.filename.equals('versions.yml')
    if (is_versions_file && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    if (opts.publish_by_meta) {
        // A List given in the options is used as-is; any other truthy value
        // falls back to the key list supplied by the module
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!args.meta || !(meta_key instanceof String)) {
                continue
            }
            // A key that is absent from meta is used literally as the directory name
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                // Booleans become '<key>_<value>', e.g. 'single_end_true'
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Non-String values contribute an empty segment, which is dropped when joining
            segments.add(segment instanceof String ? segment : '')
        }
    }

    if (opts.publish_files instanceof Map) {
        // The first suffix that matches the filename decides the sub-directory
        def match = opts.publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (match != null) {
            return "${getPathFromList(segments + [match.value])}/$args.filename"
        }
        return null
    }
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    // publish_files is set but is not a Map: publish nothing
    return null
}

View file

@ -0,0 +1,44 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Module options default to an empty map; an including workflow can override them (e.g. via addParams)
params.options = [:]
// Fill in a default for every option key so the process can read e.g. options.args directly
options = initOptions(params.options)
//
// Create an index for a VCF file with `bcftools index`.
//
// input  : tuple of the sample meta map and the VCF file to index
// output : csi     - tuple of meta and the '*.csi' index, if one was written
//          tbi     - tuple of meta and the '*.tbi' index, if one was written
//          version - versions.yml recording the bcftools version
//
// Both index outputs are optional: the kind of index written depends on
// options.args (e.g. '-t' requests a TBI index instead of the default CSI).
//
process BCFTOOLS_INDEX {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? 'bioconda::bcftools=1.13' : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/bcftools:1.13--h3a49de5_0"
    } else {
        container "quay.io/biocontainers/bcftools:1.13--h3a49de5_0"
    }

    input:
    tuple val(meta), path(vcf)

    output:
    tuple val(meta), path("*.csi"), optional:true, emit: csi
    tuple val(meta), path("*.tbi"), optional:true, emit: tbi
    path "versions.yml"           , emit: version

    script:
    // No output prefix is needed: the command below names no output file,
    // and the outputs are collected by the '*.csi' / '*.tbi' globs
    """
    bcftools \\
        index \\
        $options.args \\
        --threads $task.cpus \\
        $vcf

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,49 @@
# Module metadata for bcftools/index: describes the tool and the module's input and output channels
name: bcftools_index
description: Index VCF files
keywords:
  - vcf
  - index
  - bcftools
  - csi
  - tbi
tools:
  - bcftools:
      description: BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF. All commands work transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically even when streaming from a pipe. Indexed VCF and BCF will work in all situations. Un-indexed VCF and BCF and streams will work in most, but not all situations.
      homepage: https://samtools.github.io/bcftools/
      documentation: https://samtools.github.io/bcftools/howtos/index.html
      tool_dev_url: https://github.com/samtools/bcftools
      doi: "10.1093/gigascience/giab008"
      licence: ['GPL']
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # Named to match the process input declaration 'path(vcf)'
  - vcf:
      type: file
      description: VCF file (optionally GZIPPED)
      pattern: "*.{vcf,vcf.gz}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - version:
      type: file
      description: File containing software version
      pattern: "versions.yml"
  # Only one of the two index files below is produced per run
  - csi:
      type: file
      description: Default VCF file index file
      pattern: "*.csi"
  - tbi:
      type: file
      description: Alternative VCF file index file for larger files (activated with -t parameter)
      pattern: "*.tbi"
authors:
  - "@jfy133"

View file

@ -70,6 +70,10 @@ bcftools/filter:
- modules/bcftools/filter/**
- tests/modules/bcftools/filter/**
# Paths whose changes select the bcftools/index module tests
bcftools/index:
  - modules/bcftools/index/**
  - tests/modules/bcftools/index/**
bcftools/isec:
- modules/bcftools/isec/**
- tests/modules/bcftools/isec/**

View file

@ -0,0 +1,23 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_CSI } from '../../../../modules/bcftools/index/main.nf' addParams( options: [:] )
include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_TBI } from '../../../../modules/bcftools/index/main.nf' addParams( options: [args: '-t'] )
//
// Run BCFTOOLS_INDEX with default (empty) options on the SARS-CoV-2 test VCF
//
workflow test_bcftools_index_csi {
    def meta = [ id:'test', single_end:false ]
    def vcf  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)

    BCFTOOLS_INDEX_CSI ( [ meta, vcf ] )
}
//
// Run BCFTOOLS_INDEX with args '-t' (set on the include) on the SARS-CoV-2 test VCF
//
workflow test_bcftools_index_tbi {
    def meta = [ id:'test', single_end:false ]
    def vcf  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)

    BCFTOOLS_INDEX_TBI ( [ meta, vcf ] )
}

View file

@ -0,0 +1,17 @@
# Default options: runs the test_bcftools_index_csi entry and expects a CSI index next to the input name
- name: bcftools index
command: nextflow run ./tests/modules/bcftools/index -entry test_bcftools_index_csi -c tests/config/nextflow.config
tags:
- bcftools
- bcftools/index
files:
- path: output/bcftools/test.vcf.gz.csi
md5sum: 5f930522d2b9dcdba2807b7da4dfa3fd
# Module included with args '-t': runs the test_bcftools_index_tbi entry and expects a TBI index instead
- name: bcftools index tbi
command: nextflow run ./tests/modules/bcftools/index -entry test_bcftools_index_tbi -c tests/config/nextflow.config
tags:
- bcftools
- bcftools/index
files:
- path: output/bcftools/test.vcf.gz.tbi
md5sum: 36e11bf96ed0af4a92caa91a68612d64