Merge pull request #179 from MaxUlysse/master_htslib_tabix

Replace bcftools/tabix by tabix/tabix
This commit is contained in:
Harshil Patel 2021-02-16 22:30:45 +00:00 committed by GitHub
commit b07ad23efa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 133 additions and 155 deletions

8
.github/filters.yml vendored
View file

@ -26,10 +26,6 @@ bcftools_stats:
- software/bcftools/stats/** - software/bcftools/stats/**
- tests/software/bcftools/stats/** - tests/software/bcftools/stats/**
bcftools_tabix:
- software/bcftools/tabix/**
- tests/software/bcftools/tabix/**
bedtools_complement: bedtools_complement:
- software/bedtools/complement/** - software/bedtools/complement/**
- tests/software/bedtools/complement/** - tests/software/bedtools/complement/**
@ -140,6 +136,10 @@ gunzip:
- software/gunzip/** - software/gunzip/**
- tests/software/gunzip/** - tests/software/gunzip/**
tabix_tabix:
- software/tabix/tabix/**
- tests/software/tabix/tabix/**
ivar_consensus: ivar_consensus:
- software/ivar/consensus/** - software/ivar/consensus/**
- tests/software/ivar/consensus/** - tests/software/ivar/consensus/**

View file

@ -1,16 +0,0 @@
// Runs `tabix -p vcf` on the staged input file and emits the resulting
// `.tbi` index.
process htslib_tabix {
// Label each task with the name of the file being indexed.
tag "$vcf"
// Container image the task is executed in.
container 'quay.io/biocontainers/tabix:0.2.6--ha92aebf_0'
input:
// File passed to tabix in the script section below.
path vcf
output:
// Declared output: the input file name with `.tbi` appended.
path "${vcf}.tbi"
script:
"""
tabix -p vcf ${vcf}
"""
}

View file

@ -1,26 +0,0 @@
# Module metadata for the htslib tabix indexing process.
name: htslib tabix
description: create tabix index from a bgzip vcf file
keywords:
    - index
    - tabix
tools:
    # Keyed by the tool this module runs. The entry was previously keyed
    # `bwa`, which contradicted the description and links below.
    - tabix:
        description: |
            Generic indexer for TAB-delimited genome position files.
        homepage: https://www.htslib.org/
        documentation: https://www.htslib.org/doc/tabix.1.html
        doi: 10.1093/bioinformatics/btq671
# Files consumed by the process.
input:
    -
        - input:
            type: file
            description: Input vcf.gz file
            pattern: "*.{vcf.gz}"
# Files produced by the process.
output:
    -
        - index:
            type: file
            description: tabix index file
            pattern: "*.{vcf.gz.tbi}"
authors:
    - "@maxulysse"

View file

@ -1,13 +0,0 @@
#!/usr/bin/env nextflow
nextflow.preview.dsl = 2
include '../../../tests/functions/check_process_outputs.nf' params(params)
include '../main.nf' params(params)

// Define input channels.
// The path is turned into a channel so the process receives a staged file
// rather than a bare relative string.
input = '../../../test-datasets/tools/file.vcf.gz'
ch_read_files = Channel.fromPath(input)

// Run the workflow.
// NOTE(review): assumes '../main.nf' is the module that declares the
// `htslib_tabix` process; the previous call targeted `tabix_index`, which is
// not defined anywhere in this script, and passed `ch_read_files`, which was
// never assigned.
workflow {
    htslib_tabix(ch_read_files)
    // .check_output()
}

View file

@ -1,2 +0,0 @@
// Turn on Docker execution for this test configuration.
docker.enabled = true
// Value exposed to the pipeline as `params.outdir`.
params.outdir = './results'

View file

@ -1,63 +0,0 @@
# Module metadata for the bcftools_tabix indexing module.
name: bcftools_tabix
description: Index GFF/BED/SAM/VCF file
keywords:
    - index
    - vcf
    - bed
    - sam
    - gff
tools:
    # Keyed by the tool being described. The entry was previously keyed
    # `stats`, which does not match the indexer description below.
    - tabix:
        description: |
            Indexes a TAB-delimited genome position file.
        homepage: http://samtools.github.io/bcftools/bcftools.html
        documentation: http://www.htslib.org/doc/bcftools.html
        doi: 10.1093/bioinformatics/btp352
# Pipeline-level parameters read by the module.
params:
    - outdir:
        type: string
        description: |
            The pipeline's output directory. By default, the module will
            output files into `$params.outdir/<SOFTWARE>`
    - publish_dir_mode:
        type: string
        description: |
            Value for the Nextflow `publishDir` mode parameter.
            Available: symlink, rellink, link, copy, copyNoFollow, move.
    - enable_conda:
        type: boolean
        description: |
            Run the module with Conda using the software specified
            via the `conda` directive
    - singularity_pull_docker_container:
        type: boolean
        description: |
            Instead of directly downloading Singularity images for use with Singularity,
            force the workflow to pull and convert Docker containers instead.
# Channel elements consumed by the process.
input:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
    - vcf:
        type: file
        description: TAB-delimited genome position file compressed with bgzip
        pattern: "*.{vcf.gz,bed.gz,sam.gz,gff.gz}"
# Channel elements emitted by the process.
output:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
    - tbi:
        type: file
        description: Index file
        pattern: "*.{tbi}"
    - version:
        type: file
        description: File containing software version
        pattern: "*.{version.txt}"
authors:
    - "@joseespinosa"
    - "@drpatelh"

View file

@ -1,4 +1,3 @@
/* /*
* ----------------------------------------------------- * -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files * Utility functions used in nf-core DSL2 module files
@ -31,7 +30,7 @@ def initOptions(Map args) {
*/ */
def getPathFromList(path_list) { def getPathFromList(path_list) {
def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", '') } // Trim whitespace and trailing slashes paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
return paths.join('/') return paths.join('/')
} }

View file

@ -4,21 +4,22 @@ include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:] params.options = [:]
def options = initOptions(params.options) def options = initOptions(params.options)
process BCFTOOLS_TABIX { process TABIX_TABIX {
tag "$meta.id" tag "$meta.id"
label 'process_medium'
publishDir "${params.outdir}", publishDir "${params.outdir}",
mode: params.publish_dir_mode, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
conda (params.enable_conda ? "bioconda::bcftools=1.11=h7c999a4_0" : null) conda (params.enable_conda ? "bioconda::tabix=0.2.6" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/bcftools:1.11--h7c999a4_0" container "https://depot.galaxyproject.org/singularity/tabix:0.2.6--ha92aebf_0"
} else { } else {
container "quay.io/biocontainers/bcftools:1.11--h7c999a4_0" container "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
} }
input: input:
tuple val(meta), path(vcf) tuple val(meta), path(tab)
output: output:
tuple val(meta), path("*.tbi"), emit: tbi tuple val(meta), path("*.tbi"), emit: tbi
@ -27,7 +28,8 @@ process BCFTOOLS_TABIX {
script: script:
def software = getSoftwareName(task.process) def software = getSoftwareName(task.process)
""" """
tabix $options.args $vcf tabix $options.args $tab
echo \$(bcftools --version 2>&1) | sed 's/^.*bcftools //; s/ .*\$//' > ${software}.version.txt
echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/(.*\$//' > ${software}.version.txt
""" """
} }

View file

@ -0,0 +1,61 @@
# Module metadata for tabix_tabix: name, search keywords, tool references,
# parameters, and the input/output channel elements.
name: tabix_tabix
description: create tabix index from a sorted bgzip tab-delimited genome file
keywords:
- index
- tabix
- vcf
# Reference links for the wrapped tool.
tools:
- tabix:
description: Generic indexer for TAB-delimited genome position files.
homepage: https://www.htslib.org/doc/tabix.html
documentation: https://www.htslib.org/doc/tabix.1.html
doi: 10.1093/bioinformatics/btq671
# Pipeline-level parameters described for this module.
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
- singularity_pull_docker_container:
type: boolean
description: |
Instead of directly downloading Singularity images for use with Singularity,
force the workflow to pull and convert Docker containers instead.
# Input elements: a sample meta map plus the bgzipped file to index.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- tab:
type: file
description: TAB-delimited genome position file compressed with bgzip
pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}"
# Output elements: the meta map, the index file, and a version file.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- tbi:
type: file
description: tabix index file
pattern: "*.{tbi}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
authors:
- "@joseespinosa"
- "@drpatelh"
- "@maxulysse"

BIN
tests/data/bed/B.bed.gz Normal file

Binary file not shown.

BIN
tests/data/gff/a.gff3.gz Normal file

Binary file not shown.

View file

@ -1,14 +0,0 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BCFTOOLS_TABIX } from '../../../../software/bcftools/tabix/main.nf' addParams( options: [:] )
workflow test_bcftools_tabix {
def input = []
input = [ [ id:'test' ], // meta map
[ file("${launchDir}/tests/data/vcf/test.vcf.gz", checkIfExists: true) ] ]
BCFTOOLS_TABIX ( input )
}

View file

@ -1,8 +0,0 @@
- name: bcftools tabix
command: nextflow run ./tests/software/bcftools/tabix -entry test_bcftools_tabix -c tests/config/nextflow.config
tags:
- bcftools
- bcftools_tabix
files:
- path: output/bcftools/test.vcf.gz.tbi
md5sum: 06d52177f819730dd409157914534e8d

View file

@ -0,0 +1,31 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { TABIX_TABIX as TABIX_BED } from '../../../../software/tabix/tabix/main.nf' addParams( options: ['args': '-p bed'] )
include { TABIX_TABIX as TABIX_GFF } from '../../../../software/tabix/tabix/main.nf' addParams( options: ['args': '-p gff'] )
include { TABIX_TABIX as TABIX_VCF } from '../../../../software/tabix/tabix/main.nf' addParams( options: ['args': '-p vcf'] )
workflow test_tabix_tabix_bed {
    // Sample meta map and the bgzipped BED fixture, passed together as one
    // [ meta, files ] element to the aliased TABIX_BED process.
    def bed_meta  = [ id:'B.bed' ]
    def bed_files = [ file("${launchDir}/tests/data/bed/B.bed.gz", checkIfExists: true) ]

    TABIX_BED ( [ bed_meta, bed_files ] )
}
workflow test_tabix_tabix_gff {
    // Sample meta map and the bgzipped GFF3 fixture, passed together as one
    // [ meta, files ] element to the aliased TABIX_GFF process.
    def gff_meta  = [ id:'a.gff3' ]
    def gff_files = [ file("${launchDir}/tests/data/gff/a.gff3.gz", checkIfExists: true) ]

    TABIX_GFF ( [ gff_meta, gff_files ] )
}
workflow test_tabix_tabix_vcf {
    // Sample meta map and the bgzipped VCF fixture, passed together as one
    // [ meta, files ] element to the aliased TABIX_VCF process.
    def vcf_meta  = [ id:'test.vcf' ]
    def vcf_files = [ file("${launchDir}/tests/data/vcf/test.vcf.gz", checkIfExists: true) ]

    TABIX_VCF ( [ vcf_meta, vcf_files ] )
}

View file

@ -0,0 +1,27 @@
# Test cases for the tabix/tabix module. Each entry runs one workflow entry
# point from ./tests/software/tabix/tabix and lists the expected index file
# together with its md5sum.

# BED input, entry point test_tabix_tabix_bed.
- name: tabix tabix bed
command: nextflow run ./tests/software/tabix/tabix -entry test_tabix_tabix_bed -c tests/config/nextflow.config
tags:
- tabix
- tabix_tabix
- bed
files:
- path: output/tabix/B.bed.gz.tbi
md5sum: 6d657159c283200109f2ed082296f853
# GFF input, entry point test_tabix_tabix_gff.
- name: tabix tabix gff
command: nextflow run ./tests/software/tabix/tabix -entry test_tabix_tabix_gff -c tests/config/nextflow.config
tags:
- tabix
- tabix_tabix
- gff
files:
- path: output/tabix/a.gff3.gz.tbi
md5sum: 77f666b63da45c6af8a394ab5642602e
# VCF input, entry point test_tabix_tabix_vcf.
- name: tabix tabix vcf
command: nextflow run ./tests/software/tabix/tabix -entry test_tabix_tabix_vcf -c tests/config/nextflow.config
tags:
- tabix
- tabix_tabix
- vcf
files:
- path: output/tabix/test.vcf.gz.tbi
md5sum: a03f56d3e968f32256ffb9f6b7d01812