Added new module snpsift/split (#1031)

* added new module snpsift/split

* added options.args

* added .vcf.gz to input

* removed test and updated to new NF DSL2 syntax

* Updated to new NF DSL2 syntax

* added option to join vcf files

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
This commit is contained in:
SusiJo 2021-12-17 09:08:14 +01:00 committed by GitHub
parent 0d1e21686a
commit 3eff67f54d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 167 additions and 0 deletions

View file

@ -0,0 +1,48 @@
// Runs `SnpSift split` on the staged VCF input.
//   - meta.split truthy: `SnpSift split $args $vcf`; any `*.vcf` files produced are collected.
//   - meta.split falsy : `SnpSift split -j $args $vcf`, with stdout redirected to
//                        `<prefix>.joined.vcf`.
// Both branches also write `versions.yml` with the detected SnpSift version.
process SNPSIFT_SPLIT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' :
        'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }"

    input:
    tuple val(meta), path(vcf)

    output:
    tuple val(meta), path("*.vcf"), emit: out_vcfs
    path "versions.yml"           , emit: versions

    script:
    def args = task.ext.args ?: ''
    // Only the join branch uses the prefix (to name the combined output file).
    def prefix = task.ext.prefix ?: "${meta.id}"

    // The version line keeps the text after "version ", drops everything from
    // the first "(" onwards, and then removes every "t" character.
    if (!meta.split) {
        // Join mode: `-j` plus a redirect of stdout into a single VCF.
        """
        SnpSift \\
            split \\
            -j \\
            $args \\
            $vcf \\
            > ${prefix}.joined.vcf

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            snpsift: \$( echo \$(SnpSift split -h 2>&1) | sed 's/^.*version //' | sed 's/(.*//' | sed 's/t//g' )
        END_VERSIONS
        """
    } else {
        // Split mode: no redirect; output files are written by SnpSift itself.
        """
        SnpSift \\
            split \\
            $args \\
            $vcf

        cat <<-END_VERSIONS > versions.yml
        "${task.process}":
            snpsift: \$( echo \$(SnpSift split -h 2>&1) | sed 's/^.*version //' | sed 's/(.*//' | sed 's/t//g' )
        END_VERSIONS
        """
    }
}

View file

@ -0,0 +1,44 @@
# Module metadata for snpsift_split: tool references plus the documented
# input and output channels.
name: snpsift_split
description: Splits/Joins VCF(s) file into chromosomes
keywords:
  - split
  - join
  - vcf

# Upstream tool references.
tools:
  - snpsift:
      description: SnpSift is a toolbox that allows you to filter and manipulate annotated files
      homepage: https://pcingola.github.io/SnpEff/ss_introduction/
      documentation: https://pcingola.github.io/SnpEff/ss_introduction/
      tool_dev_url: https://github.com/pcingola/SnpEff
      # Quoted so the DOI is always read as a string.
      doi: "10.3389/fgene.2012.00035"
      licence:
        - 'MIT'

# Input channel elements.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file (split) or list of files (join)
      description: VCF file(s)
      pattern: "*.{vcf,vcf.gz}"

# Output channel elements.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - out_vcfs:
      type: file
      description: Split/Joined VCF file(s)
      pattern: "*.vcf"

authors:
  - "@SusiJo"
  - "@jonasscheid"

View file

@ -1337,6 +1337,10 @@ snpeff:
- modules/snpeff/** - modules/snpeff/**
- tests/modules/snpeff/** - tests/modules/snpeff/**
# Path globs for the snpsift/split module and its tests.
# NOTE(review): presumably used to select which module tests run when these
# paths change — confirm against the consumer of this file.
snpsift/split:
- modules/snpsift/split/**
- tests/modules/snpsift/split/**
snpsites: snpsites:
- modules/snpsites/** - modules/snpsites/**
- tests/modules/snpsites/** - tests/modules/snpsites/**

View file

@ -250,6 +250,8 @@ params {
cutandrun_bedgraph_test_1 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph" cutandrun_bedgraph_test_1 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph"
cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph" cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph"
test_rnaseq_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf"
} }
'pacbio' { 'pacbio' {
primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta"

View file

@ -0,0 +1,32 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SNPSIFT_SPLIT } from '../../../../modules/snpsift/split/main.nf'
// Split mode (meta.split is true) on the uncompressed homo_sapiens RNA-seq test VCF.
workflow test_snpsift_split_base {

    meta = [ id:'test', split:true ] // meta map
    vcf  = file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_vcf'], checkIfExists: true)

    SNPSIFT_SPLIT ( [ meta, vcf ] )
}
// Split mode (meta.split is true) on the sarscov2 `test_vcf_gz` test file.
workflow test_snpsift_split_gz {

    meta = [ id:'test', split:true ] // meta map
    vcf  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)

    SNPSIFT_SPLIT ( [ meta, vcf ] )
}
// Join mode (meta.split is false): two sarscov2 VCFs are passed as one list.
workflow test_snpsift_join {

    meta = [ id:'test', split:false ] // meta map
    vcfs = [
        file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true)
    ]

    SNPSIFT_SPLIT ( [ meta, vcfs ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish into "<params.outdir>/<tool>", where <tool> is derived from the
    // process name: take the last ':'-separated segment, keep the part before
    // the first '_', and lower-case it.
    publishDir = {
        def process_name = task.process.tokenize(':')[-1]
        def tool_name    = process_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,32 @@
# Entry workflow test_snpsift_split_base: checks the chr22 split output and versions.yml.
- name: snpsift split test_snpsift_split_base
  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_split_base -c tests/config/nextflow.config
  tags:
    - snpsift/split
    - snpsift
  files:
    - path: output/snpsift/test.rnaseq.chr22.vcf
      md5sum: 1bb8724dcbe6fa3101a814c0be51d1ea
    - path: output/snpsift/versions.yml
      md5sum: 2b9f6b788db6c4fcbf258db763d8fab7
# Entry workflow test_snpsift_split_gz: checks the MT192765.1 split output and versions.yml.
- name: snpsift split test_snpsift_split_gz
  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_split_gz -c tests/config/nextflow.config
  tags:
    - snpsift/split
    - snpsift
  files:
    - path: output/snpsift/test.MT192765.1.vcf
      md5sum: 9d491cfa84067450342ba8e66c75e5b8
    - path: output/snpsift/versions.yml
      md5sum: 6bd63376670d6c1445caea2f31a3f579
# Entry workflow test_snpsift_join: checks the joined VCF and versions.yml.
- name: snpsift split test_snpsift_join
  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_join -c tests/config/nextflow.config
  tags:
    - snpsift/split
    - snpsift
  files:
    - path: output/snpsift/test.joined.vcf
      md5sum: c400c7458524d889e0967b06ed72534f
    - path: output/snpsift/versions.yml
      md5sum: be54682a73d3b91a17eacc0e533448f5