From 3eff67f54da90901425f18dc11a447b922b09cee Mon Sep 17 00:00:00 2001
From: SusiJo <43847534+SusiJo@users.noreply.github.com>
Date: Fri, 17 Dec 2021 09:08:14 +0100
Subject: [PATCH] Added new module snpsift/split (#1031)

* added new module snpsift/split

* added options.args

* added .vcf.gz to input

* removed test and updated to new NF DSL2 syntax

* Updated to new NF DSL2 syntax

* added option to join vcf files

Co-authored-by: Robert A. Petit III
Co-authored-by: FriederikeHanssen
---
 modules/snpsift/split/main.nf               | 39 +++++++++++++++++
 modules/snpsift/split/meta.yml              | 46 ++++++++++++++++++++
 tests/config/pytest_modules.yml             |  4 ++
 tests/config/test_data.config               |  2 +
 tests/modules/snpsift/split/main.nf         | 35 +++++++++++++++
 tests/modules/snpsift/split/nextflow.config |  6 +++
 tests/modules/snpsift/split/test.yml        | 32 ++++++++++++++
 7 files changed, 164 insertions(+)
 create mode 100644 modules/snpsift/split/main.nf
 create mode 100644 modules/snpsift/split/meta.yml
 create mode 100644 tests/modules/snpsift/split/main.nf
 create mode 100644 tests/modules/snpsift/split/nextflow.config
 create mode 100644 tests/modules/snpsift/split/test.yml

diff --git a/modules/snpsift/split/main.nf b/modules/snpsift/split/main.nf
new file mode 100644
index 00000000..a83052ad
--- /dev/null
+++ b/modules/snpsift/split/main.nf
@@ -0,0 +1,39 @@
+// Splits a VCF into per-chromosome files or joins several VCFs into one using
+// `SnpSift split`; the `split` key of the meta map selects the mode.
+process SNPSIFT_SPLIT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' :
+        'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }"
+
+    input:
+    tuple val(meta), path(vcf)
+
+    output:
+    tuple val(meta), path("*.vcf"), emit: out_vcfs
+    path "versions.yml"           , emit: versions
+
+    script:
+    def args   = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Join mode (meta.split is falsy) adds `-j` and redirects stdout to
+    // <prefix>.joined.vcf; split mode adds neither.
+    def join_flag = meta.split ? '' : '-j'
+    def redirect  = meta.split ? '' : "> ${prefix}.joined.vcf"
+    """
+    SnpSift \\
+        split \\
+        $join_flag \\
+        $args \\
+        $vcf \\
+        $redirect
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        snpsift: \$( echo \$(SnpSift split -h 2>&1) | sed 's/^.*version //' | sed 's/(.*//' | sed 's/t//g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/snpsift/split/meta.yml b/modules/snpsift/split/meta.yml
new file mode 100644
index 00000000..5a125b62
--- /dev/null
+++ b/modules/snpsift/split/meta.yml
@@ -0,0 +1,46 @@
+# Module description for SNPSIFT_SPLIT (modules/snpsift/split/main.nf)
+name: snpsift_split
+description: Splits/Joins VCF(s) file into chromosomes
+keywords:
+  - split
+  - join
+  - vcf
+tools:
+  - snpsift:
+      description: SnpSift is a toolbox that allows you to filter and manipulate annotated files
+      homepage: https://pcingola.github.io/SnpEff/ss_introduction/
+      documentation: https://pcingola.github.io/SnpEff/ss_introduction/
+      tool_dev_url: https://github.com/pcingola/SnpEff
+      doi: "10.3389/fgene.2012.00035"
+      licence: ['MIT']
+
+input:
+  # The process also reads the `split` key of meta to choose split vs. join mode
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - vcf:
+      type: file (split) or list of files (join)
+      description: VCF file(s)
+      pattern: "*.{vcf,vcf.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - out_vcfs:
+      type: file
+      description: Split/Joined VCF file(s)
+      pattern: "*.vcf"
+
+authors:
+  - "@SusiJo"
+  - "@jonasscheid"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 7e3d8f82..708b5a27 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1337,6 +1337,10 @@ snpeff:
   - modules/snpeff/**
   - tests/modules/snpeff/**
 
+snpsift/split:
+  - modules/snpsift/split/**
+  - tests/modules/snpsift/split/**
+
 snpsites:
   - modules/snpsites/**
   - tests/modules/snpsites/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index fddc4489..ee1ba0d8 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -250,6 +250,8 @@ params {
 
                 cutandrun_bedgraph_test_1 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_h3k27me3_test_1.bedGraph"
                 cutandrun_bedgraph_test_2 = "${test_data_dir}/genomics/homo_sapiens/illumina/bedgraph/cutandtag_igg_test_1.bedGraph"
+
+                test_rnaseq_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.rnaseq.vcf"
             }
             'pacbio' {
                 primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta"
diff --git a/tests/modules/snpsift/split/main.nf b/tests/modules/snpsift/split/main.nf
new file mode 100644
index 00000000..4579fee3
--- /dev/null
+++ b/tests/modules/snpsift/split/main.nf
@@ -0,0 +1,35 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SNPSIFT_SPLIT } from '../../../../modules/snpsift/split/main.nf'
+
+// Split mode on a plain (uncompressed) VCF
+workflow test_snpsift_split_base {
+
+    meta = [ id:'test', split:true ]
+    vcf  = file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_vcf'], checkIfExists: true)
+
+    SNPSIFT_SPLIT ( [ meta, vcf ] )
+}
+
+// Split mode on a gzip-compressed VCF
+workflow test_snpsift_split_gz {
+
+    meta = [ id:'test', split:true ]
+    vcf  = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)
+
+    SNPSIFT_SPLIT ( [ meta, vcf ] )
+}
+
+// Join mode: two VCFs merged into a single output
+workflow test_snpsift_join {
+
+    meta = [ id:'test', split:false ]
+    vcfs = [
+        file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true)
+    ]
+
+    SNPSIFT_SPLIT ( [ meta, vcfs ] )
+}
diff --git a/tests/modules/snpsift/split/nextflow.config b/tests/modules/snpsift/split/nextflow.config
new file mode 100644
index 00000000..8730f1c4
--- /dev/null
+++ b/tests/modules/snpsift/split/nextflow.config
@@ -0,0 +1,6 @@
+process {
+
+    // Publish under <outdir>/<first '_'-token of the process name, lowercased>, e.g. SNPSIFT_SPLIT -> snpsift
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/snpsift/split/test.yml b/tests/modules/snpsift/split/test.yml
new file mode 100644
index 00000000..529fbca5
--- /dev/null
+++ b/tests/modules/snpsift/split/test.yml
@@ -0,0 +1,32 @@
+- name: snpsift split test_snpsift_split_base
+  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_split_base -c tests/config/nextflow.config
+  tags:
+    - snpsift/split
+    - snpsift
+  files:
+    - path: output/snpsift/test.rnaseq.chr22.vcf
+      md5sum: 1bb8724dcbe6fa3101a814c0be51d1ea
+    - path: output/snpsift/versions.yml
+      md5sum: 2b9f6b788db6c4fcbf258db763d8fab7
+
+- name: snpsift split test_snpsift_split_gz
+  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_split_gz -c tests/config/nextflow.config
+  tags:
+    - snpsift/split
+    - snpsift
+  files:
+    - path: output/snpsift/test.MT192765.1.vcf
+      md5sum: 9d491cfa84067450342ba8e66c75e5b8
+    - path: output/snpsift/versions.yml
+      md5sum: 6bd63376670d6c1445caea2f31a3f579
+
+- name: snpsift split test_snpsift_join
+  command: nextflow run tests/modules/snpsift/split -entry test_snpsift_join -c tests/config/nextflow.config
+  tags:
+    - snpsift/split
+    - snpsift
+  files:
+    - path: output/snpsift/test.joined.vcf
+      md5sum: c400c7458524d889e0967b06ed72534f
+    - path: output/snpsift/versions.yml
+      md5sum: be54682a73d3b91a17eacc0e533448f5