add snippy/core module (#1855)

* add snippy-core module

* Update modules/snippy/core/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update tests/modules/snippy/core/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update tests/modules/snippy/core/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/snippy/core/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update main.nf

* update test

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
Robert A. Petit III 2022-09-07 22:58:10 -06:00 committed by GitHub
parent 8d5737116b
commit 1ade577ef6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 167 additions and 0 deletions

View file

@ -0,0 +1,53 @@
// Builds core-SNP alignments with snippy-core from a set of per-sample
// `*.vcf` / `*.aligned.fa` files and a reference (optionally gzip-compressed).
process SNIPPY_CORE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null)
    // Both engines point at the same build so results do not depend on the container engine.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2':
        'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }"

    input:
    tuple val(meta), path(vcf), path(aligned_fa)
    path reference

    output:
    tuple val(meta), path("${prefix}.aln")     , emit: aln
    tuple val(meta), path("${prefix}.full.aln"), emit: full_aln
    tuple val(meta), path("${prefix}.tab")     , emit: tab
    tuple val(meta), path("${prefix}.vcf")     , emit: vcf
    tuple val(meta), path("${prefix}.txt")     , emit: txt
    path "versions.yml"                        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // No `def` here: the output declarations above interpolate `prefix`.
    prefix = task.ext.prefix ?: "${meta.id}"
    def is_compressed = reference.getName().endsWith(".gz")
    // Strip only a trailing ".gz" so the name agrees with the suffix test above.
    def reference_name = reference.getName().replaceAll('\\.gz$', '')
    """
    if [ "$is_compressed" == "true" ]; then
        gzip -c -d $reference > $reference_name
    fi

    # Collect samples into necessary folders: samples/<name>/<name>.{vcf,aligned.fa}
    # The sed expressions remove only the trailing extension (escaped dot, end anchor).
    mkdir samples
    find . -name "*.vcf" | sed 's/\\.vcf\$//' | xargs -I {} bash -c 'mkdir samples/{}'
    find . -name "*.vcf" | sed 's/\\.vcf\$//' | xargs -I {} bash -c 'cp -L {}.vcf samples/{}/{}.vcf'
    find . -name "*.aligned.fa" | sed 's/\\.aligned\\.fa\$//' | xargs -I {} bash -c 'cp -L {}.aligned.fa samples/{}/{}.aligned.fa'

    # Run snippy-core
    snippy-core \\
        $args \\
        --ref $reference_name \\
        --prefix $prefix \\
        samples/*

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        snippy-core: \$(echo \$(snippy-core --version 2>&1) | sed 's/snippy-core //')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,67 @@
# Module metadata for snippy/core.
name: snippy_core
description: Core-SNP alignment from Snippy outputs
keywords:
  - core
  - alignment
  - bacteria
  - snippy
tools:
  - snippy:
      description: "Rapid bacterial SNP calling and core genome alignments"
      homepage: "https://github.com/tseemann/snippy"
      documentation: "https://github.com/tseemann/snippy"
      tool_dev_url: "https://github.com/tseemann/snippy"
      doi: ""
      # A real sequence rather than a string that merely looks like one.
      licence: ["GPL v2"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  # The process collects inputs with `find . -name "*.vcf"` and
  # `find . -name "*.aligned.fa"`, so both inputs must be uncompressed.
  - vcf:
      type: file
      description: Annotated variants in VCF format
      pattern: "*.vcf"
  - aligned_fa:
      type: file
      description: A version of the reference but with - at position with depth=0 and N for 0 < depth < --mincov (does not have variants)
      pattern: "*.aligned.fa"
  - reference:
      type: file
      description: Reference genome in GenBank (preferred) or FASTA format
      pattern: "*.{gbk,gbk.gz,gbff,gbff.gz,fa,fa.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - aln:
      type: file
      description: A core SNP alignment in FASTA format
      pattern: "*.aln"
  - full_aln:
      type: file
      description: A whole genome SNP alignment (includes invariant sites)
      pattern: "*.full.aln"
  - tab:
      type: file
      description: Tab-separated columnar list of core SNP sites with alleles but NO annotations
      pattern: "*.tab"
  - vcf:
      type: file
      description: Multi-sample VCF file with genotype GT tags for all discovered alleles
      pattern: "*.vcf"
  - txt:
      type: file
      description: Tab-separated columnar list of alignment/core-size statistics
      pattern: "*.txt"

authors:
  - "@rpetit3"

View file

@ -2159,6 +2159,10 @@ snapaligner/index:
- modules/snapaligner/index/**
- tests/modules/snapaligner/index/**
# snippy/core: globs covering the module's sources and its tests
snippy/core:
- modules/snippy/core/**
- tests/modules/snippy/core/**
# snippy/run: globs covering the module's sources and its tests
snippy/run:
- modules/snippy/run/**
- tests/modules/snippy/run/**

View file

@ -0,0 +1,22 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SNIPPY_RUN } from '../../../../modules/snippy/run/main.nf'
include { SNIPPY_CORE } from '../../../../modules/snippy/core/main.nf'
// Runs SNIPPY_RUN on one paired-end sample, then passes its VCF and aligned
// FASTA outputs, regrouped under a single meta map, to SNIPPY_CORE.
workflow test_snippy_core {

    reads = [
        [ id:'test', single_end:false ], // meta map
        [
            file(params.test_data['candidatus_portiera_aleyrodidarum']['illumina']['test_1_fastq_gz'], checkIfExists: true),
            file(params.test_data['candidatus_portiera_aleyrodidarum']['illumina']['test_2_fastq_gz'], checkIfExists: true)
        ]
    ]

    ref_genome = file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)

    SNIPPY_RUN ( reads, ref_genome )

    // Drop the per-sample meta, gather all files into one list and key it with a shared meta map
    ch_all_vcfs = SNIPPY_RUN.out.vcf
        .collect{ meta, vcf_file -> vcf_file }
        .map{ vcf_list -> [[id:'snippy-core'], vcf_list] }

    ch_all_aligned = SNIPPY_RUN.out.aligned_fa
        .collect{ meta, fa_file -> fa_file }
        .map{ fa_list -> [[id:'snippy-core'], fa_list] }

    // Both channels carry the same key, so the join yields [ meta, vcfs, aligned_fas ]
    ch_core_input = ch_all_vcfs.join( ch_all_aligned )

    SNIPPY_CORE ( ch_core_input, ref_genome )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish into <params.outdir>/<tool>, where <tool> is derived from the process name:
    // keep the last ':'-separated segment, take the part before the first '_' and lower-case it
    // (e.g. SNIPPY_CORE -> snippy).
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,16 @@
# Test case for the test_snippy_core entry: lists each published output file
# together with its expected md5 digest (quoted so digests always stay strings).
- name: snippy core test_snippy_core
  command: nextflow run ./tests/modules/snippy/core -entry test_snippy_core -c ./tests/config/nextflow.config -c ./tests/modules/snippy/core/nextflow.config
  tags:
    - snippy
    - snippy/core
  files:
    - path: output/snippy/snippy-core.aln
      md5sum: "23cf80454a83849977aeb7193a5eab8e"
    - path: output/snippy/snippy-core.full.aln
      md5sum: "fcadf17c2c8109fe4491ab7e7df6e261"
    - path: output/snippy/snippy-core.tab
      md5sum: "1eb437b4d82d8dbc93649b0edf94abc8"
    - path: output/snippy/snippy-core.txt
      md5sum: "243961814f44b9a6cac48b69f02b4349"
    - path: output/snippy/snippy-core.vcf
      md5sum: "8b4ff1f2e7f1cef9d168dad5d70e642a"