add module for sistr (#1323)

* add module for sistr

* Update test.yml
This commit is contained in:
Robert A. Petit III 2022-02-18 17:33:48 -07:00 committed by GitHub
parent 8055c5d1c3
commit 439763bf2c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 143 additions and 0 deletions

49
modules/sistr/main.nf Normal file
View file

@ -0,0 +1,49 @@
// Runs the `sistr` command-line tool on a single FASTA file.
//
// Input : tuple of a meta map (must provide `id`) and a FASTA file, optionally gzip-compressed.
// Output: the tab-formatted prediction (`<prefix>.tab`), the allele JSON, the novel-allele FASTA,
//         the cgMLST profile CSV, and a versions.yml recording the sistr version.
// `task.ext.args` is appended to the command line; `task.ext.prefix` overrides the
// default output prefix (`meta.id`).
process SISTR {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::sistr_cmd=1.1.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2':
        'quay.io/biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("*.tab")         , emit: tsv
    tuple val(meta), path("*-allele.fasta"), emit: allele_fasta
    tuple val(meta), path("*-allele.json") , emit: allele_json
    tuple val(meta), path("*-cgmlst.csv")  , emit: cgmlst_csv
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // endsWith() already returns a boolean; it is interpolated below as "true"/"false".
    def is_compressed = fasta.getName().endsWith(".gz")
    // Strip only a trailing ".gz" so that names containing ".gz" elsewhere are left intact.
    def fasta_name = fasta.getName().replaceAll('\\.gz$', '')
    """
    if [ "$is_compressed" == "true" ]; then
        gzip -c -d $fasta > $fasta_name
    fi

    sistr \\
        --qc \\
        $args \\
        --threads $task.cpus \\
        --alleles-output ${prefix}-allele.json \\
        --novel-alleles ${prefix}-allele.fasta \\
        --cgmlst-profiles ${prefix}-cgmlst.csv \\
        --output-prediction ${prefix} \\
        --output-format tab \\
        $fasta_name

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sistr: \$(echo \$(sistr --version 2>&1) | sed 's/^.*sistr_cmd //; s/ .*\$//' )
    END_VERSIONS
    """
}

55
modules/sistr/meta.yml Normal file
View file

@ -0,0 +1,55 @@
# Module metadata for the SISTR process: describes the tool, the input tuple
# and each emitted output channel.
name: sistr
description: Serovar prediction of salmonella assemblies
keywords:
  - bacteria
  - fasta
  - salmonella
tools:
  - sistr:
      description: Salmonella In Silico Typing Resource (SISTR) commandline tool for serovar prediction
      homepage: https://github.com/phac-nml/sistr_cmd
      documentation: https://github.com/phac-nml/sistr_cmd
      tool_dev_url: https://github.com/phac-nml/sistr_cmd
      doi: "10.1371/journal.pone.0147101"
      licence: ['Apache-2.0']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: Nucleotide or protein sequences in FASTA format
      pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The process runs sistr with `--output-format tab`, so the prediction file
  # is emitted with a `.tab` extension (not `.tsv`).
  - tsv:
      type: file
      description: SISTR serovar prediction
      pattern: "*.tab"
  - allele_json:
      type: file
      description: Allele sequences and info to JSON
      pattern: "*.{json}"
  - allele_fasta:
      type: file
      description: FASTA file destination of novel cgMLST alleles
      pattern: "*.{fasta}"
  - cgmlst_csv:
      type: file
      description: CSV file destination for cgMLST allelic profiles
      pattern: "*.{csv}"

authors:
  - "@rpetit3"

View file

@ -1429,6 +1429,10 @@ shovill:
- modules/shovill/**
- tests/modules/shovill/**
# sistr: glob patterns covering the module's sources and its tests.
# NOTE(review): presumably used to select which module tests run when these paths change — confirm against the CI setup.
sistr:
- modules/sistr/**
- tests/modules/sistr/**
snpdists:
- modules/snpdists/**
- tests/modules/snpdists/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SISTR } from '../../../modules/sistr/main.nf'
// Test entry point: runs SISTR once on a single [ meta, fasta ] tuple.
workflow test_sistr {

    // Meta map for the test sample
    sample_meta  = [ id:'test', single_end:false ]

    // Genome FASTA looked up in params.test_data; the run fails early if the file is missing
    genome_fasta = file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true)

    SISTR ( [ sample_meta, genome_fasta ] )
}

View file

@ -0,0 +1,5 @@
// Applies to every process: publish outputs under "<params.outdir>/<tool>".
// <tool> is derived from the process name by taking the last ':'-separated
// component, keeping the part before the first '_', and lowercasing it
// (e.g. a process named SISTR publishes to "<outdir>/sistr").
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,15 @@
# Test case for the sistr module: runs the test_sistr entry workflow and
# checks the published files.
- name: sistr test_sistr
  command: nextflow run tests/modules/sistr -entry test_sistr -c tests/config/nextflow.config
  tags:
    - sistr
  files:
    # Allele and cgMLST outputs are checked by exact checksum
    - path: output/sistr/test-allele.fasta
      md5sum: 144a74999eb9dd01520be5c61e8bd210
    - path: output/sistr/test-allele.json
      md5sum: 3eb993c9489904621f539a93ff9a90ec
    - path: output/sistr/test-cgmlst.csv
      md5sum: c50a2144955fe1b98a6d5792bf295088
    # The prediction table is checked only for the presence of these strings
    - path: output/sistr/test.tab
      contains:
        - "cgmlst_ST"
        - "serovar"
        - "matched"
    - path: output/sistr/versions.yml
      md5sum: 8b852f002c3ce67e3f6498da15b28296