From 439763bf2c19b9836cc4d600747bf765170bada1 Mon Sep 17 00:00:00 2001
From: "Robert A. Petit III"
Date: Fri, 18 Feb 2022 17:33:48 -0700
Subject: [PATCH] add module for sistr (#1323)

* add module for sistr

* Update test.yml
---
 modules/sistr/main.nf               | 52 +++++++++++++++++++++++++++
 modules/sistr/meta.yml              | 55 +++++++++++++++++++++++++++++
 tests/config/pytest_modules.yml     |  4 +++
 tests/modules/sistr/main.nf         | 15 ++++++++
 tests/modules/sistr/nextflow.config |  5 +++
 tests/modules/sistr/test.yml        | 15 ++++++++
 6 files changed, 146 insertions(+)
 create mode 100644 modules/sistr/main.nf
 create mode 100644 modules/sistr/meta.yml
 create mode 100644 tests/modules/sistr/main.nf
 create mode 100644 tests/modules/sistr/nextflow.config
 create mode 100644 tests/modules/sistr/test.yml

diff --git a/modules/sistr/main.nf b/modules/sistr/main.nf
new file mode 100644
index 00000000..0301d053
--- /dev/null
+++ b/modules/sistr/main.nf
@@ -0,0 +1,52 @@
+// Serovar prediction for Salmonella assemblies with sistr_cmd. Emits the prediction
+// table (*.tab) plus allele JSON, novel-allele FASTA and cgMLST profile CSV.
+process SISTR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::sistr_cmd=1.1.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2':
+        'quay.io/biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*.tab")         , emit: tsv
+    tuple val(meta), path("*-allele.fasta"), emit: allele_fasta
+    tuple val(meta), path("*-allele.json") , emit: allele_json
+    tuple val(meta), path("*-cgmlst.csv")  , emit: cgmlst_csv
+    path "versions.yml"                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Gzipped input is decompressed to fasta_name below; drop only the trailing ".gz".
+    def is_compressed = fasta.getName().endsWith(".gz")
+    def fasta_name = is_compressed ? fasta.getBaseName() : fasta.getName()
+    """
+    if [ "$is_compressed" == "true" ]; then
+        gzip -c -d $fasta > $fasta_name
+    fi
+
+    sistr \\
+        --qc \\
+        $args \\
+        --threads $task.cpus \\
+        --alleles-output ${prefix}-allele.json \\
+        --novel-alleles ${prefix}-allele.fasta \\
+        --cgmlst-profiles ${prefix}-cgmlst.csv \\
+        --output-prediction ${prefix} \\
+        --output-format tab \\
+        $fasta_name
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sistr: \$(echo \$(sistr --version 2>&1) | sed 's/^.*sistr_cmd //; s/ .*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/sistr/meta.yml b/modules/sistr/meta.yml
new file mode 100644
index 00000000..5ce43334
--- /dev/null
+++ b/modules/sistr/meta.yml
@@ -0,0 +1,55 @@
+name: sistr
+description: Serovar prediction of salmonella assemblies
+keywords:
+  - bacteria
+  - fasta
+  - salmonella
+tools:
+  - sistr:
+      description: Salmonella In Silico Typing Resource (SISTR) commandline tool for serovar prediction
+      homepage: https://github.com/phac-nml/sistr_cmd
+      documentation: https://github.com/phac-nml/sistr_cmd
+      tool_dev_url: https://github.com/phac-nml/sistr_cmd
+      doi: "10.1371/journal.pone.0147101"
+      licence: ['Apache-2.0']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Nucleotide or protein sequences in FASTA format
+      pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz,faa,faa.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - tsv:
+      type: file
+      description: SISTR serovar prediction
+      pattern: "*.{tab}"
+  - allele_json:
+      type: file
+      description: Allele sequences and info to JSON
+      pattern: "*.{json}"
+  - allele_fasta:
+      type: file
+      description: FASTA file destination of novel cgMLST alleles
+      pattern: "*.{fasta}"
+  - cgmlst_csv:
+      type: file
+      description: CSV file destination for cgMLST allelic profiles
+      pattern: "*.{csv}"
+
+authors:
+  - "@rpetit3"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 9aa4c754..5ee2851f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1429,6 +1429,10 @@ shovill:
   - modules/shovill/**
   - tests/modules/shovill/**
 
+sistr:
+  - modules/sistr/**
+  - tests/modules/sistr/**
+
 snpdists:
   - modules/snpdists/**
   - tests/modules/snpdists/**
diff --git a/tests/modules/sistr/main.nf b/tests/modules/sistr/main.nf
new file mode 100644
index 00000000..4bd84844
--- /dev/null
+++ b/tests/modules/sistr/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SISTR } from '../../../modules/sistr/main.nf'
+
+workflow test_sistr {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['haemophilus_influenzae']['genome']['genome_fna_gz'], checkIfExists: true)
+    ]
+
+    SISTR ( input )
+}
diff --git a/tests/modules/sistr/nextflow.config b/tests/modules/sistr/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/sistr/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/sistr/test.yml b/tests/modules/sistr/test.yml
new file mode 100644
index 00000000..88182f28
--- /dev/null
+++ b/tests/modules/sistr/test.yml
@@ -0,0 +1,15 @@
+- name: sistr test_sistr
+  command: nextflow run tests/modules/sistr -entry test_sistr -c tests/config/nextflow.config
+  tags:
+    - sistr
+  files:
+    - path: output/sistr/test-allele.fasta
+      md5sum: 144a74999eb9dd01520be5c61e8bd210
+    - path: output/sistr/test-allele.json
+      md5sum: 3eb993c9489904621f539a93ff9a90ec
+    - path: output/sistr/test-cgmlst.csv
+      md5sum: c50a2144955fe1b98a6d5792bf295088
+    - path: output/sistr/test.tab
+      contains: ["cgmlst_ST", "serovar", "matched"]
+    - path: output/sistr/versions.yml
+      md5sum: 8b852f002c3ce67e3f6498da15b28296