Merge pull request #1574 from jvhagey/srst2/srst2

new module: srst2/srst2
This commit is contained in:
Sateesh Peri 2022-05-02 20:21:23 -04:00 committed by GitHub
commit 88a9a55c80
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 232 additions and 0 deletions

View file

@ -0,0 +1,47 @@
// Runs srst2 on the reads of one sample against either a gene database or an
// MLST database. Which srst2 option receives `db` is selected by `meta.db`
// ("gene" -> --gene_db, "mlst" -> --mlst_db); any other value aborts the run.
process SRST2_SRST2 {
    tag "${meta.id}"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::srst2=0.2.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/srst2%3A0.2.0--py27_2':
        'quay.io/biocontainers/srst2:0.2.0--py27_2'}"

    input:
    // fastq_s: one FASTQ file when meta.single_end is true, otherwise a [forward, reverse] pair
    tuple val(meta), path(fastq_s), path(db)

    output:
    // The three result tables are optional: which ones exist depends on the database type used
    tuple val(meta), path("*_genes_*_results.txt")     , optional:true, emit: gene_results
    tuple val(meta), path("*_fullgenes_*_results.txt") , optional:true, emit: fullgene_results
    tuple val(meta), path("*_mlst_*_results.txt")      , optional:true, emit: mlst_results
    tuple val(meta), path("*.pileup")                  ,                emit: pileup
    tuple val(meta), path("*.sorted.bam")              ,                emit: sorted_bam
    path "versions.yml"                                ,                emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ""
    def prefix = task.ext.prefix ?: "${meta.id}"
    def read_s = meta.single_end ? "--input_se ${fastq_s}" : "--input_pe ${fastq_s[0]} ${fastq_s[1]}"

    // Declared with `def` so the value is local to this task. Without it the
    // variable is not task-scoped and concurrent tasks with different meta.db
    // values could overwrite each other's database option.
    def database
    if (meta.db == "gene") {
        database = "--gene_db ${db}"
    } else if (meta.db == "mlst") {
        database = "--mlst_db ${db}"
    } else {
        error "Please set meta.db to either \"gene\" or \"mlst\""
    }

    // NOTE(review): the version line below ends with an unbalanced ')' which is
    // most likely written literally into versions.yml (e.g. "0.2.0)"). It is left
    // untouched here because removing it changes the versions.yml checksums pinned
    // in the module's test spec; fix both together.
    """
    srst2 \\
        ${read_s} \\
        --threads $task.cpus \\
        --output ${prefix} \\
        ${database} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        srst2: \$(echo \$(srst2 --version 2>&1) | sed 's/srst2 //' ))
    END_VERSIONS
    """
}

View file

@ -0,0 +1,72 @@
name: srst2_srst2
description: |
  Short Read Sequence Typing for Bacterial Pathogens is a program designed to take Illumina sequence data,
  a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc)
  and report the presence of STs and/or reference genes.
keywords:
  - mlst
  - typing
  - illumina
tools:
  - srst2:
      description: "Short Read Sequence Typing for Bacterial Pathogens"
      homepage: "http://katholt.github.io/srst2/"
      documentation: "https://github.com/katholt/srst2/blob/master/README.md"
      tool_dev_url: "https://github.com/katholt/srst2"
      doi: "10.1186/s13073-014-0090-6"
      licence: ["BSD"]

# Entry names below mirror the process definition: inputs use the names from
# its `input:` tuple and outputs use its `emit:` names.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        id: should be the identification number or sample name
        single_end: should be true for single end data and false for paired end data
        db: should be either 'gene' to use the --gene_db option or "mlst" to use the --mlst_db option
        e.g. [ id:'sample', single_end:false , db:'gene']
  - fastq_s:
      type: file
      description: |
        gzipped FASTQ file(s): a single file for single-end data or a forward/reverse pair
        for paired-end data. Paired file names are expected either in MiSeq format
        (sample_S1_L001_R1_001.fastq.gz) or with the default _1 designator for forward reads
        (i.e. sample_1.fastq.gz). For any other naming scheme, pass the srst2 --forward and
        --reverse parameters through task.ext.args.
      pattern: "*.fastq.gz"
  - db:
      type: file
      description: Database in FASTA format
      pattern: "*.fasta"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'sample', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fullgene_results:
      type: file
      description: "A detailed report, with one row per gene per sample, described at https://github.com/katholt/srst2#gene-typing"
      pattern: "*_fullgenes_*_results.txt"
  - gene_results:
      type: file
      description: A tabulated summary report of samples x genes.
      pattern: "*_genes_*_results.txt"
  - mlst_results:
      type: file
      description: A tabulated summary report of mlst subtyping.
      pattern: "*_mlst_*_results.txt"
  - sorted_bam:
      type: file
      description: Sorted BAM file
      pattern: "*.sorted.bam"
  - pileup:
      type: file
      description: SAMtools pileup file
      pattern: "*.pileup"

authors:
  - "@jvhagey"

View file

@ -1775,6 +1775,10 @@ sratools/prefetch:
- modules/sratools/prefetch/**
- tests/modules/sratools/prefetch/**
# Path globs associated with the srst2/srst2 tag: the module sources and the
# module's test directory.
srst2/srst2:
- modules/srst2/srst2/**
- tests/modules/srst2/srst2/**
ssuissero:
- modules/ssuissero/**
- tests/modules/ssuissero/**

View file

@ -0,0 +1,53 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SRST2_SRST2 } from '../../../../modules/srst2/srst2/main.nf'
// Negative test: meta.db is set to a value other than "gene" or "mlst", which
// the SRST2_SRST2 process rejects with an error.
workflow test_srst2_srst2_exit {

    def sample_meta = [ id:'test', single_end:false, db:"test" ]

    // Paired-end reads: [ forward, reverse ]
    def read_pair = [
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true)
    ]

    def gene_db = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta')

    // Tuple layout expected by the process: [ meta, reads, db ]
    SRST2_SRST2( [ sample_meta, read_pair, gene_db ] )
}
// MLST mode: paired-end reads typed against an MLST allele database (meta.db = "mlst").
workflow test_srst2_srst2_mlst {

    def sample_meta = [ id:'test', single_end:false, db:"mlst" ]

    // Paired-end reads: [ forward, reverse ]
    def read_pair = [
        file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/SRR9067271_1.fastq.gz", checkIfExists: true),
        file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/SRR9067271_2.fastq.gz", checkIfExists: true)
    ]

    def mlst_db = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/MLST_DB.fas')

    // Tuple layout expected by the process: [ meta, reads, db ]
    SRST2_SRST2( [ sample_meta, read_pair, mlst_db ] )
}
// Gene mode with paired-end reads (meta.db = "gene").
workflow test_srst2_srst2_paired_end {

    def sample_meta = [ id:'test', single_end:false, db:"gene" ]

    // Paired-end reads: [ forward, reverse ]
    def read_pair = [
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true)
    ]

    // Change to params.test_data syntax after the data is included in tests/config/test_data.config
    def gene_db = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta')

    // Tuple layout expected by the process: [ meta, reads, db ]
    SRST2_SRST2( [ sample_meta, read_pair, gene_db ] )
}
// Gene mode with single-end reads (meta.db = "gene", meta.single_end = true).
workflow test_srst2_srst2_single_end {

    def sample_meta = [ id:'test', single_end:true, db:"gene" ]

    // Single FASTQ file, passed as-is (not wrapped in a list)
    def reads = file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true)

    // Change to params.test_data syntax after the data is included in tests/config/test_data.config
    def gene_db = file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta')

    // Tuple layout expected by the process: [ meta, reads, db ]
    SRST2_SRST2( [ sample_meta, reads, gene_db ] )
}

View file

@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<tool>, where <tool> is the
// lower-cased first '_'-separated token of the process's simple name
// (the part after the last ':' in task.process).
process {
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,51 @@
# Negative test: the run must exit with status 1 when meta.db is not a
# supported value.
- name: srst2 srst2 test_srst2_srst2_exit
  command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_exit -c tests/config/nextflow.config
  tags:
    - srst2/srst2
    - srst2
  exit_code: 1
# MLST mode: checks the pileup by content, the MLST table and versions.yml by
# checksum, and the sorted BAM by existence only.
- name: srst2 srst2 test_srst2_srst2_mlst
  command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_mlst -c tests/config/nextflow.config
  tags:
    - srst2/srst2
    - srst2
  files:
    - path: output/srst2/test__SRR9067271.MLST_DB.pileup
      # NOTE(review): pileup columns are normally tab-separated; confirm the
      # whitespace in this string matches the real output.
      contains:
        - "dnaJ-1 2 C 17 .........,....... FFFFFFFFFFFFFFFFF"
    - path: output/srst2/test__SRR9067271.MLST_DB.sorted.bam
    - path: output/srst2/test__mlst__MLST_DB__results.txt
      md5sum: ec1b1f69933401d67c57f64cad11a098
    - path: output/srst2/versions.yml
      md5sum: a0c256a2fd3636069710b8ef22ee5ea7
# Gene mode, paired-end reads: the genes table, pileup and versions.yml are
# pinned by checksum; the sorted BAM is checked for existence only.
- name: srst2 srst2 test_srst2_srst2_paired_end
  command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_paired_end -c tests/config/nextflow.config
  tags:
    - srst2/srst2
    - srst2
  files:
    - path: output/srst2/test__genes__resFinder_20180221_srst2__results.txt
      md5sum: 099aa6cacec5524b311f606debdfb3a9
    - path: output/srst2/test__test1.resFinder_20180221_srst2.pileup
      md5sum: 64b512ff495b828c456405ec7b676ad1
    - path: output/srst2/test__test1.resFinder_20180221_srst2.sorted.bam
    - path: output/srst2/versions.yml
      md5sum: b446a70f1a2b4f60757829bcd744a214
# Gene mode, single-end reads: both the fullgenes and genes tables, the pileup
# and versions.yml are pinned by checksum; the sorted BAM is checked for
# existence only.
- name: srst2 srst2 test_srst2_srst2_single_end
  command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_single_end -c tests/config/nextflow.config
  tags:
    - srst2/srst2
    - srst2
  files:
    - path: output/srst2/test__fullgenes__resFinder_20180221_srst2__results.txt
      md5sum: d0762ef8c38afd0e0a34cce52ed1a3db
    - path: output/srst2/test__genes__resFinder_20180221_srst2__results.txt
      md5sum: b8850c6644406d8b131e471ecc3f9013
    - path: output/srst2/test__test1_1.resFinder_20180221_srst2.pileup
      md5sum: 5f6279dc8124aa762a9dfe3d7a871277
    - path: output/srst2/test__test1_1.resFinder_20180221_srst2.sorted.bam
    - path: output/srst2/versions.yml
      md5sum: 790fe00493c6634d17801a930073218b