Merge branch 'vardict-java' of github.com:nvnieuwk/modules into vardict-java

This commit is contained in:
Nicolas Vannieuwkerke 2022-05-03 08:29:50 +02:00
commit 9127f92ddd
18 changed files with 358 additions and 50 deletions

View file

@ -27,9 +27,7 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES {
output:
path("antismash_db") , emit: database
path("css"), emit: css_dir
path("detection"), emit: detection_dir
path("modules"), emit: modules_dir
path("antismash_dir"), emit: antismash_dir
path "versions.yml", emit: versions
when:
@ -37,11 +35,19 @@ process ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES {
script:
def args = task.ext.args ?: ''
conda = params.enable_conda
"""
download-antismash-databases \\
--database-dir antismash_db \\
$args
if [[ $conda = false ]]; \
then \
cp -r /usr/local/lib/python3.8/site-packages/antismash antismash_dir; \
else \
cp -r \$(python -c 'import antismash;print(antismash.__file__.split("/__")[0])') antismash_dir; \
fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
antismash-lite: \$(antismash --version | sed 's/antiSMASH //')

View file

@ -50,21 +50,11 @@ output:
type: directory
description: Download directory for antiSMASH databases
pattern: "antismash_db"
- css_dir:
- antismash_dir:
type: directory
description: |
antismash/outputs/html/css folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines.
pattern: "css"
- detection_dir:
type: directory
description: |
antismash/detection folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines.
pattern: "detection"
- modules_dir:
type: directory
description: |
antismash/modules folder which is being created during the antiSMASH database downloading step. These files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database in pipelines.
pattern: "modules"
antismash installation folder which is being modified during the antiSMASH database downloading step. The modified files are normally downloaded by download-antismash-databases itself, and must be retrieved by the user by manually running the command with conda or a standalone installation of antiSMASH. Therefore we do not recommend using this module for production pipelines, but rather require users to specify their own local copy of the antiSMASH database and installation folder in pipelines.
pattern: "antismash_dir"
authors:
- "@jasmezz"

View file

@ -10,10 +10,17 @@ process DIAMOND_BLASTP {
input:
tuple val(meta), path(fasta)
path db
val outext
val out_ext
val blast_columns
output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
tuple val(meta), path('*.blast'), optional: true, emit: blast
tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
path "versions.yml" , emit: versions
when:
@ -22,7 +29,8 @@ process DIAMOND_BLASTP {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) {
def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTP {
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
}
"""
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTP {
--threads $task.cpus \\
--db \$DB \\
--query $fasta \\
--outfmt ${outfmt} \\
--outfmt ${outfmt} ${columns} \\
$args \\
--out ${prefix}.${outext}
--out ${prefix}.${out_ext}
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -28,7 +28,7 @@ input:
type: directory
description: Directory containing the protein blast database
pattern: "*"
- outext:
- out_ext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
@ -36,12 +36,42 @@ input:
`txt` corresponds to to BLAST tabular format. `tsv` corresponds to
taxonomic classification format.
pattern: "blast|xml|txt|daa|sam|tsv|paf"
- blast_columns:
type: string
description: |
Optional space separated list of DIAMOND tabular BLAST output keywords
used for in conjunction with the 'txt' out_ext option (--outfmt 6). See
DIAMOND documnetation for more information.
output:
- txt:
- blast:
type: file
description: File containing blastp hits
pattern: "*.{blastp.txt}"
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format format
pattern: "*.{paf}"
- versions:
type: file
description: File containing software versions

View file

@ -10,10 +10,17 @@ process DIAMOND_BLASTX {
input:
tuple val(meta), path(fasta)
path db
val outext
val out_ext
val blast_columns
output:
tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
tuple val(meta), path('*.blast'), optional: true, emit: blast
tuple val(meta), path('*.xml') , optional: true, emit: xml
tuple val(meta), path('*.txt') , optional: true, emit: txt
tuple val(meta), path('*.daa') , optional: true, emit: daa
tuple val(meta), path('*.sam') , optional: true, emit: sam
tuple val(meta), path('*.tsv') , optional: true, emit: tsv
tuple val(meta), path('*.paf') , optional: true, emit: paf
path "versions.yml" , emit: versions
when:
@ -22,7 +29,8 @@ process DIAMOND_BLASTX {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
switch ( outext ) {
def columns = blast_columns ? "${blast_columns}" : ''
switch ( out_ext ) {
case "blast": outfmt = 0; break
case "xml": outfmt = 5; break
case "txt": outfmt = 6; break
@ -30,6 +38,11 @@ process DIAMOND_BLASTX {
case "sam": outfmt = 101; break
case "tsv": outfmt = 102; break
case "paf": outfmt = 103; break
default:
outfmt = '6';
out_ext = 'txt';
log.warn("Unknown output file format provided (${out_ext}): selecting DIAMOND default of tabular BLAST output (txt)");
break
}
"""
DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`
@ -39,9 +52,9 @@ process DIAMOND_BLASTX {
--threads $task.cpus \\
--db \$DB \\
--query $fasta \\
--outfmt ${outfmt} \\
--outfmt ${outfmt} ${columns} \\
$args \\
--out ${prefix}.${outext}
--out ${prefix}.${out_ext}
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -28,7 +28,7 @@ input:
type: directory
description: Directory containing the nucelotide blast database
pattern: "*"
- outext:
- out_ext:
type: string
description: |
Specify the type of output file to be generated. `blast` corresponds to
@ -38,10 +38,34 @@ input:
pattern: "blast|xml|txt|daa|sam|tsv|paf"
output:
- blast:
type: file
description: File containing blastp hits
pattern: "*.{blast}"
- xml:
type: file
description: File containing blastp hits
pattern: "*.{xml}"
- txt:
type: file
description: File containing blastx hits
pattern: "*.{blastx.txt}"
description: File containing hits in tabular BLAST format.
pattern: "*.{txt}"
- daa:
type: file
description: File containing hits DAA format
pattern: "*.{daa}"
- sam:
type: file
description: File containing aligned reads in SAM format
pattern: "*.{sam}"
- tsv:
type: file
description: Tab separated file containing taxonomic classification of hits
pattern: "*.{tsv}"
- paf:
type: file
description: File containing aligned reads in pairwise mapping format format
pattern: "*.{paf}"
- versions:
type: file
description: File containing software versions

View file

@ -12,7 +12,7 @@ process GATK4_MARKDUPLICATES {
output:
tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , emit: bai
tuple val(meta), path("*.bai") , optional:true, emit: bai
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions

View file

@ -0,0 +1,47 @@
process SRST2_SRST2 {
tag "${meta.id}"
label 'process_low'
conda (params.enable_conda ? "bioconda::srst2=0.2.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/srst2%3A0.2.0--py27_2':
'quay.io/biocontainers/srst2:0.2.0--py27_2'}"
input:
tuple val(meta), path(fastq_s), path(db)
output:
tuple val(meta), path("*_genes_*_results.txt") , optional:true, emit: gene_results
tuple val(meta), path("*_fullgenes_*_results.txt") , optional:true, emit: fullgene_results
tuple val(meta), path("*_mlst_*_results.txt") , optional:true, emit: mlst_results
tuple val(meta), path("*.pileup") , emit: pileup
tuple val(meta), path("*.sorted.bam") , emit: sorted_bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ""
def prefix = task.ext.prefix ?: "${meta.id}"
def read_s = meta.single_end ? "--input_se ${fastq_s}" : "--input_pe ${fastq_s[0]} ${fastq_s[1]}"
if (meta.db=="gene") {
database = "--gene_db ${db}"
} else if (meta.db=="mlst") {
database = "--mlst_db ${db}"
} else {
error "Please set meta.db to either \"gene\" or \"mlst\""
}
"""
srst2 \\
${read_s} \\
--threads $task.cpus \\
--output ${prefix} \\
${database} \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
srst2: \$(echo \$(srst2 --version 2>&1) | sed 's/srst2 //' ))
END_VERSIONS
"""
}

View file

@ -0,0 +1,72 @@
name: srst2_srst2
description: |
Short Read Sequence Typing for Bacterial Pathogens is a program designed to take Illumina sequence data,
a MLST database and/or a database of gene sequences (e.g. resistance genes, virulence genes, etc)
and report the presence of STs and/or reference genes.
keywords:
- mlst
- typing
- illumina
tools:
- srst2:
description: "Short Read Sequence Typing for Bacterial Pathogens"
homepage: "http://katholt.github.io/srst2/"
documentation: "https://github.com/katholt/srst2/blob/master/README.md"
tool_dev_url: "https://github.com/katholt/srst2"
doi: "10.1186/s13073-014-0090-6"
licence: ["BSD"]
input:
- meta:
type: map0.2.0-4
description: |
Groovy Map containing sample information
id: should be the identification number or sample name
single_end: should be true for single end data and false for paired in data
db: should be either 'gene' to use the --gene_db option or "mlst" to use the --mlst_db option
e.g. [ id:'sample', single_end:false , db:'gene']
- fasta:
type: file
description: |
gzipped fasta file. If files are NOT in
MiSeq format sample_S1_L001_R1_001.fastq.gz uses --forward and --reverse parameters; otherwise
default is _1, i.e. expect forward reads as sample_1.fastq.gz).
pattern: "*.fastq.gz"
- db:
type: file
description: Database in FASTA format
pattern: "*.fasta"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'sample', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- txt:
type: file
description: A detailed report, with one row per gene per sample described here github.com/katholt/srst2#gene-typing
pattern: "*_fullgenes_*_results.txt"
- txt:
type: file
description: A tabulated summary report of samples x genes.
pattern: "*_genes_*_results.txt"
- txt:
type: file
description: A tabulated summary report of mlst subtyping.
pattern: "*_mlst_*_results.txt"
- bam:
type: file
description: Sorted BAM file
pattern: "*.sorted.bam"
- pileup:
type: file
description: SAMtools pileup file
pattern: "*.pileup"
authors:
- "@jvhagey"

View file

@ -1775,6 +1775,10 @@ sratools/prefetch:
- modules/sratools/prefetch/**
- tests/modules/sratools/prefetch/**
srst2/srst2:
- modules/srst2/srst2/**
- tests/modules/srst2/srst2/**
ssuissero:
- modules/ssuissero/**
- tests/modules/ssuissero/**

View file

@ -1,8 +1,8 @@
- name: antismash antismashlitedownloaddatabases test_antismash_antismashlitedownloaddatabases
command: nextflow run tests/modules/antismash/antismashlitedownloaddatabases -entry test_antismash_antismashlitedownloaddatabases -c tests/config/nextflow.config
tags:
- antismash
- antismash/antismashlitedownloaddatabases
- antismash
files:
- path: output/antismash/versions.yml
md5sum: 24859c67023abab99de295d3675a24b6
@ -12,6 +12,5 @@
- path: output/antismash/antismash_db/pfam
- path: output/antismash/antismash_db/resfam
- path: output/antismash/antismash_db/tigrfam
- path: output/antismash/css
- path: output/antismash/detection
- path: output/antismash/modules
- path: output/antismash/antismash_dir
- path: output/antismash/antismash_dir/detection/hmm_detection/data/bgc_seeds.hmm

View file

@ -9,18 +9,20 @@ workflow test_diamond_blastp {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'txt'
out_ext = 'txt'
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
}
workflow test_diamond_blastp_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
outext = 'daa'
out_ext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
}

View file

@ -5,7 +5,6 @@
- diamond
files:
- path: output/diamond/test.diamond_blastp.txt
md5sum: 2515cf88590afa32356497e79a51fce9
- path: output/diamond/versions.yml
- name: diamond blastp test_diamond_blastp_daa
@ -15,5 +14,4 @@
- diamond
files:
- path: output/diamond/test.diamond_blastp.daa
md5sum: 0b539c68a5b66dd6e20ad5d218f4f4c6
- path: output/diamond/versions.yml

View file

@ -9,18 +9,20 @@ workflow test_diamond_blastx {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'txt'
out_ext = 'tfdfdt' // Nonsense file extension to check default case.
blast_columns = 'qseqid qlen'
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
}
workflow test_diamond_blastx_daa {
db = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
outext = 'daa'
out_ext = 'daa'
blast_columns = []
DIAMOND_MAKEDB ( db )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, out_ext, blast_columns )
}

View file

@ -5,7 +5,6 @@
- diamond/blastx
files:
- path: output/diamond/test.diamond_blastx.txt
md5sum: eb2aebfa1cb42fcb2121c65528663307
- path: output/diamond/versions.yml
- name: diamond blastx test_diamond_blastx_daa

View file

@ -0,0 +1,53 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SRST2_SRST2 } from '../../../../modules/srst2/srst2/main.nf'
workflow test_srst2_srst2_exit {
input = [
[ id:'test', single_end:false, db:"test"], // meta map
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
// [("")]
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta')
]
SRST2_SRST2(input)
}
workflow test_srst2_srst2_mlst {
input = [
[ id:'test', single_end:false, db:"mlst"], // meta map
[ file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/SRR9067271_1.fastq.gz", checkIfExists: true),
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/SRR9067271_2.fastq.gz", checkIfExists: true) ],
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/MLST_DB.fas')
]
SRST2_SRST2(input)
}
workflow test_srst2_srst2_paired_end {
input = [
[ id:'test', single_end:false, db:"gene"], // meta map
[ file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file(params.test_data['bacteroides_fragilis']['illumina']['test1_2_fastq_gz'], checkIfExists: true) ],
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
]
SRST2_SRST2(input)
}
workflow test_srst2_srst2_single_end {
input = [
[ id:'test', single_end:true, db:"gene" ], // meta map
file(params.test_data['bacteroides_fragilis']['illumina']['test1_1_fastq_gz'], checkIfExists: true),
file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/resFinder_20180221_srst2.fasta') // Change to params.test_data syntax after the data is included in tests/config/test_data.config
]
SRST2_SRST2(input)
}

View file

@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,51 @@
- name: srst2 srst2 test_srst2_srst2_exit #Testing pipeline exit when not meta.db
command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_exit -c tests/config/nextflow.config
tags:
- srst2/srst2
- srst2
exit_code: 1
- name: srst2 srst2 test_srst2_srst2_mlst
command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_mlst -c tests/config/nextflow.config
tags:
- srst2/srst2
- srst2
files:
- path: output/srst2/test__SRR9067271.MLST_DB.pileup
contains:
- "dnaJ-1 2 C 17 .........,....... FFFFFFFFFFFFFFFFF"
- path: output/srst2/test__SRR9067271.MLST_DB.sorted.bam
- path: output/srst2/test__mlst__MLST_DB__results.txt
md5sum: ec1b1f69933401d67c57f64cad11a098
- path: output/srst2/versions.yml
md5sum: a0c256a2fd3636069710b8ef22ee5ea7
- name: srst2 srst2 test_srst2_srst2_paired_end
command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_paired_end -c tests/config/nextflow.config
tags:
- srst2/srst2
- srst2
files:
- path: output/srst2/test__genes__resFinder_20180221_srst2__results.txt
md5sum: 099aa6cacec5524b311f606debdfb3a9
- path: output/srst2/test__test1.resFinder_20180221_srst2.pileup
md5sum: 64b512ff495b828c456405ec7b676ad1
- path: output/srst2/test__test1.resFinder_20180221_srst2.sorted.bam
- path: output/srst2/versions.yml
md5sum: b446a70f1a2b4f60757829bcd744a214
- name: srst2 srst2 test_srst2_srst2_single_end
command: nextflow run tests/modules/srst2/srst2 -entry test_srst2_srst2_single_end -c tests/config/nextflow.config
tags:
- srst2/srst2
- srst2
files:
- path: output/srst2/test__fullgenes__resFinder_20180221_srst2__results.txt
md5sum: d0762ef8c38afd0e0a34cce52ed1a3db
- path: output/srst2/test__genes__resFinder_20180221_srst2__results.txt
md5sum: b8850c6644406d8b131e471ecc3f9013
- path: output/srst2/test__test1_1.resFinder_20180221_srst2.pileup
md5sum: 5f6279dc8124aa762a9dfe3d7a871277
- path: output/srst2/test__test1_1.resFinder_20180221_srst2.sorted.bam
- path: output/srst2/versions.yml
md5sum: 790fe00493c6634d17801a930073218b