Merge pull request #1602 from jasmezz/antismashlite

Add AntiSMASH-lite
This commit is contained in:
Jasmin F 2022-05-16 12:02:08 +02:00 committed by GitHub
commit c6fae2ae6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 286 additions and 0 deletions

View file

@ -0,0 +1,68 @@
// Runs antiSMASH (antismash-lite 6.0.1) on one annotated sequence file and
// collects the result files written to the `${prefix}/` output directory.
process ANTISMASH_ANTISMASHLITE {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::antismash-lite=6.0.1" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/antismash-lite:6.0.1--pyhdfd78af_1' :
        'quay.io/biocontainers/antismash-lite:6.0.1--pyhdfd78af_1' }"

    // Mount the staged antiSMASH installation folder over the copy inside the
    // container: bind mount for singularity, volume for docker. No extra
    // options are set for any other engine.
    containerOptions {
        workflow.containerEngine == 'singularity' ?
            "-B $antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" :
            workflow.containerEngine == 'docker' ?
            "-v \$PWD/$antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" :
            ''
    }

    input:
    tuple val(meta), path(sequence_input)
    path(databases)
    path(antismash_dir) // Optional input: AntiSMASH installation folder. It is not needed for using this module with conda, but required for docker/singularity (see meta.yml).
    path(gff)           // GFF3 annotations; when nothing is supplied (e.g. an empty list) the --genefinding-gff3 flag is left out

    output:
    tuple val(meta), path("${prefix}/clusterblast/*_c*.txt")                 , optional: true, emit: clusterblast_file
    tuple val(meta), path("${prefix}/{css,images,js}")                       , emit: html_accessory_files
    tuple val(meta), path("${prefix}/knownclusterblast/region*/ctg*.html")   , optional: true, emit: knownclusterblast_html
    tuple val(meta), path("${prefix}/knownclusterblast/*_c*.txt")            , optional: true, emit: knownclusterblast_txt
    tuple val(meta), path("${prefix}/svg/clusterblast*.svg")                 , optional: true, emit: svg_files_clusterblast
    tuple val(meta), path("${prefix}/svg/knownclusterblast*.svg")            , optional: true, emit: svg_files_knownclusterblast
    tuple val(meta), path("${prefix}/*.gbk")                                 , emit: gbk_input
    tuple val(meta), path("${prefix}/*.json")                                , emit: json_results
    tuple val(meta), path("${prefix}/*.log")                                 , emit: log
    tuple val(meta), path("${prefix}/*.zip")                                 , emit: zip
    tuple val(meta), path("${prefix}/*region*.gbk")                          , emit: gbk_results
    tuple val(meta), path("${prefix}/clusterblastoutput.txt")                , optional: true, emit: clusterblastoutput
    tuple val(meta), path("${prefix}/index.html")                            , emit: html
    tuple val(meta), path("${prefix}/knownclusterblastoutput.txt")           , optional: true, emit: knownclusterblastoutput
    tuple val(meta), path("${prefix}/regions.js")                            , emit: json_sideloading
    path "versions.yml"                                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is intentionally assigned without `def`: the output patterns
    // above interpolate it, so it must stay visible outside this script scope.
    prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}"
    // Only emit the flag when a GFF file was actually provided; an empty value
    // would otherwise leave a dangling `--genefinding-gff3` on the command line.
    def gff_flag = gff ? "--genefinding-gff3 ${gff}" : ''
    """
    ## We specifically do not include annotations (--genefinding-tool none) as
    ## this should be run as a separate module for versioning purposes
    antismash \\
        $args \\
        $gff_flag \\
        -c $task.cpus \\
        --output-dir $prefix \\
        --genefinding-tool none \\
        --logfile $prefix/${prefix}.log \\
        --databases $databases \\
        $sequence_input

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        antismash-lite: \$(antismash --version | sed 's/antiSMASH //')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,128 @@
# Module identity, search keywords and upstream tool metadata.
name: antismash_antismashlite
description: |
  antiSMASH allows the rapid genome-wide identification, annotation
  and analysis of secondary metabolite biosynthesis gene clusters.
keywords:
  - secondary metabolites
  - BGC
  - biosynthetic gene cluster
  - genome mining
  - NRPS
  - RiPP
  - antibiotics
  - prokaryotes
  - bacteria
  - eukaryotes
  - fungi
  - antismash
tools:
  - antismashlite:
      description: "antiSMASH - the antibiotics and Secondary Metabolite Analysis SHell"
      homepage: "https://docs.antismash.secondarymetabolites.org"
      documentation: "https://docs.antismash.secondarymetabolites.org"
      tool_dev_url: "https://github.com/antismash/antismash"
      doi: "10.1093/nar/gkab335"
      # A real sequence of licence names, not a string that merely looks like one
      licence: ["AGPL v3"]
# Inputs, listed in the order the process declares them.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - sequence_input:
      type: file
      description: nucleotide sequence file (annotated)
      # No spaces inside the braces: they would become part of each alternative
      pattern: "*.{gbk,gb,gbff,genbank,embl,fasta,fna}"
  - databases:
      type: directory
      description: downloaded AntiSMASH databases e.g. data/databases
      pattern: "*/"
  - antismash_dir:
      type: directory
      description: |
        A local copy of an AntiSMASH installation folder. This is required when running with
        docker and singularity (not required for conda), due to attempted 'modifications' of
        files during database checks in the installation directory, something that cannot
        be done in immutable docker/singularity containers. Therefore, a local installation
        directory needs to be mounted (including all modified files from the downloading step)
        to the container as a workaround.
      pattern: "*/"
  - gff:
      type: file
      description: Annotations in GFF3 format, passed to antiSMASH via --genefinding-gff3
      pattern: "*.gff"
# Output channels emitted by the process; patterns are relative to the
# per-sample output directory.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - clusterblast_file:
      type: file
      description: Output of ClusterBlast algorithm
      pattern: "clusterblast/*_c*.txt"
  - html_accessory_files:
      type: directory
      description: Accessory files for the HTML output
      pattern: "{css/,images/,js/}"
  - knownclusterblast_html:
      type: file
      description: Tables with MIBiG hits in HTML format
      pattern: "knownclusterblast/region*/ctg*.html"
  - knownclusterblast_txt:
      type: file
      description: Tables with MIBiG hits
      pattern: "knownclusterblast/*_c*.txt"
  - svg_files_clusterblast:
      type: file
      description: SVG images showing the % identity of the aligned hits against their queries
      pattern: "svg/clusterblast*.svg"
  - svg_files_knownclusterblast:
      type: file
      description: SVG images showing the % identity of the aligned hits against their queries
      pattern: "svg/knownclusterblast*.svg"
  - gbk_input:
      type: file
      description: Nucleotide sequence and annotations in GenBank format; converted from input file
      pattern: "*.gbk"
  - json_results:
      type: file
      description: Nucleotide sequence and annotations in JSON format; converted from GenBank file (gbk_input)
      pattern: "*.json"
  - log:
      type: file
      description: Contains all the logging output that antiSMASH produced during its run
      pattern: "*.log"
  - zip:
      type: file
      description: Contains a compressed version of the output folder in zip format
      pattern: "*.zip"
  - gbk_results:
      type: file
      description: Nucleotide sequence and annotations in GenBank format; one file per antiSMASH hit
      pattern: "*region*.gbk"
  - clusterblastoutput:
      type: file
      description: Raw BLAST output of known clusters previously predicted by antiSMASH using the built-in ClusterBlast algorithm
      pattern: "clusterblastoutput.txt"
  - html:
      type: file
      description: Graphical web view of results in HTML format
      pattern: "index.html"
  - knownclusterblastoutput:
      type: file
      description: Raw BLAST output of known clusters of the MIBiG database
      pattern: "knownclusterblastoutput.txt"
  - json_sideloading:
      type: file
      description: Sideloaded annotations of protoclusters and/or subregions (see antiSMASH documentation "Annotation sideloading")
      pattern: "regions.js"
authors:
  - "@jasmezz"

View file

@ -42,6 +42,10 @@ amrfinderplus/update:
  - modules/amrfinderplus/update/**
  - tests/modules/amrfinderplus/update/**
# Path globs that trigger the tests of each antiSMASH module when changed.
antismash/antismashlite:
  - modules/antismash/antismashlite/**
  - tests/modules/antismash/antismashlite/**
antismash/antismashlitedownloaddatabases:
  - modules/antismash/antismashlitedownloaddatabases/**
  - tests/modules/antismash/antismashlitedownloaddatabases/**

View file

@ -0,0 +1,46 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// Every module is included from the repository-level `modules/` directory,
// four levels above this test script. The database-download include used to
// climb only three levels, which resolves inside `tests/` instead.
include { ANTISMASH_ANTISMASHLITE                  } from '../../../../modules/antismash/antismashlite/main.nf'
include { ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES } from '../../../../modules/antismash/antismashlitedownloaddatabases/main.nf'
include { GUNZIP as GUNZIP1                        } from '../../../../modules/gunzip/main.nf'
include { GUNZIP as GUNZIP2                        } from '../../../../modules/gunzip/main.nf'
include { UNTAR as UNTAR1                          } from '../../../../modules/untar/main.nf'
include { UNTAR as UNTAR2                          } from '../../../../modules/untar/main.nf'
include { UNTAR as UNTAR3                          } from '../../../../modules/untar/main.nf'

// End-to-end test: decompress the genome and its GFF, unpack the three
// antiSMASH archives, run the database download step on them, then run
// antiSMASH with the resulting database and installation directory.
workflow test_antismashlite {

    // Compressed genome sequence, tagged with the sample id used for outputs
    genome_fna = [
        [ id:'test' ],
        file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
    ]

    // Compressed annotations for the same genome (no meta needed)
    genome_gff = [
        [],
        file(params.test_data['bacteroides_fragilis']['genome']['genome_gff_gz'], checkIfExists: true)
    ]

    // Archives handed to the database download module after unpacking
    antismash_css = [
        [],
        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/css.tar.gz', checkIfExists: true)
    ]

    antismash_detection = [
        [],
        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/detection.tar.gz', checkIfExists: true)
    ]

    antismash_modules = [
        [],
        file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/modules.tar.gz', checkIfExists: true)
    ]

    GUNZIP1 ( genome_fna )
    GUNZIP2 ( genome_gff )
    UNTAR1 ( antismash_css )
    UNTAR2 ( antismash_detection )
    UNTAR3 ( antismash_modules )

    // `.map{ it[1] }` drops the meta element so only the path is passed on
    ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES ( UNTAR1.out.untar.map{ it[1] }, UNTAR2.out.untar.map{ it[1] }, UNTAR3.out.untar.map{ it[1] } )
    ANTISMASH_ANTISMASHLITE ( GUNZIP1.out.gunzip, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir, GUNZIP2.out.gunzip.map{ it[1] } )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish every process's results under <outdir>/<tool>: take the last
    // ':'-separated component of the process name, keep the text before its
    // first underscore, and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,35 @@
- name: antismash antismashlite test_antismashlite
  command: nextflow run tests/modules/antismash/antismashlite -entry test_antismashlite -c tests/config/nextflow.config
  tags:
    - antismash/antismashlite
    - antismash
  files:
    # antiSMASH results: content checks where only part of the file is asserted
    - path: output/antismash/test/NZ_CP069563.1.region001.gbk
      contains:
        - '/tool="antismash"'
    - path: output/antismash/test/NZ_CP069563.1.region002.gbk
      contains:
        - '/tool="antismash"'
    - path: output/antismash/test/css/bacteria.css
      md5sum: 8b3c2f8b143d5245a5f42f55803c532c
    - path: output/antismash/test/genome.gbk
      contains:
        - '/tool="antismash"'
    - path: output/antismash/test/genome.json
      contains:
        - '{"version": "6.0.1", "input_file": "genome.fna", "records": [{"id": "NZ_CP069563.1", "seq": {"data":'
    # Existence check only
    - path: output/antismash/test/genome.zip
    - path: output/antismash/test/index.html
      md5sum: de787e865c3a1eec143a19d2facb4de4
    - path: output/antismash/test/js/antismash.js
      md5sum: 58e90c3d783ae014cc3d51849bcb50a2
    - path: output/antismash/test/js/jquery.js
      md5sum: 397754ba49e9e0cf4e7c190da78dda05
    - path: output/antismash/test/js/jquery.tablesorter.min.js
      md5sum: 5e9e08cef4d1be0eaa538e6eb28809a7
    - path: output/antismash/test/regions.js
      contains:
        - '"seq_id": "NZ_CP069563.1"'
    - path: output/antismash/test/test.log
      contains:
        - 'antiSMASH version: 6.0.1'
    - path: output/antismash/versions.yml
      md5sum: 759431a43da33e2ef8e2d0ebd79a439b
    # Decompressed inputs produced by the two GUNZIP steps
    - path: output/gunzip1/genome.fna
      md5sum: dafd38f5454b54fbea38245d773062a5
    - path: output/gunzip2/genome.gff
      md5sum: 9b9c848b1946d43fa68128f4d6316052