Add fasta/gff input option

This commit is contained in:
jasmezz 2022-05-11 12:06:02 +02:00
parent 9cce4a00b2
commit 5d76f1c7ff
4 changed files with 178 additions and 27 deletions

View file

@ -1,3 +1,76 @@
// process ANTISMASH_ANTISMASHLITE {
// tag "$meta.id"
// label 'process_medium'
// conda (params.enable_conda ? "bioconda::antismash-lite=6.0.1" : null)
// container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
// 'https://depot.galaxyproject.org/singularity/antismash-lite:6.0.1--pyhdfd78af_1' :
// 'quay.io/biocontainers/antismash-lite:6.0.1--pyhdfd78af_1' }"
// containerOptions {
// workflow.containerEngine == 'singularity' ?
// "-B $antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" :
// workflow.containerEngine == 'docker' ?
// "-v \$PWD/$antismash_dir:/usr/local/lib/python3.8/site-packages/antismash" :
// ''
// }
// input:
// tuple val(meta), path(sequence_input)
// path(databases)
// path(antismash_dir) // Optional input: AntiSMASH installation folder. It is not needed for using this module with conda, but required for docker/singularity (see meta.yml).
// output:
// tuple val(meta), path("${prefix}/clusterblast/*_c*.txt") , optional: true, emit: clusterblast_file
// tuple val(meta), path("${prefix}/css/*.css") , emit: css_file
// tuple val(meta), path("${prefix}/images") , emit: image_directory
// tuple val(meta), path("${prefix}/js/*.js") , emit: javascript
// tuple val(meta), path("${prefix}/knownclusterblast/region*/ctg*.html") , optional: true, emit: knownclusterblast_html
// tuple val(meta), path("${prefix}/knownclusterblast/*_c*.txt") , optional: true, emit: knownclusterblast_txt
// tuple val(meta), path("${prefix}/svg/clusterblast*.svg") , optional: true, emit: svg_files_clusterblast
// tuple val(meta), path("${prefix}/svg/knownclusterblast*.svg") , optional: true, emit: svg_files_knownclusterblast
// tuple val(meta), path("${prefix}/*.gbk") , emit: gbk_input
// tuple val(meta), path("${prefix}/*.json") , emit: json_results
// tuple val(meta), path("${prefix}/*.log") , emit: log
// tuple val(meta), path("${prefix}/*.zip") , emit: zip
// tuple val(meta), path("${prefix}/*region*.gbk") , emit: gbk_results
// tuple val(meta), path("${prefix}/clusterblastoutput.txt") , optional: true, emit: clusterblastoutput
// tuple val(meta), path("${prefix}/index.html") , emit: html
// tuple val(meta), path("${prefix}/knownclusterblastoutput.txt") , optional: true, emit: knownclusterblastoutput
// tuple val(meta), path("${prefix}/regions.js") , emit: json_sideloading
// path "versions.yml" , emit: versions
// when:
// task.ext.when == null || task.ext.when
// script:
// def args = task.ext.args ?: ''
// prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}"
// // if ( sequence_input.getExtension != 'fasta' && sequence_input.getExtension != 'fna' && gff )
// // log.warn "GFF input to antiSMASH can only be used if FASTA sequence input is supplied. GFF will be ignored for sample ${meta.id}"
// // if ( (sequence_input.getExtension == 'fasta' || sequence_input.getExtension == 'fna') && gff )
// // gff_flag = "--genefinding-gff3 ${gff}"
// // else
// // gff_flag = ""
// """
// ## We specifically do not include annotations (--genefinding-tool none) as
// ## this should be run as a separate module for versioning purposes
// antismash \\
// $args \\
// -c $task.cpus \\
// --output-dir $prefix \\
// --genefinding-tool none \\
// --logfile $prefix/${prefix}.log \\
// --databases $databases \\
// $sequence_input
// cat <<-END_VERSIONS > versions.yml
// "${task.process}":
// antismash-lite: \$(antismash --version | sed 's/antiSMASH //')
// END_VERSIONS
// """
// }
process ANTISMASH_ANTISMASHLITE {
tag "$meta.id"
label 'process_medium'
@ -17,9 +90,9 @@ process ANTISMASH_ANTISMASHLITE {
input:
tuple val(meta), path(sequence_input)
path(gff)
path(databases)
path(antismash_dir) // Optional input: AntiSMASH installation folder. It is not needed for using this module with conda, but required for docker/singularity (see meta.yml).
path(gff)
output:
tuple val(meta), path("${prefix}/clusterblast/*_c*.txt") , optional: true, emit: clusterblast_file
@ -47,24 +120,19 @@ process ANTISMASH_ANTISMASHLITE {
script:
def args = task.ext.args ?: ''
prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}"
if ( sequence_input.getExtension != 'fasta' && sequence_input.getExtension != 'fna' && gff )
log.warn "GFF input to antiSMASH can only be used if FASTA sequence input is supplied. GFF will be ignored for sample ${meta.id}"
if ( (sequence_input.getExtension == 'fasta' || sequence_input.getExtension == 'fna') && gff )
gff_flag = "--genefinding-gff3 ${gff}"
else
gff_flag = ""
gff_flag = "--genefinding-gff3 ${gff}"
"""
## We specifically do not include annotations (--genefinding-tool none) as
## this should be run as a separate module for versioning purposes
antismash \\
$args \\
$gff_flag \\
-c $task.cpus \\
--output-dir $prefix \\
--genefinding-tool none \\
--logfile $prefix/${prefix}.log \\
--databases $databases \\
$gff_flag \\
$sequence_input
cat <<-END_VERSIONS > versions.yml

View file

@ -35,10 +35,6 @@ input:
type: file
description: nucleotide sequence file (annotated)
pattern: "*.{gbk, gb, gbff, genbank, embl, fasta, fna}"
- gff:
type: file
description: GFF3 file to extract annotated features from (only needed if sequence_input is in FASTA format)
pattern: "*.{gff, GFF}"
- databases:
type: directory
description: downloaded AntiSMASH databases e.g. data/databases
@ -53,6 +49,9 @@ input:
directory needs to be mounted (including all modified files from the downloading step)
to the container as a workaround.
pattern: "*"
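- gff:
type: file
description: GFF3 file to extract annotated features from (only needed if sequence_input is in FASTA format)
pattern: "*.gff"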
output:
- meta:

View file

@ -2,35 +2,106 @@
nextflow.enable.dsl = 2
include { ANTISMASH_ANTISMASHLITE } from '../../../../modules/antismash/antismashlite/main.nf'
include { ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES } from '../../../modules/antismash/antismashlitedownloaddatabases/main.nf'
include { GUNZIP as GUNZIP1 } from '../../../../modules/gunzip/main.nf'
include { GUNZIP as GUNZIP2 } from '../../../../modules/gunzip/main.nf'
include { UNTAR as UNTAR1 } from '../../../../modules/untar/main.nf'
include { UNTAR as UNTAR2 } from '../../../../modules/untar/main.nf'
include { UNTAR as UNTAR3 } from '../../../../modules/untar/main.nf'
include { ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES } from '../../../modules/antismash/antismashlitedownloaddatabases/main.nf'
include { ANTISMASH_ANTISMASHLITE } from '../../../../modules/antismash/antismashlite/main.nf'
workflow test_antismashlite {
// Test workflow: stages a genome plus three antiSMASH database tarballs, unpacks
// them, builds the database/installation directories and runs ANTISMASH_ANTISMASHLITE.
// NOTE(review): this listing looks like a diff rendered without +/- markers, so
// superseded and replacement lines sit next to each other (e.g. `input_antismash_db1 = [`
// directly above `genome_gff = [`, and two ANTISMASH_ANTISMASHLITE calls below).
// Confirm which lines are live in the real file before relying on it.

// Gzipped GenBank genome (test-data key 'genome_gbff_gz'); only referenced by the
// first ANTISMASH_ANTISMASHLITE call below.
input_genome = [
[ id:'test' ], // meta map
file(params.test_data['bacteroides_fragilis']['genome']['genome_gbff_gz'], checkIfExists: true) ]
// Gzipped FASTA genome (test-data key 'genome_fna_gz') with the same meta id.
genome_fna = [
[ id:'test' ],
file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
]
input_antismash_db1 = [
// Gzipped GFF annotation (test-data key 'genome_gff_gz'); the meta slot is an empty list.
genome_gff = [
[],
file(params.test_data['bacteroides_fragilis']['genome']['genome_gff_gz'], checkIfExists: true)
]
// Three remote tarballs (css, detection, modules), each paired with an empty meta slot.
antismash_db1 = [
[],
file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/css.tar.gz', checkIfExists: true)
]
input_antismash_db2 = [
antismash_db2 = [
[],
file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/detection.tar.gz', checkIfExists: true)
]
input_antismash_db3 = [
antismash_db3 = [
[],
file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/modules.tar.gz', checkIfExists: true)
]
// These three calls use the `input_antismash_db*` names, whose assignments above are
// left dangling in this listing; the `antismash_db*` calls further down use the
// completed definitions.
UNTAR1 ( input_antismash_db1 )
UNTAR2 ( input_antismash_db2 )
UNTAR3 ( input_antismash_db3 )
// NOTE(review): the commented-out inputs below point at developer-local paths
// (/home/jasmin/...); presumably leftovers from local debugging.
// input_db = [
// file('/home/jasmin/antismash_db/', checkIfExists: true)
// ]
// input_dir = [
// file('/home/jasmin/antismash_dir/', checkIfExists: true)
// ]
// Decompress the FASTA genome and the GFF annotation.
GUNZIP1 ( genome_fna )
GUNZIP2 ( genome_gff )
UNTAR1 ( antismash_db1 )
UNTAR2 ( antismash_db2 )
UNTAR3 ( antismash_db3 )
// `it[1]` takes the second element of each UNTAR output; presumably the unpacked
// directory, with the meta slot dropped (TODO confirm against the UNTAR module).
ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES ( UNTAR1.out.untar.map{ it[1] }, UNTAR2.out.untar.map{ it[1] }, UNTAR3.out.untar.map{ it[1] } )
// First call: GenBank input with `[]` as the fourth argument.
// Second call: gunzipped FASTA input with the gunzipped GFF path as the fourth argument.
// NOTE(review): the process builds `--genefinding-gff3 ${gff}` unconditionally in the
// visible script section, so the `[]` case of the first call should be checked.
ANTISMASH_ANTISMASHLITE ( input_genome, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir, [] )
ANTISMASH_ANTISMASHLITE ( GUNZIP1.out.gunzip, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.database, ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES.out.antismash_dir, GUNZIP2.out.gunzip.map{ it[1] } )
// ANTISMASH_ANTISMASHLITE ( GUNZIP1.out.gunzip, input_db, input_dir, GUNZIP2.out.gunzip.map{ it[1] } )
}
// #!/usr/bin/env nextflow
// nextflow.enable.dsl = 2
// // include { UNTAR as UNTAR1 } from '../../../../modules/untar/main.nf'
// // include { UNTAR as UNTAR2 } from '../../../../modules/untar/main.nf'
// // include { UNTAR as UNTAR3 } from '../../../../modules/untar/main.nf'
// // // include { ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES } from '../../../modules/antismash/antismashlitedownloaddatabases/main.nf'
// include { ANTISMASH_ANTISMASHLITE } from '../../../../modules/antismash/antismashlite/main.nf'
// workflow test_antismashlite {
// input_genome = [
// [ id:'test' ], // meta map
// file('/home/jasmin/Downloads/BAN001-megahit.gbk', checkIfExists: true) ]
// // input_antismash_db1 = [
// // [],
// // file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/css.tar.gz', checkIfExists: true)
// // ]
// // input_antismash_db2 = [
// // [],
// // file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/detection.tar.gz', checkIfExists: true)
// // ]
// // input_antismash_db3 = [
// // [],
// // file('https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/antismash/modules.tar.gz', checkIfExists: true)
// // ]
// input_db = [
// [],
// file('/home/jasmin/antismash_db/', checkIfExists: true)
// ]
// input_dir = [
// [],
// file('/home/jasmin/antismash_dir', checkIfExists: true)
// ]
// // input_gff = [
// // [],
// // file('/home/jasmin/Downloads/BAN001.gff', checkIfExists: true)
// // ]
// // UNTAR1 ( input_antismash_db1 )
// // UNTAR2 ( input_antismash_db2 )
// // UNTAR3 ( input_antismash_db3 )
// // ANTISMASH_ANTISMASHLITEDOWNLOADDATABASES ( UNTAR1.out.untar.map{ it[1] }, UNTAR2.out.untar.map{ it[1] }, UNTAR3.out.untar.map{ it[1] } )
// ANTISMASH_ANTISMASHLITE ( input_genome, input_db, input_dir )
// }

View file

@ -13,11 +13,10 @@
- path: output/antismash/test/genome.gbk
contains: ['/tool="antismash"']
- path: output/antismash/test/genome.json
contains:
['{"version": "6.0.1", "input_file": "genome.gbff.gz", "records": [{"id": "NZ_CP069563.1", "seq": {"data":']
contains: ['{"version": "6.0.1", "input_file": "genome.fna.gz", "records": [{"id": "NZ_CP069563.1", "seq": {"data":']
- path: output/antismash/test/genome.zip
- path: output/antismash/test/index.html
md5sum: 32aaf51315258af3b300d9a5bafd7bdc
md5sum: de787e865c3a1eec143a19d2facb4de4
- path: output/antismash/test/js/antismash.js
md5sum: 58e90c3d783ae014cc3d51849bcb50a2
- path: output/antismash/test/js/jquery.js
@ -30,3 +29,17 @@
contains: ["antiSMASH version: 6.0.1"]
- path: output/antismash/versions.yml
md5sum: 759431a43da33e2ef8e2d0ebd79a439b
- path: output/gunzip1/genome.fna
md5sum: dafd38f5454b54fbea38245d773062a5
- path: output/gunzip1/versions.yml
md5sum: 854e3d45d43d2bc3bedf4bd6586e3e1f
- path: output/gunzip2/genome.gff
md5sum: 9b9c848b1946d43fa68128f4d6316052
- path: output/gunzip2/versions.yml
md5sum: 67bdab99aaaf1edfc0869bd2e6808036
- path: output/untar1/versions.yml
md5sum: 1e4721017721c45370996318e6b807e5
- path: output/untar2/versions.yml
md5sum: a6ae4977a432f3c5ef26687cec8622de
- path: output/untar3/versions.yml
md5sum: a5ee00c1c426ed601ff654891ba0f645