mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-21 18:58:16 +00:00
Add gecco/run module (#1790)
* Add gecco/run module * Fix container URLs * Apply suggestions from code review Co-authored-by: Jasmin F <73216762+jasmezz@users.noreply.github.com>
This commit is contained in:
parent
280eec5317
commit
9d7208504d
6 changed files with 153 additions and 0 deletions
47
modules/gecco/run/main.nf
Normal file
47
modules/gecco/run/main.nf
Normal file
|
@ -0,0 +1,47 @@
|
|||
// Runs `gecco run` on one input sequence file and collects the resulting
// gene/feature/cluster tables plus the optional GenBank and JSON outputs.
process GECCO_RUN {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gecco=0.9.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gecco:0.9.2--pyhdfd78af_0':
        'quay.io/biocontainers/gecco:0.9.2--pyhdfd78af_0' }"

    input:
    // meta map, sequence file to scan, and an HMM file that may be left empty
    tuple val(meta), path(input), path(hmm)
    // model directory; may be left empty
    path model_dir

    output:
    tuple val(meta), path("*.genes.tsv")    , emit: genes
    tuple val(meta), path("*.features.tsv") , emit: features
    tuple val(meta), path("*.clusters.tsv") , emit: clusters
    tuple val(meta), path("*_cluster_*.gbk"), optional: true, emit: gbk
    tuple val(meta), path("*.json")         , optional: true, emit: json

    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args      = task.ext.args ?: ''
    // NOTE: prefix is resolved here but not referenced by the command below.
    def prefix    = task.ext.prefix ?: "${meta.id}"
    // Each optional flag collapses to an empty string when its input is falsy.
    def model_opt = model_dir ? "--model ${model_dir}" : ""
    def hmm_opt   = hmm ? "--hmm ${hmm}" : ""
    """
    gecco \\
        run \\
        $args \\
        -j $task.cpus \\
        -o ./ \\
        -g ${input} \\
        $model_opt \\
        $hmm_opt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' )
    END_VERSIONS
    """
}
|
67
modules/gecco/run/meta.yml
Normal file
67
modules/gecco/run/meta.yml
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Module metadata for gecco/run: describes the tool and documents every
# input and output channel declared by the process.
name: "gecco_run"
description: GECCO is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).
keywords:
  - bgc
  - detection
  - metagenomics
  - contigs
tools:
  - "gecco":
      description: "Biosynthetic Gene Cluster prediction with Conditional Random Fields."
      homepage: "https://gecco.embl.de"
      documentation: "https://gecco.embl.de"
      tool_dev_url: "https://github.com/zellerlab/GECCO"
      doi: "10.1101/2021.05.03.442509"
      # A real YAML list, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: A genomic file containing one or more sequences as input. Input type is any supported by Biopython (fasta, gbk, etc.)
      pattern: "*"
  - hmm:
      type: file
      description: Alternative HMM file(s) to use in HMMER format
      pattern: "*.hmm"
  - model_dir:
      type: directory
      description: Path to an alternative CRF (Conditional Random Fields) model to use

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - genes:
      type: file
      description: TSV file containing detected/predicted genes with BGC probability scores
      pattern: "*.genes.tsv"
  - features:
      type: file
      description: TSV file containing identified domains
      pattern: "*.features.tsv"
  - clusters:
      type: file
      description: TSV file containing coordinates of predicted clusters and BGC types
      pattern: "*.clusters.tsv"
  - gbk:
      type: file
      description: Per cluster GenBank file (if found) containing sequence with annotations
      # Same glob as the process output declaration.
      pattern: "*_cluster_*.gbk"
  - json:
      type: file
      description: AntiSMASH v6 sideload JSON file (if --antismash-sideload supplied)
      pattern: "*.json"

authors:
  - "@jfy133"
|
|
@ -915,6 +915,10 @@ gatk4/variantrecalibrator:
|
|||
- modules/gatk4/variantrecalibrator/**
|
||||
- tests/modules/gatk4/variantrecalibrator/**
|
||||
|
||||
# Path globs for the gecco/run module and its tests
# (presumably used to select which module tests to run; confirm against the consumer).
gecco/run:
  - modules/gecco/run/**
  - tests/modules/gecco/run/**
|
||||
|
||||
genescopefk:
|
||||
- modules/genescopefk/**
|
||||
- tests/modules/genescopefk/**
|
||||
|
|
17
tests/modules/gecco/run/main.nf
Normal file
17
tests/modules/gecco/run/main.nf
Normal file
|
@ -0,0 +1,17 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { GECCO_RUN } from '../../../../modules/gecco/run/main.nf'
|
||||
|
||||
// Runs GECCO_RUN on a single test genome with no custom HMM and no custom model.
workflow test_gecco_run {

    // Genome FASTA looked up from the shared test-data map; must exist on disk.
    genome_fasta = file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)

    input = [
        [ id:'test', single_end:false ], // meta map
        genome_fasta,
        []                               // no HMM file supplied
    ]

    // Empty list: no model directory supplied.
    model_dir = []

    GECCO_RUN ( input, model_dir )
}
|
5
tests/modules/gecco/run/nextflow.config
Normal file
5
tests/modules/gecco/run/nextflow.config
Normal file
|
@ -0,0 +1,5 @@
|
|||
process {

    // Publish into "<outdir>/<tool>", where <tool> is the last ':'-separated
    // segment of the process name, cut at the first '_' and lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

}
|
13
tests/modules/gecco/run/test.yml
Normal file
13
tests/modules/gecco/run/test.yml
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Test case for the test_gecco_run workflow entry: the command to run,
# its tags, and the output files listed for checking.
- name: gecco run test_gecco_run
  command: nextflow run ./tests/modules/gecco/run -entry test_gecco_run -c ./tests/config/nextflow.config -c ./tests/modules/gecco/run/nextflow.config
  tags:
    - gecco/run
    - gecco
  files:
    # Listed without a checksum.
    - path: output/gecco/NC_018507.1_cluster_1.gbk
    - path: output/gecco/genome.clusters.tsv
      md5sum: 6560ec765f6bbae50645896fd93b35cd
    - path: output/gecco/genome.features.tsv
      md5sum: 07492cf186003311ba9829056c65787f
    - path: output/gecco/genome.genes.tsv
      md5sum: 031a37dc603f8a5296f462608fd7bcc3
|
Loading…
Reference in a new issue