mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Add gecco/run module (#1790)
* Add gecco/run module * Fix container URLs * Apply suggestions from code review Co-authored-by: Jasmin F <73216762+jasmezz@users.noreply.github.com>
This commit is contained in:
parent
280eec5317
commit
9d7208504d
6 changed files with 153 additions and 0 deletions
47
modules/gecco/run/main.nf
Normal file
47
modules/gecco/run/main.nf
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
// Runs `gecco run` on a single genomic input file and collects the resulting
// TSV tables, optional per-cluster GenBank files, optional JSON output, and a
// versions.yml describing the tool version used.
process GECCO_RUN {
    tag "$meta.id"
    label 'process_low'

    // Software environment: conda package when params.enable_conda is set,
    // otherwise a container (Singularity image from the Galaxy depot when the
    // engine is singularity and a Docker pull was not requested; else the
    // quay.io biocontainer).
    conda (params.enable_conda ? "bioconda::gecco=0.9.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gecco:0.9.2--pyhdfd78af_0':
        'quay.io/biocontainers/gecco:0.9.2--pyhdfd78af_0' }"

    input:
    // meta      : sample metadata map (its `id` is used for the task tag)
    // input     : genomic sequence file passed to `-g`
    // hmm       : optional alternative HMM file; a falsy value (e.g. []) omits `--hmm`
    tuple val(meta), path(input), path(hmm)
    // model_dir : optional alternative model directory; a falsy value (e.g. []) omits `--model`
    path model_dir

    output:
    tuple val(meta), path("*.genes.tsv")     , emit: genes
    tuple val(meta), path("*.features.tsv")  , emit: features
    tuple val(meta), path("*.clusters.tsv")  , emit: clusters
    // The two outputs below are optional: the task does not fail when no file matches.
    tuple val(meta), path("*_cluster_*.gbk") , optional: true, emit: gbk
    tuple val(meta), path("*.json")          , optional: true, emit: json

    path "versions.yml"                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // NOTE(review): `prefix` is computed but never referenced in the command below,
    // so output file names are chosen by gecco itself — presumably from the input
    // file name; confirm, and either use the prefix or drop it.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional flags expand to an empty string when the corresponding input is falsy.
    def custom_model = model_dir ? "--model ${model_dir}" : ""
    def custom_hmm = hmm ? "--hmm ${hmm}" : ""
    // Results are written to the task working directory (`-o ./`); the version is
    // taken as the second space-separated field of `gecco --version`.
    """
    gecco \\
        run \\
        $args \\
        -j $task.cpus \\
        -o ./ \\
        -g ${input} \\
        $custom_model \\
        $custom_hmm

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gecco: \$(echo \$(gecco --version) | cut -f 2 -d ' ' )
    END_VERSIONS
    """
}
|
67
modules/gecco/run/meta.yml
Normal file
67
modules/gecco/run/meta.yml
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
# Module metadata for gecco/run: describes the tool, the process inputs and
# the emitted output channels.
name: "gecco_run"
description: GECCO is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).
keywords:
  - bgc
  - detection
  - metagenomics
  - contigs
tools:
  - "gecco":
      description: "Biosynthetic Gene Cluster prediction with Conditional Random Fields."
      homepage: "https://gecco.embl.de"
      documentation: "https://gecco.embl.de"
      tool_dev_url: "https://github.com/zellerlab/GECCO"
      doi: "10.1101/2021.05.03.442509"
      # A real YAML list, not a string that merely looks like one.
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input:
      type: file
      description: A genomic file containing one or more sequences as input. Input type is any supported by Biopython (fasta, gbk, etc.)
      pattern: "*"
  - hmm:
      type: file
      description: Alternative HMM file(s) to use in HMMER format
      pattern: "*.hmm"
  - model_dir:
      type: directory
      description: Path to an alternative CRF (Conditional Random Fields) model to use

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - genes:
      type: file
      description: TSV file containing detected/predicted genes with BGC probability scores
      pattern: "*.genes.tsv"
  - features:
      type: file
      description: TSV file containing identified domains
      pattern: "*.features.tsv"
  - clusters:
      type: file
      description: TSV file containing coordinates of predicted clusters and BGC types
      pattern: "*.clusters.tsv"
  - gbk:
      type: file
      description: Per cluster GenBank file (if found) containing sequence with annotations
      # Same glob as the process output declaration.
      pattern: "*_cluster_*.gbk"
  - json:
      type: file
      description: AntiSMASH v6 sideload JSON file (if --antismash-sideload is supplied)
      pattern: "*.json"

authors:
  - "@jfy133"
|
|
@ -915,6 +915,10 @@ gatk4/variantrecalibrator:
|
||||||
- modules/gatk4/variantrecalibrator/**
|
- modules/gatk4/variantrecalibrator/**
|
||||||
- tests/modules/gatk4/variantrecalibrator/**
|
- tests/modules/gatk4/variantrecalibrator/**
|
||||||
|
|
||||||
|
gecco/run:
|
||||||
|
- modules/gecco/run/**
|
||||||
|
- tests/modules/gecco/run/**
|
||||||
|
|
||||||
genescopefk:
|
genescopefk:
|
||||||
- modules/genescopefk/**
|
- modules/genescopefk/**
|
||||||
- tests/modules/genescopefk/**
|
- tests/modules/genescopefk/**
|
||||||
|
|
17
tests/modules/gecco/run/main.nf
Normal file
17
tests/modules/gecco/run/main.nf
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
|
||||||
|
nextflow.enable.dsl = 2
|
||||||
|
|
||||||
|
include { GECCO_RUN } from '../../../../modules/gecco/run/main.nf'
|
||||||
|
|
||||||
|
// Test entry workflow: runs GECCO_RUN on a single test genome FASTA using the
// tool's defaults (no alternative HMM file, no alternative model directory).
workflow test_gecco_run {

    input = [
        [ id:'test', single_end:false ], // meta map
        file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true),
        [] // empty hmm slot: the process omits `--hmm`
    ]
    model_dir = [] // empty: the process omits `--model`

    GECCO_RUN ( input, model_dir )
}
|
5
tests/modules/gecco/run/nextflow.config
Normal file
5
tests/modules/gecco/run/nextflow.config
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
// Test configuration applied to every process in this module's test run.
process {

    // Publish into a sub-directory of params.outdir named after the tool:
    // take the last ':'-separated part of the process name, then the part before
    // the first '_', lower-cased (e.g. a process named GECCO_RUN -> "gecco").
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

}
|
13
tests/modules/gecco/run/test.yml
Normal file
13
tests/modules/gecco/run/test.yml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# Test case for the gecco/run module: runs the test_gecco_run entry workflow
# and checks the published output files.
- name: gecco run test_gecco_run
  command: nextflow run ./tests/modules/gecco/run -entry test_gecco_run -c ./tests/config/nextflow.config -c ./tests/modules/gecco/run/nextflow.config
  tags:
    - gecco/run
    - gecco
  files:
    # Listed without a checksum, so only the path is given for this file.
    # NOTE(review): presumably the GenBank content is not byte-stable between runs — confirm.
    - path: output/gecco/NC_018507.1_cluster_1.gbk
    # TSV outputs are listed with md5 checksums.
    - path: output/gecco/genome.clusters.tsv
      md5sum: 6560ec765f6bbae50645896fd93b35cd
    - path: output/gecco/genome.features.tsv
      md5sum: 07492cf186003311ba9829056c65787f
    - path: output/gecco/genome.genes.tsv
      md5sum: 031a37dc603f8a5296f462608fd7bcc3
|
Loading…
Reference in a new issue