mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-14 05:43:08 +00:00
new module: deepbgc/pipeline (#2014)
* not working yet (db not found)
* modify deepbgc/download module to return db-path
* 🪄
* Prettier
* add test.yml
* much prettier
* test.yml: delete md5 for potentially empty files
* adapt test.yml
* test.yml again
* Apply suggestions from code review
Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
This commit is contained in:
parent
f2264c1052
commit
eae945721d
7 changed files with 275 additions and 1 deletions
|
@ -8,7 +8,7 @@ process DEEPBGC_DOWNLOAD {
|
||||||
'quay.io/biocontainers/deepbgc:0.1.30--pyhb7b1952_1' }"
|
'quay.io/biocontainers/deepbgc:0.1.30--pyhb7b1952_1' }"
|
||||||
|
|
||||||
output:
|
output:
|
||||||
path "deepbgc_db" , emit: db
|
path "deepbgc_db/" , emit: db
|
||||||
path "versions.yml" , emit: versions
|
path "versions.yml" , emit: versions
|
||||||
|
|
||||||
when:
|
when:
|
||||||
|
|
47
modules/deepbgc/pipeline/main.nf
Normal file
47
modules/deepbgc/pipeline/main.nf
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
/*
 * DEEPBGC_PIPELINE: runs `deepbgc pipeline` on one input file.
 *
 * Inputs:
 *   - tuple val(meta), path(genome): sample meta map and the file handed to deepbgc.
 *   - path(db): path exported as DEEPBGC_DOWNLOADS_DIR before deepbgc is invoked.
 *
 * Outputs: every result is collected from a directory named after the base
 * name of `genome`; only LOG.txt is mandatory, all other result files are optional.
 * `versions.yml` records the deepbgc version reported by `deepbgc info`.
 */
process DEEPBGC_PIPELINE {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::deepbgc=0.1.30" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/deepbgc:0.1.30--pyhb7b1952_1':
        'quay.io/biocontainers/deepbgc:0.1.30--pyhb7b1952_1' }"

    input:
    tuple val(meta), path(genome)
    path(db)

    output:
    tuple val(meta), path("${genome.baseName}/README.txt")                                  , optional: true, emit: readme
    tuple val(meta), path("${genome.baseName}/LOG.txt")                                     ,                 emit: log
    tuple val(meta), path("${genome.baseName}/${genome.baseName}.antismash.json")           , optional: true, emit: json
    tuple val(meta), path("${genome.baseName}/${genome.baseName}.bgc.gbk")                  , optional: true, emit: bgc_gbk
    tuple val(meta), path("${genome.baseName}/${genome.baseName}.bgc.tsv")                  , optional: true, emit: bgc_tsv
    tuple val(meta), path("${genome.baseName}/${genome.baseName}.full.gbk")                 , optional: true, emit: full_gbk
    tuple val(meta), path("${genome.baseName}/${genome.baseName}.pfam.tsv")                 , optional: true, emit: pfam_tsv
    tuple val(meta), path("${genome.baseName}/evaluation/${genome.baseName}.bgc.png")       , optional: true, emit: bgc_png
    tuple val(meta), path("${genome.baseName}/evaluation/${genome.baseName}.pr.png")        , optional: true, emit: pr_png
    tuple val(meta), path("${genome.baseName}/evaluation/${genome.baseName}.roc.png")       , optional: true, emit: roc_png
    tuple val(meta), path("${genome.baseName}/evaluation/${genome.baseName}.score.png")     , optional: true, emit: score_png
    path "versions.yml"                                                                     ,                 emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // NOTE(review): `prefix` is computed but not referenced by the command or the
    // output declarations, which are keyed on genome.baseName instead.
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Version capture: `2>&1` folds stderr into stdout so grep sees the version line
    // whichever stream deepbgc writes it to. The former stray `/dev/null/` token after
    // the redirect was not a redirect target; it was passed to `deepbgc info` as an argument.
    """
    export DEEPBGC_DOWNLOADS_DIR=${db}

    deepbgc \\
        pipeline \\
        $args \\
        $genome

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        deepbgc: \$(echo \$(deepbgc info 2>&1 | grep 'version' | cut -d " " -f3) )
    END_VERSIONS
    """
}
|
88
modules/deepbgc/pipeline/meta.yml
Normal file
88
modules/deepbgc/pipeline/meta.yml
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
# Module metadata for deepbgc/pipeline: describes the tool, the process inputs
# (sample meta map, input genome file, database path) and every emitted output channel.
name: "deepbgc_pipeline"
description: detect BGCs in bacterial and fungal genomes using deep learning
keywords:
  - Biosynthetic Gene Cluster
  - deep learning
  - neural network
  - random forest
  - genomes
  - bacteria
  - fungi
tools:
  - "deepbgc":
      description: "DeepBGC - Biosynthetic Gene Cluster detection and classification"
      homepage: "https://github.com/Merck/deepbgc"
      documentation: "https://github.com/Merck/deepbgc"
      tool_dev_url: "https://github.com/Merck/deepbgc"
      doi: "10.1093/nar/gkz654"
      licence: "['MIT']"

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - genome:
      type: file
      description: FASTA/GenBank/Pfam CSV file
      pattern: "*.{fasta,fa,fna,gbk,csv}"
  # Second process input (`path(db)`); the process exports it as DEEPBGC_DOWNLOADS_DIR.
  - db:
      type: directory
      description: "DeepBGC database path exported as DEEPBGC_DOWNLOADS_DIR, e.g. the db output of deepbgc/download"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - readme:
      type: file
      description: txt file containing description of output files
      pattern: "*.{txt}"
  - log:
      type: file
      description: Log output of DeepBGC
      pattern: "*.{txt}"
  - json:
      type: file
      description: AntiSMASH JSON file for sideloading.
      pattern: "*.{json}"
  - bgc_gbk:
      type: file
      description: Sequences and features of all detected BGCs in GenBank format.
      pattern: "*.{bgc.gbk}"
  - bgc_tsv:
      type: file
      description: Table of detected BGCs and their properties.
      pattern: "*.{bgc.tsv}"
  - full_gbk:
      type: file
      description: Fully annotated input sequence with proteins, Pfam domains (PFAM_domain features) and BGCs (cluster features)
      pattern: "*.{full.gbk}"
  - pfam_tsv:
      type: file
      description: Table of Pfam domains (pfam_id) from given sequence (sequence_id) in genomic order, with BGC detection scores.
      pattern: "*.{pfam.tsv}"
  - bgc_png:
      type: file
      description: Detected BGCs plotted by their nucleotide coordinates.
      pattern: "*.{bgc.png}"
  - pr_png:
      type: file
      description: Precision-Recall curve based on predicted per-Pfam BGC scores.
      pattern: "*.{pr.png}"
  - roc_png:
      type: file
      description: ROC curve based on predicted per-Pfam BGC scores.
      pattern: "*.{roc.png}"
  - score_png:
      type: file
      description: BGC detection scores of each Pfam domain in genomic order.
      pattern: "*.{score.png}"

authors:
  - "@louperelo"
  - "@jfy133"
|
|
@ -607,6 +607,10 @@ deepbgc/download:
|
||||||
- modules/deepbgc/download/**
|
- modules/deepbgc/download/**
|
||||||
- tests/modules/deepbgc/download/**
|
- tests/modules/deepbgc/download/**
|
||||||
|
|
||||||
|
deepbgc/pipeline:
|
||||||
|
- modules/deepbgc/pipeline/**
|
||||||
|
- tests/modules/deepbgc/pipeline/**
|
||||||
|
|
||||||
deeptools/bamcoverage:
|
deeptools/bamcoverage:
|
||||||
- modules/deeptools/bamcoverage/**
|
- modules/deeptools/bamcoverage/**
|
||||||
- tests/modules/deeptools/bamcoverage/**
|
- tests/modules/deeptools/bamcoverage/**
|
||||||
|
|
33
tests/modules/deepbgc/pipeline/main.nf
Normal file
33
tests/modules/deepbgc/pipeline/main.nf
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// This script lives in tests/modules/deepbgc/pipeline/, so the repository-level
// modules/ directory is four levels up. All includes use the same depth; the
// three-level form would resolve under tests/modules/ instead of modules/.
include { GUNZIP           } from '../../../../modules/gunzip/main.nf'
include { PRODIGAL         } from '../../../../modules/prodigal/main.nf'
include { DEEPBGC_DOWNLOAD } from '../../../../modules/deepbgc/download/main.nf'
include { DEEPBGC_PIPELINE } from '../../../../modules/deepbgc/pipeline/main.nf'

// GenBank route: decompress the contigs, annotate them with PRODIGAL in 'gbk'
// output format, then run DEEPBGC_PIPELINE on the annotation with the downloaded db.
workflow test_deepbgc_pipeline_gbk {

    input = [
        [ id:'test_gbk', single_end:false ], // meta map
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
    ]

    DEEPBGC_DOWNLOAD ()
    GUNZIP ( input )
    PRODIGAL ( GUNZIP.out.gunzip, 'gbk' )
    DEEPBGC_PIPELINE ( PRODIGAL.out.gene_annotations, DEEPBGC_DOWNLOAD.out.db )
}

// FASTA route: decompress the contigs and run DEEPBGC_PIPELINE on them directly
// with the downloaded db.
workflow test_deepbgc_pipeline_fa {

    input = [
        [ id:'test_fa', single_end:false ], // meta map
        file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
    ]

    DEEPBGC_DOWNLOAD ()
    GUNZIP ( input )
    DEEPBGC_PIPELINE ( GUNZIP.out.gunzip, DEEPBGC_DOWNLOAD.out.db )
}
|
5
tests/modules/deepbgc/pipeline/nextflow.config
Normal file
5
tests/modules/deepbgc/pipeline/nextflow.config
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
process {

    // Publish each task under <outdir>/<tool>: take the last ':'-separated part of the
    // process name, keep the text before its first '_', and lower-case it.
    publishDir = {
        def unqualifiedName = task.process.tokenize(':')[-1]
        def toolDir         = unqualifiedName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolDir}"
    }

}
|
97
tests/modules/deepbgc/pipeline/test.yml
Normal file
97
tests/modules/deepbgc/pipeline/test.yml
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
- name: deepbgc pipeline test_deepbgc_pipeline_gbk
|
||||||
|
command: nextflow run ./tests/modules/deepbgc/pipeline -entry test_deepbgc_pipeline_gbk -c ./tests/config/nextflow.config -c ./tests/modules/deepbgc/pipeline/nextflow.config
|
||||||
|
tags:
|
||||||
|
- deepbgc/pipeline
|
||||||
|
- deepbgc
|
||||||
|
files:
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/classifier/product_activity.pkl
|
||||||
|
md5sum: 90f0c010460e9df882cb057664a49f30
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/classifier/product_class.pkl
|
||||||
|
md5sum: f78a2eda240403d2f40643d42202f3ac
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_geneborder.pkl
|
||||||
|
md5sum: ca4be7031ae9f70780f17c616a4fa5b5
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_original.pkl
|
||||||
|
md5sum: 2ca2429bb9bc99a401d1093c376b37aa
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_retrained.pkl
|
||||||
|
md5sum: 65679a3b61c562ff4b84bdb574bb6d93
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/deepbgc.pkl
|
||||||
|
md5sum: 7e9218be79ba45bc9adb23bed3845dc1
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.clans.tsv
|
||||||
|
md5sum: a0a4590ffb2b33b83ef2b28f6ead886b
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm
|
||||||
|
md5sum: 79a3328e4c95b13949a4489b19959fc5
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3f
|
||||||
|
md5sum: cbca323cf8dd4e5e7c109114ec444162
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3i
|
||||||
|
md5sum: 5242332a3f6a60cd1ab634cd9331afd6
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3m
|
||||||
|
md5sum: 1fe946fa2b3bcde1d4b2bad732bce612
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3p
|
||||||
|
md5sum: 27b98a1ded123b6a1ef72db01927017c
|
||||||
|
- path: output/deepbgc/test_gbk/LOG.txt
|
||||||
|
contains: ["Saved DeepBGC result to:"]
|
||||||
|
- path: output/deepbgc/test_gbk/README.txt
|
||||||
|
- path: output/deepbgc/test_gbk/test_gbk.antismash.json
|
||||||
|
md5sum: 7dba3996cf38756b05e7612de8433c23
|
||||||
|
- path: output/deepbgc/test_gbk/test_gbk.bgc.gbk
|
||||||
|
- path: output/deepbgc/test_gbk/test_gbk.full.gbk
|
||||||
|
- path: output/gunzip/test1.contigs.fa
|
||||||
|
md5sum: 80c4d78f2810f6d9e90fa6da9bb9c4f9
|
||||||
|
- path: output/prodigal/test_gbk.faa
|
||||||
|
md5sum: b140ca303ff9ee32e615bfcc4b05038c
|
||||||
|
- path: output/prodigal/test_gbk.fna
|
||||||
|
md5sum: 28232dd696754fb95308874c9528296f
|
||||||
|
- path: output/prodigal/test_gbk.gbk
|
||||||
|
md5sum: b5c309b0296e7cdc21d1e71f33400f20
|
||||||
|
- path: output/prodigal/test_gbk_all.txt
|
||||||
|
md5sum: 8fe56fcf4d9e839e83be7523cd3efa02
|
||||||
|
|
||||||
|
- name: deepbgc pipeline test_deepbgc_pipeline_fa
|
||||||
|
command: nextflow run ./tests/modules/deepbgc/pipeline -entry test_deepbgc_pipeline_fa -c ./tests/config/nextflow.config -c ./tests/modules/deepbgc/pipeline/nextflow.config
|
||||||
|
tags:
|
||||||
|
- deepbgc/pipeline
|
||||||
|
- deepbgc
|
||||||
|
files:
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/classifier/product_activity.pkl
|
||||||
|
md5sum: 90f0c010460e9df882cb057664a49f30
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/classifier/product_class.pkl
|
||||||
|
md5sum: f78a2eda240403d2f40643d42202f3ac
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_geneborder.pkl
|
||||||
|
md5sum: ca4be7031ae9f70780f17c616a4fa5b5
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_original.pkl
|
||||||
|
md5sum: 2ca2429bb9bc99a401d1093c376b37aa
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/clusterfinder_retrained.pkl
|
||||||
|
md5sum: 65679a3b61c562ff4b84bdb574bb6d93
|
||||||
|
- path: output/deepbgc/deepbgc_db/0.1.0/detector/deepbgc.pkl
|
||||||
|
md5sum: 7e9218be79ba45bc9adb23bed3845dc1
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.clans.tsv
|
||||||
|
md5sum: a0a4590ffb2b33b83ef2b28f6ead886b
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm
|
||||||
|
md5sum: 79a3328e4c95b13949a4489b19959fc5
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3f
|
||||||
|
md5sum: cbca323cf8dd4e5e7c109114ec444162
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3i
|
||||||
|
md5sum: 5242332a3f6a60cd1ab634cd9331afd6
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3m
|
||||||
|
md5sum: 1fe946fa2b3bcde1d4b2bad732bce612
|
||||||
|
- path: output/deepbgc/deepbgc_db/common/Pfam-A.31.0.hmm.h3p
|
||||||
|
md5sum: 27b98a1ded123b6a1ef72db01927017c
|
||||||
|
- path: output/deepbgc/test1.contigs/LOG.txt
|
||||||
|
contains: ["Saved DeepBGC result to:"]
|
||||||
|
- path: output/deepbgc/test1.contigs/README.txt
|
||||||
|
- path: output/deepbgc/test1.contigs/evaluation/test1.contigs.bgc.png
|
||||||
|
md5sum: f4a0fc6cd260e2d7ad16f7a1fa103f96
|
||||||
|
- path: output/deepbgc/test1.contigs/evaluation/test1.contigs.score.png
|
||||||
|
md5sum: 572e8882031f667580d8c8e13c2cbb91
|
||||||
|
- path: output/deepbgc/test1.contigs/test1.contigs.antismash.json
|
||||||
|
contains: ['"name": "DeepBGC"']
|
||||||
|
- path: output/deepbgc/test1.contigs/test1.contigs.bgc.gbk
|
||||||
|
md5sum: 7fc70dd034903622dae273bf71b402f2
|
||||||
|
- path: output/deepbgc/test1.contigs/test1.contigs.bgc.tsv
|
||||||
|
contains: ["sequence_id"]
|
||||||
|
- path: output/deepbgc/test1.contigs/test1.contigs.full.gbk
|
||||||
|
contains: ["LOCUS"]
|
||||||
|
- path: output/deepbgc/test1.contigs/test1.contigs.pfam.tsv
|
||||||
|
md5sum: 1179eb4e6df0c83aaeec18d7d34e7524
|
||||||
|
- path: output/gunzip/test1.contigs.fa
|
||||||
|
md5sum: 80c4d78f2810f6d9e90fa6da9bb9c4f9
|
Loading…
Reference in a new issue