Add deeparg (#1234)

* Add deeparg download data

* Tweak downloaddata test.yml

* Add deeparg tests

* Apply suggestions from code review

* Fix output file paths in test

* Fix tests again md5sum to string
This commit is contained in:
James A. Fellows Yates 2022-01-26 15:19:16 +01:00 committed by GitHub
parent 1b24005f07
commit c3bacb9fdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 243 additions and 0 deletions

View file

@ -0,0 +1,30 @@
// Version string written to versions.yml. It is hard-coded here rather than
// queried from the tool (presumably because the CLI does not report it --
// verify), so keep it in sync with the conda/container pins below.
def VERSION = '1.0.2'

/*
 * Runs `deeparg download_data` and exposes the resulting `db/` directory.
 *
 * Inputs : none.
 * Outputs: db       - the `db/` directory written by the download command
 *          versions - versions.yml recording the deeparg version
 * Extra command-line options can be injected through `task.ext.args`.
 */
process DEEPARG_DOWNLOADDATA {
    label 'process_low'

    conda (params.enable_conda ? "bioconda::deeparg=1.0.2" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' :
        'quay.io/biocontainers/deeparg:1.0.2--pyhdfd78af_1' }"

    // Intentionally empty: the process takes no input channels.
    input:

    output:
    path "db/"          , emit: db
    path "versions.yml" , emit: versions

    script:
    def args = task.ext.args ?: ''
    """
    deeparg \\
        download_data \\
        $args \\
        -o db/

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        deeparg: $VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,33 @@
# Module metadata for DEEPARG_DOWNLOADDATA.
name: deeparg_downloaddata
description: >-
  A deep learning based approach to predict Antibiotic Resistance Genes (ARGs)
  from metagenomes
keywords:
  - download
  - database
  - deeparg
  - antimicrobial resistance genes
  - deep learning
  - prediction
tools:
  - deeparg:
      description: >-
        A deep learning based approach to predict Antibiotic Resistance Genes
        (ARGs) from metagenomes
      homepage: https://bench.cs.vt.edu/deeparg
      documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
      tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
      doi: "10.1186/s40168-018-0401-z"
      licence: ["MIT"]

# The process declares no input channels; this placeholder entry documents that.
input:
  - none: There is no input. This module downloads a pre-built database for use with deepARG.

output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - db:
      type: directory
      description: Directory containing database required for deepARG.
      pattern: "db/"

authors:
  - "@jfy133"

View file

@ -0,0 +1,40 @@
// Version string written to versions.yml. It is hard-coded here rather than
// queried from the tool (presumably because the CLI does not report it --
// verify), so keep it in sync with the conda/container pins below.
def VERSION = '1.0.2'

/*
 * Runs `deeparg predict` on one FASTA file against a deepARG database directory.
 *
 * Inputs:
 *   tuple(meta, fasta, model)
 *     meta  - sample map; `meta.id` is used as the task tag and the default
 *             output prefix
 *     fasta - sequence file passed to `-i`
 *     model - value passed to `--model`
 *   db      - database directory passed to `-d`
 *
 * Outputs (all named after the prefix, each paired with meta):
 *   daa           - *.align.daa
 *   daa_tsv       - *.align.daa.tsv
 *   arg           - *.mapping.ARG
 *   potential_arg - *.mapping.potential.ARG
 *   versions      - versions.yml recording the deeparg version
 *
 * `task.ext.args` adds extra command-line options; `task.ext.prefix`
 * overrides the output prefix.
 */
process DEEPARG_PREDICT {
    tag "$meta.id"
    label 'process_medium'

    conda (params.enable_conda ? "bioconda::deeparg=1.0.2" : null)
    // The Singularity image URL has a single slash after `singularity`,
    // matching the sibling DEEPARG_DOWNLOADDATA module.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' :
        'quay.io/biocontainers/deeparg:1.0.2--pyhdfd78af_1' }"

    input:
    tuple val(meta), path(fasta), val(model)
    // A plain path input: the database arrives as a bare directory (e.g. from
    // DEEPARG_DOWNLOADDATA.out.db), not as a tuple.
    path(db)

    output:
    tuple val(meta), path("*.align.daa")            , emit: daa
    tuple val(meta), path("*.align.daa.tsv")        , emit: daa_tsv
    tuple val(meta), path("*.mapping.ARG")          , emit: arg
    tuple val(meta), path("*.mapping.potential.ARG"), emit: potential_arg
    path "versions.yml"                             , emit: versions

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    deeparg \\
        predict \\
        $args \\
        -i $fasta \\
        -o ${prefix} \\
        -d $db \\
        --model $model

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        deeparg: $VERSION
    END_VERSIONS
    """
}

View file

@ -0,0 +1,68 @@
# Module metadata for DEEPARG_PREDICT.
name: deeparg_predict
description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
keywords:
  - deeparg
  - antimicrobial resistance
  - antimicrobial resistance genes
  - arg
  - deep learning
  - prediction
  - contigs
  - metagenomes
tools:
  - deeparg:
      description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
      homepage: https://bench.cs.vt.edu/deeparg
      documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
      tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
      doi: "10.1186/s40168-018-0401-z"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - fasta:
      type: file
      description: FASTA file containing gene-like sequences
      pattern: "*.{fasta,fa,fna}"
  - model:
      type: string
      description: Which model to use, depending on input data. Either 'LS' or 'SS' for long or short sequences respectively
      # The two accepted values are LS and SS, as the description states.
      pattern: "LS|SS"
  - db:
      type: directory
      description: Path to a directory containing the deepARG pre-built models
      pattern: "*/"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - daa:
      type: file
      description: Sequences of ARG-like sequences from DIAMOND alignment
      pattern: "*.align.daa"
  - daa_tsv:
      type: file
      description: Alignments scores against ARG-like sequences from DIAMOND alignment
      pattern: "*.align.daa.tsv"
  - arg:
      type: file
      description: Table containing sequences with an ARG-like probability of more than specified thresholds
      pattern: "*.mapping.ARG"
  - potential_arg:
      type: file
      description: Table containing sequences with an ARG-like probability of less than specified thresholds, and requires manual inspection
      pattern: "*.mapping.potential.ARG"

authors:
  - "@jfy133"

View file

@ -380,6 +380,14 @@ dedup:
- modules/dedup/**
- tests/modules/dedup/**
# deeparg/downloaddata: path globs covering the module and its tests
# (presumably used to decide which tests to run when files change -- verify).
deeparg/downloaddata:
- modules/deeparg/downloaddata/**
- tests/modules/deeparg/downloaddata/**
# deeparg/predict: path globs covering the module and its tests.
deeparg/predict:
- modules/deeparg/predict/**
- tests/modules/deeparg/predict/**
deeptools/computematrix:
- modules/deeptools/computematrix/**
- tests/modules/deeptools/computematrix/**

View file

@ -0,0 +1,9 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { DEEPARG_DOWNLOADDATA } from '../../../../modules/deeparg/downloaddata/main.nf'
// Test entry point: runs the database download process on its own.
// The process is invoked with no arguments because it declares no inputs.
workflow test_deeparg_downloaddata {

    DEEPARG_DOWNLOADDATA()

}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is derived
    // from the process name: take the last ':'-separated component, then the
    // part before the first '_', lower-cased (e.g. DEEPARG_DOWNLOADDATA -> deeparg).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,9 @@
# Test case for the deepARG database download module.
- name: deeparg downloaddata test_deeparg_downloaddata
  command: >-
    nextflow run tests/modules/deeparg/downloaddata
    -entry test_deeparg_downloaddata
    -c tests/config/nextflow.config
  tags:
    - deeparg
    - deeparg/downloaddata
  files:
    # The database directory itself, listed without a checksum.
    - path: output/deeparg/db/
    # One file inside the download, pinned by checksum.
    - path: output/deeparg/db/data/gg13/dataset.rev.2.bt2
      md5sum: "99d90f132fc2795d5a527ce31f1c4d30"

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { DEEPARG_DOWNLOADDATA } from '../../../../modules/deeparg/downloaddata/main.nf'
include { DEEPARG_PREDICT } from '../../../../modules/deeparg/predict/main.nf'
// Test entry point: downloads the deepARG database, then runs prediction on a
// test genome using the 'LS' model.
workflow test_deeparg_predict {

    sequences = file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)

    // [ meta map, sequence file, model ]
    predict_in = [ [ id:'test', single_end:false ], sequences, 'LS' ]

    // The prediction step consumes the database directory emitted by the download step.
    DEEPARG_DOWNLOADDATA()
    DEEPARG_PREDICT ( predict_in, DEEPARG_DOWNLOADDATA.out.db )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is derived
    // from the process name: take the last ':'-separated component, then the
    // part before the first '_', lower-cased (e.g. DEEPARG_PREDICT -> deeparg).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,17 @@
# Test case for the deepARG prediction module.
- name: deeparg predict test_deeparg_predict
  command: >-
    nextflow run tests/modules/deeparg/predict
    -entry test_deeparg_predict
    -c tests/config/nextflow.config
  tags:
    - deeparg/predict
    - deeparg
  files:
    - path: output/deeparg/test.align.daa
      md5sum: "c52d0af8362244f214da25bc45f2bf42"
    - path: output/deeparg/test.align.daa.tsv
      md5sum: "a4aa1da2db98274ede2b927fa8227e5a"
    - path: output/deeparg/test.mapping.ARG
      md5sum: "0e049e99eab4c55666062df21707d5b9"
    # This file has a substring check instead of a checksum.
    - path: output/deeparg/test.mapping.potential.ARG
      contains:
        - "#ARG"
    - path: output/deeparg/versions.yml
      md5sum: "e848ddab324e8c6fd18eaa6b2656f195"