From c3bacb9fdd8d35b26a4673ce3fd052d600817ad3 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates"
Date: Wed, 26 Jan 2022 15:19:16 +0100
Subject: [PATCH] Add `deeparg` (#1234)

* Add deeparg download data

* Tweak downloaddata test.yml

* Add deeparg tests

* Apply suggestions from code review

* Fix output file paths in test

* Fix tests again md5sum to string
---
Reviewer notes (text between `---` and the first `diff --git` is not applied):
  - modules/deeparg/predict/main.nf: removed the doubled slash in the
    Singularity image URL so it matches modules/deeparg/downloaddata/main.nf.
  - modules/deeparg/predict/main.nf: the database directory input is declared
    as `path(db)` instead of a one-element `tuple`.
  - modules/deeparg/predict/meta.yml: the `model` pattern now lists both
    values named in its description ("LS|SS").
  All three are same-line edits, so the hunk sizes and the diffstat below are
  unchanged.

 modules/deeparg/downloaddata/main.nf          | 30 ++++++++
 modules/deeparg/downloaddata/meta.yml         | 33 +++++++++
 modules/deeparg/predict/main.nf               | 40 +++++++++++
 modules/deeparg/predict/meta.yml              | 68 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  8 +++
 tests/modules/deeparg/downloaddata/main.nf    |  9 +++
 .../deeparg/downloaddata/nextflow.config      |  5 ++
 tests/modules/deeparg/downloaddata/test.yml   |  9 +++
 tests/modules/deeparg/predict/main.nf         | 19 ++++++
 tests/modules/deeparg/predict/nextflow.config |  5 ++
 tests/modules/deeparg/predict/test.yml        | 17 +++++
 11 files changed, 243 insertions(+)
 create mode 100644 modules/deeparg/downloaddata/main.nf
 create mode 100644 modules/deeparg/downloaddata/meta.yml
 create mode 100644 modules/deeparg/predict/main.nf
 create mode 100644 modules/deeparg/predict/meta.yml
 create mode 100644 tests/modules/deeparg/downloaddata/main.nf
 create mode 100644 tests/modules/deeparg/downloaddata/nextflow.config
 create mode 100644 tests/modules/deeparg/downloaddata/test.yml
 create mode 100644 tests/modules/deeparg/predict/main.nf
 create mode 100644 tests/modules/deeparg/predict/nextflow.config
 create mode 100644 tests/modules/deeparg/predict/test.yml

diff --git a/modules/deeparg/downloaddata/main.nf b/modules/deeparg/downloaddata/main.nf
new file mode 100644
index 00000000..cc420e6a
--- /dev/null
+++ b/modules/deeparg/downloaddata/main.nf
@@ -0,0 +1,30 @@
+def VERSION='1.0.2'
+
+process DEEPARG_DOWNLOADDATA {
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::deeparg=1.0.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' :
+        'quay.io/biocontainers/deeparg:1.0.2--pyhdfd78af_1' }"
+
+    input:
+
+    output:
+    path "db/"          , emit: db
+    path "versions.yml" , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    deeparg \\
+        download_data \\
+        $args \\
+        -o db/
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        deeparg: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/deeparg/downloaddata/meta.yml b/modules/deeparg/downloaddata/meta.yml
new file mode 100644
index 00000000..6cfa192e
--- /dev/null
+++ b/modules/deeparg/downloaddata/meta.yml
@@ -0,0 +1,33 @@
+name: deeparg_downloaddata
+description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
+keywords:
+  - download
+  - database
+  - deeparg
+  - antimicrobial resistance genes
+  - deep learning
+  - prediction
+tools:
+  - deeparg:
+      description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
+      homepage: https://bench.cs.vt.edu/deeparg
+      documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
+      tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
+      doi: "10.1186/s40168-018-0401-z"
+      licence: ['MIT']
+
+input:
+  - none: There is no input. This module downloads a pre-built database for use with deepARG.
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - db:
+      type: directory
+      description: Directory containing database required for deepARG.
+      pattern: "db/"
+
+authors:
+  - "@jfy133"
diff --git a/modules/deeparg/predict/main.nf b/modules/deeparg/predict/main.nf
new file mode 100644
index 00000000..1af0fd40
--- /dev/null
+++ b/modules/deeparg/predict/main.nf
@@ -0,0 +1,40 @@
+def VERSION="1.0.2"
+
+process DEEPARG_PREDICT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::deeparg=1.0.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/deeparg:1.0.2--pyhdfd78af_1' :
+        'quay.io/biocontainers/deeparg:1.0.2--pyhdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(fasta), val(model)
+    path(db)
+
+    output:
+    tuple val(meta), path("*.align.daa")            , emit: daa
+    tuple val(meta), path("*.align.daa.tsv")        , emit: daa_tsv
+    tuple val(meta), path("*.mapping.ARG")          , emit: arg
+    tuple val(meta), path("*.mapping.potential.ARG"), emit: potential_arg
+    path "versions.yml"                             , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    deeparg \\
+        predict \\
+        $args \\
+        -i $fasta \\
+        -o ${prefix} \\
+        -d $db \\
+        --model $model
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        deeparg: $VERSION
+    END_VERSIONS
+    """
+}
diff --git a/modules/deeparg/predict/meta.yml b/modules/deeparg/predict/meta.yml
new file mode 100644
index 00000000..244b9df7
--- /dev/null
+++ b/modules/deeparg/predict/meta.yml
@@ -0,0 +1,68 @@
+name: deeparg_predict
+description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
+keywords:
+  - deeparg
+  - antimicrobial resistance
+  - antimicrobial resistance genes
+  - arg
+  - deep learning
+  - prediction
+  - contigs
+  - metagenomes
+tools:
+  - deeparg:
+      description: A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
+      homepage: https://bench.cs.vt.edu/deeparg
+      documentation: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
+      tool_dev_url: https://bitbucket.org/gusphdproj/deeparg-ss/src/master/
+      doi: "10.1186/s40168-018-0401-z"
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: FASTA file containing gene-like sequences
+      pattern: "*.{fasta,fa,fna}"
+  - model:
+      type: string
+      description: Which model to use, depending on input data. Either 'LS' or 'SS' for long or short sequences respectively
+      pattern: "LS|SS"
+  - db:
+      type: directory
+      description: Path to a directory containing the deepARG pre-built models
+      pattern: "*/"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - daa:
+      type: file
+      description: Sequences of ARG-like sequences from DIAMOND alignment
+      pattern: "*.align.daa"
+  - daa_tsv:
+      type: file
+      description: Alignments scores against ARG-like sequences from DIAMOND alignment
+      pattern: "*.align.daa.tsv"
+  - arg:
+      type: file
+      description: Table containing sequences with an ARG-like probability of more than specified thresholds
+      pattern: "*.mapping.ARG"
+  - potential_arg:
+      type: file
+      description: Table containing sequences with an ARG-like probability of less than specified thresholds, and requires manual inspection
+      pattern: "*.mapping.potential.ARG"
+
+authors:
+  - "@jfy133"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 967b7d9f..2321c918 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -380,6 +380,14 @@ dedup:
   - modules/dedup/**
   - tests/modules/dedup/**
 
+deeparg/downloaddata:
+  - modules/deeparg/downloaddata/**
+  - tests/modules/deeparg/downloaddata/**
+
+deeparg/predict:
+  - modules/deeparg/predict/**
+  - tests/modules/deeparg/predict/**
+
 deeptools/computematrix:
   - modules/deeptools/computematrix/**
   - tests/modules/deeptools/computematrix/**
diff --git a/tests/modules/deeparg/downloaddata/main.nf b/tests/modules/deeparg/downloaddata/main.nf
new file mode 100644
index 00000000..ed2d48bb
--- /dev/null
+++ b/tests/modules/deeparg/downloaddata/main.nf
@@ -0,0 +1,9 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { DEEPARG_DOWNLOADDATA } from '../../../../modules/deeparg/downloaddata/main.nf'
+
+workflow test_deeparg_downloaddata {
+    DEEPARG_DOWNLOADDATA ()
+}
diff --git a/tests/modules/deeparg/downloaddata/nextflow.config b/tests/modules/deeparg/downloaddata/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/deeparg/downloaddata/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/deeparg/downloaddata/test.yml b/tests/modules/deeparg/downloaddata/test.yml
new file mode 100644
index 00000000..c5c99de2
--- /dev/null
+++ b/tests/modules/deeparg/downloaddata/test.yml
@@ -0,0 +1,9 @@
+- name: deeparg downloaddata test_deeparg_downloaddata
+  command: nextflow run tests/modules/deeparg/downloaddata -entry test_deeparg_downloaddata -c tests/config/nextflow.config
+  tags:
+    - deeparg
+    - deeparg/downloaddata
+  files:
+    - path: output/deeparg/db/
+    - path: output/deeparg/db/data/gg13/dataset.rev.2.bt2
+      md5sum: 99d90f132fc2795d5a527ce31f1c4d30
diff --git a/tests/modules/deeparg/predict/main.nf b/tests/modules/deeparg/predict/main.nf
new file mode 100644
index 00000000..2758ab58
--- /dev/null
+++ b/tests/modules/deeparg/predict/main.nf
@@ -0,0 +1,19 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { DEEPARG_DOWNLOADDATA } from '../../../../modules/deeparg/downloaddata/main.nf'
+include { DEEPARG_PREDICT } from '../../../../modules/deeparg/predict/main.nf'
+
+workflow test_deeparg_predict {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
+        'LS'
+    ]
+
+    DEEPARG_DOWNLOADDATA()
+    DEEPARG_PREDICT ( input, DEEPARG_DOWNLOADDATA.out.db )
+
+}
diff --git a/tests/modules/deeparg/predict/nextflow.config b/tests/modules/deeparg/predict/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/deeparg/predict/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
\ No newline at end of file
diff --git a/tests/modules/deeparg/predict/test.yml b/tests/modules/deeparg/predict/test.yml
new file mode 100644
index 00000000..e3a15411
--- /dev/null
+++ b/tests/modules/deeparg/predict/test.yml
@@ -0,0 +1,17 @@
+- name: deeparg predict test_deeparg_predict
+  command: nextflow run tests/modules/deeparg/predict -entry test_deeparg_predict -c tests/config/nextflow.config
+  tags:
+    - deeparg/predict
+    - deeparg
+  files:
+    - path: output/deeparg/test.align.daa
+      md5sum: c52d0af8362244f214da25bc45f2bf42
+    - path: output/deeparg/test.align.daa.tsv
+      md5sum: a4aa1da2db98274ede2b927fa8227e5a
+    - path: output/deeparg/test.mapping.ARG
+      md5sum: 0e049e99eab4c55666062df21707d5b9
+    - path: output/deeparg/test.mapping.potential.ARG
+      contains:
+        - "#ARG"
+    - path: output/deeparg/versions.yml
+      md5sum: e848ddab324e8c6fd18eaa6b2656f195