From ecf1d146ab3c3a60fdc5daf0fee18a26c9105c24 Mon Sep 17 00:00:00 2001
From: jasmezz
Date: Thu, 9 Jun 2022 16:59:00 +0200
Subject: [PATCH] Add full user options for AMPir

---
Reviewer notes (this section is ignored by `git am`, not part of the commit):
  * This patch was recovered from a copy whose line breaks and indentation
    had been collapsed. Hunk line counts were re-checked against every
    `@@` header and the diffstat below, but leading whitespace and blank
    context lines are reconstructed -- verify against the target tree, or
    apply with `git apply --ignore-whitespace` if context does not match.
  * NOTE(review): meta.yml keeps `pattern: "[0-9][0-9]"` for
    min_probability, while the test now passes "0.7" and the script casts
    with as.numeric(); the pattern looks stale -- confirm and follow up.

 modules/ampir/main.nf        | 15 +++++++++------
 modules/ampir/meta.yml       |  8 ++++++--
 tests/modules/ampir/main.nf  |  8 +++++---
 tests/modules/ampir/test.yml |  4 ++--
 4 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/modules/ampir/main.nf b/modules/ampir/main.nf
index 412317b7..11fc88cf 100644
--- a/modules/ampir/main.nf
+++ b/modules/ampir/main.nf
@@ -9,8 +9,9 @@ process AMPIR {
 
     input:
     tuple val(meta), path(faa)
-    val cut_off
     val model
+    val min_length
+    val min_probability
 
     output:
     tuple val(meta), path("*.faa"), emit: amps_faa
@@ -23,15 +24,17 @@ process AMPIR {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified
     """
     #!/usr/bin/env Rscript
     library(ampir)
 
-    protein_seqs <- read_faa('${faa}')
-    prediction <- predict_amps(protein_seqs, model = '${model}')
-    prediction <- protein_seqs[which(prediction\$prob_AMP >= as.integer(${cut_off})), ]
-    df_to_faa(protein_seqs, "${prefix}.faa")
-    write.table(prediction, file = "${prefix}.csv", row.names = FALSE, quote = FALSE, dec = '.')
+    input_seqs <- read_faa('${faa}')
+    prediction <- predict_amps(input_seqs,${min_length} model = '${model}')
+    prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
+    output_seqs <- input_seqs[row.names(prediction), ]
+    write.table(prediction, file = "${prefix}.csv", row.names = FALSE, sep = ";", quote = FALSE, dec = '.')
+    df_to_faa(output_seqs, "${prefix}.faa")
 
     version_file_path <- "versions.yml"
     version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
diff --git a/modules/ampir/meta.yml b/modules/ampir/meta.yml
index 30e55e99..0b3850c0 100644
--- a/modules/ampir/meta.yml
+++ b/modules/ampir/meta.yml
@@ -25,9 +25,13 @@ input:
       pattern: "*.{faa,fasta}"
   - model:
       type: value
-      description: Model for AMP prediction
+      description: Built-in model for AMP prediction
       pattern: "{precursor,mature}"
-  - cut_off:
+  - min_length:
+      type: value
+      description: Minimum protein length for which predictions will be generated
+      pattern: "[0-9]+"
+  - min_probability:
       type: value
       description: Cut-off for AMP prediction
       pattern: "[0-9][0-9]"
diff --git a/tests/modules/ampir/main.nf b/tests/modules/ampir/main.nf
index cc7b07c8..bee2ff90 100644
--- a/tests/modules/ampir/main.nf
+++ b/tests/modules/ampir/main.nf
@@ -10,9 +10,11 @@ workflow test_ampir {
         file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true),
     ]
 
-    cut_off = "80"
-
     model = "precursor"
 
-    AMPIR ( fasta, cut_off, model )
+    min_length = []
+
+    min_probability = "0.7"
+
+    AMPIR ( fasta, model, min_length, min_probability )
 }
diff --git a/tests/modules/ampir/test.yml b/tests/modules/ampir/test.yml
index 40bedd37..4077ef14 100644
--- a/tests/modules/ampir/test.yml
+++ b/tests/modules/ampir/test.yml
@@ -4,8 +4,8 @@
     - ampir
   files:
     - path: output/ampir/test.csv
-      md5sum: 063fd8866f6a669457d7a49404fb449f
+      md5sum: a3b3152373a8add3613ace8c71073e68
     - path: output/ampir/test.faa
-      md5sum: e605d38752fd90261c924d51f7007189
+      md5sum: 0435609144022c55ac196db053f0df89
     - path: output/ampir/versions.yml
       md5sum: 4a11d25b8a904a7ffb34ae88f6826888