diff --git a/modules/ampir/main.nf b/modules/ampir/main.nf
new file mode 100644
index 00000000..57e20902
--- /dev/null
+++ b/modules/ampir/main.nf
@@ -0,0 +1,65 @@
+// Predict antimicrobial peptides (AMPs) in a protein FASTA file with the R package ampir.
+//
+// Inputs:
+//   meta            - sample map; meta.id tags the task and is the default output prefix
+//   faa             - amino acid FASTA file to screen
+//   model           - ampir model name, passed to predict_amps(model = ...)
+//   min_length      - minimum sequence length (min_len); pass [] to keep ampir's default
+//   min_probability - keep predictions with prob_AMP >= this value
+//
+// Outputs:
+//   amps_faa - "<prefix>.faa" with the sequences that passed the cut-off
+//   amps_tsv - "<prefix>.tsv" with the matching prediction table
+//   versions - "versions.yml" recording the ampir package version
+process AMPIR {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
+        'quay.io/biocontainers/r-ampir:1.1.0' }"
+
+    input:
+    tuple val(meta), path(faa)
+    val model
+    val min_length
+    val min_probability
+
+    output:
+    tuple val(meta), path("*.faa"), emit: amps_faa
+    tuple val(meta), path("*.tsv"), emit: amps_tsv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // NOTE: not interpolated into the R script below
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Build the optional min_len argument in a local variable instead of overwriting the
+    // min_length input; null, '' and [] all mean "fall back to the ampir default value".
+    def min_len_unset = (min_length == null) || ("${min_length}".toString().trim() in ['', '[]'])
+    def min_len_arg = min_len_unset ? "" : " min_len = as.integer(${min_length}),"
+    if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    """
+    #!/usr/bin/env Rscript
+    library(ampir)
+
+    input_seqs <- read_faa('${faa}')
+    prediction <- predict_amps(input_seqs,${min_len_arg} model = '${model}')
+    # which() keeps only rows whose comparison is TRUE, so NA probabilities are dropped
+    prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
+    output_seqs <- input_seqs[row.names(prediction), ]
+    write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
+    df_to_faa(output_seqs, "${prefix}.faa")
+
+    version_file_path <- "versions.yml"
+    version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
+    f <- file(version_file_path, "w")
+    writeLines('"${task.process}":', f)
+    writeLines(" ampir: ", f, sep = "")
+    writeLines(version_ampir, f)
+    close(f)
+    """
+}
diff --git a/modules/ampir/meta.yml b/modules/ampir/meta.yml
new file mode 100644
index 00000000..7569ca69
--- /dev/null
+++ b/modules/ampir/meta.yml
@@ -0,0 +1,59 @@
+name: "ampir"
+description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs.
+keywords:
+  - ampir
+  - amp
+  - antimicrobial peptide prediction
+tools:
+  - "ampir":
+      description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
+      homepage: "https://github.com/Legana/ampir"
+      documentation: "https://cran.r-project.org/web/packages/ampir/index.html"
+      tool_dev_url: "https://github.com/Legana/ampir"
+      doi: "10.1093/bioinformatics/btaa653"
+      licence: ["GPL v2"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - faa:
+      type: file
+      description: FASTA file containing amino acid sequences
+      pattern: "*.{faa,fasta}"
+  - model:
+      type: value
+      description: Built-in model for AMP prediction
+      pattern: "{precursor,mature}"
+  - min_length:
+      type: value
+      description: Minimum protein length for which predictions will be generated (pass [] to use the ampir default)
+      pattern: "[0-9]+"
+  - min_probability:
+      type: value
+      description: Cut-off for AMP prediction; only sequences with prob_AMP >= this value are reported
+      pattern: "[01](\\.[0-9]+)?"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - amps_faa:
+      type: file
+      description: File containing AMP predictions in amino acid FASTA format
+      pattern: "*.faa"
+  - amps_tsv:
+      type: file
+      description: File containing AMP predictions in TSV format
+      pattern: "*.tsv"
+
+authors:
+  - "@jasmezz"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 811ba822..0f177f5f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -26,6 +26,10 @@ allelecounter:
   - modules/allelecounter/**
   - tests/modules/allelecounter/**
 
+ampir:
+  - modules/ampir/**
+  - tests/modules/ampir/**
+
 amplify/predict:
   - modules/amplify/predict/**
   - tests/modules/amplify/predict/**
diff --git a/tests/modules/ampir/main.nf b/tests/modules/ampir/main.nf
new file mode 100644
index 00000000..bee2ff90
--- /dev/null
+++ b/tests/modules/ampir/main.nf
@@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { AMPIR } from '../../../modules/ampir/main.nf'
+
+workflow test_ampir {
+
+    fasta = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true),
+            ]
+
+    model = "precursor"
+
+    min_length = [] // empty list: let ampir use its default minimum length
+
+    min_probability = "0.7" // keep predictions with prob_AMP >= 0.7
+
+    AMPIR ( fasta, model, min_length, min_probability )
+}
diff --git a/tests/modules/ampir/nextflow.config b/tests/modules/ampir/nextflow.config
new file mode 100644
index 00000000..50f50a7a
--- /dev/null
+++ b/tests/modules/ampir/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/ampir/test.yml b/tests/modules/ampir/test.yml
new file mode 100644
index 00000000..54921e9a
--- /dev/null
+++ b/tests/modules/ampir/test.yml
@@ -0,0 +1,11 @@
+- name: ampir test_ampir
+  command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config -c ./tests/modules/ampir/nextflow.config
+  tags:
+    - ampir
+  files:
+    - path: output/ampir/test.tsv
+      contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]
+    - path: output/ampir/test.faa
+      md5sum: 0435609144022c55ac196db053f0df89
+    - path: output/ampir/versions.yml
+      md5sum: 4a11d25b8a904a7ffb34ae88f6826888