Create ampir module

This commit is contained in:
jasmezz 2022-05-25 13:27:10 +02:00
parent de74c0a83b
commit 77b5986463
6 changed files with 136 additions and 0 deletions

43
modules/ampir/main.nf Normal file
View file

@ -0,0 +1,43 @@
// Predicts antimicrobial peptides (AMPs) in a protein FASTA with the R package
// ampir and writes the sequences that pass the probability cut-off to a FASTA file.
//
// Inputs:
//   meta, faa    - sample meta map and the amino-acid FASTA to screen
//   cut_off      - minimum prob_AMP a sequence needs to be kept; accepted either
//                  as a probability (0-1) or as a percentage (>1, e.g. 80)
//   model        - ampir model name passed to predict_amps()
//   output_name  - file name of the FASTA written by this process
// Outputs:
//   amps         - [ meta, FASTA of sequences with prob_AMP >= cut-off ]
//   versions     - versions.yml recording the ampir package version
process AMPIR {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
        'quay.io/biocontainers/r-ampir:1.1.0' }"

    input:
    tuple val(meta), path(faa)
    val cut_off
    val model
    val output_name

    output:
    tuple val(meta), path(output_name), emit: amps
    path "versions.yml"               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // args and prefix are declared but not referenced by the script below.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    #!/usr/bin/env Rscript
    library(ampir)

    protein_seqs <- read_faa("${faa}")
    prediction <- predict_amps(protein_seqs, model = '$model')

    # prob_AMP is a probability, so the cut-off must stay numeric (as.integer
    # would truncate 0.8 to 0). Values above 1 are read as percentages so that
    # a cut-off such as 80 keeps meaning "80 %" -- NOTE(review): confirm this
    # is the intended convention for callers.
    threshold <- as.numeric($cut_off)
    if (!is.na(threshold) && threshold > 1) {
        threshold <- threshold / 100
    }

    # which() also drops rows whose probability is NA.
    amp_rows <- which(prediction\$prob_AMP >= threshold)
    amp_seqs <- protein_seqs[amp_rows, ]

    # Write only the sequences that passed the filter. When nothing passes,
    # still create the (empty) file so the declared output exists.
    if (nrow(amp_seqs) > 0) {
        df_to_faa(amp_seqs, '$output_name')
    } else {
        invisible(file.create('$output_name'))
    }

    version_file_path <- "versions.yml"
    version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
    f <- file(version_file_path, "w")
    writeLines('"${task.process}":', f)
    writeLines("    ampir: ", f, sep = "")
    writeLines(version_ampir, f)
    close(f)
    """
}

55
modules/ampir/meta.yml Normal file
View file

@ -0,0 +1,55 @@
# Module description for the AMPIR process: tool metadata plus the process's
# inputs (listed in the order of the process signature) and outputs.
name: "ampir"
description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs.
keywords:
  - ampir
  - amp
  - antimicrobial peptide prediction
tools:
  - "ampir":
      description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
      homepage: "https://github.com/Legana/ampir"
      documentation: "https://github.com/Legana/ampir"
      tool_dev_url: "https://github.com/Legana/ampir"
      doi: "10.1093/bioinformatics/btaa653"
      licence: ["GPL v2"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - faa:
      type: file
      description: FASTA file containing amino acid sequences
      pattern: "*.{faa,fasta}"
  - cut_off:
      type: value
      description: Cut-off for AMP prediction
      pattern: "[0-9][0-9]"
  - model:
      type: value
      description: Model for AMP prediction
      pattern: "{precursor,mature}"
  # output_name is a process input: it names the FASTA file the process writes.
  - output_name:
      type: value
      description: File name of the FASTA output file
      pattern: "*.{faa,fasta}"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  # The emitted file carries whatever name was passed as output_name.
  - amps:
      type: file
      description: File containing AMP predictions
      pattern: "*.{faa,fasta}"
authors:
  - "@jasmezz"

View file

@ -26,6 +26,10 @@ allelecounter:
- modules/allelecounter/**
- tests/modules/allelecounter/**
# Path globs associated with the ampir module: its sources and its tests.
# NOTE(review): presumably used to select which module tests to run when these paths change - confirm against the CI setup.
ampir:
- modules/ampir/**
- tests/modules/ampir/**
amplify/predict:
- modules/amplify/predict/**
- tests/modules/amplify/predict/**

View file

@ -0,0 +1,20 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { AMPIR } from '../../../modules/ampir/main.nf'
// Runs AMPIR once on the Candidatus Portiera aleyrodidarum test proteome,
// using the "precursor" model, a cut-off of "80" and a fixed output file name.
workflow test_ampir {

    // Proteome FASTA taken from the shared test-data map; fails early if missing.
    proteome = file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)

    // Input tuple expected by the process: [ meta map, FASTA ]
    input = [ [ id:'test', single_end:false ], proteome ]

    AMPIR ( input, "80", "precursor", "prediction.fasta" )
}

View file

@ -0,0 +1,5 @@
// Publish every process's outputs under <outdir>/<tool>, where <tool> is the
// lower-cased first '_'-separated token of the last ':'-separated component
// of the process name.
process {
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }
}

View file

@ -0,0 +1,9 @@
# Test case for the ampir module: runs the test_ampir workflow entry and checks the two published files.
- name: "ampir"
command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config -c ./tests/modules/ampir/nextflow.config
tags:
- "ampir"
files:
# Checksums pin exact file contents.
# NOTE(review): presumably they must be regenerated whenever the module's output or the reported ampir version changes.
- path: output/ampir/prediction.fasta
md5sum: e605d38752fd90261c924d51f7007189
- path: output/ampir/versions.yml
md5sum: 4a11d25b8a904a7ffb34ae88f6826888