Add input/output file name collision check and switch prediction output from CSV to TSV

This commit is contained in:
jasmezz 2022-06-10 12:54:39 +02:00
parent 5841f11edb
commit 39388abc04
3 changed files with 8 additions and 7 deletions

View file

@ -15,7 +15,7 @@ process AMPIR {
output: output:
tuple val(meta), path("*.faa"), emit: amps_faa tuple val(meta), path("*.faa"), emit: amps_faa
tuple val(meta), path("*.csv"), emit: amps_csv tuple val(meta), path("*.tsv"), emit: amps_csv
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
when: when:
@ -25,6 +25,7 @@ process AMPIR {
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified
if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
""" """
#!/usr/bin/env Rscript #!/usr/bin/env Rscript
library(ampir) library(ampir)
@ -33,7 +34,7 @@ process AMPIR {
prediction <- predict_amps(input_seqs,${min_length} model = '${model}') prediction <- predict_amps(input_seqs,${min_length} model = '${model}')
prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ] prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
output_seqs <- input_seqs[row.names(prediction), ] output_seqs <- input_seqs[row.names(prediction), ]
write.table(prediction, file = "${prefix}.csv", row.names = FALSE, sep = ";", quote = FALSE, dec = '.') write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
df_to_faa(output_seqs, "${prefix}.faa") df_to_faa(output_seqs, "${prefix}.faa")
version_file_path <- "versions.yml" version_file_path <- "versions.yml"

View file

@ -49,11 +49,11 @@ output:
- amps_faa: - amps_faa:
type: file type: file
description: File containing AMP predictions in amino acid FASTA format description: File containing AMP predictions in amino acid FASTA format
pattern: "*.{faa,fasta}" pattern: "*.faa"
- amps_csv: - amps_csv:
type: file type: file
description: File containing AMP predictions in CSV format description: File containing AMP predictions in TSV format
pattern: "*.csv" pattern: "*.tsv"
authors: authors:
- "@jasmezz" - "@jasmezz"

View file

@ -3,8 +3,8 @@
tags: tags:
- ampir - ampir
files: files:
- path: output/ampir/test.csv - path: output/ampir/test.tsv
contains: ["seq_name;seq_aa;prob_AMP", "WP_014895017.1"] contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]
- path: output/ampir/test.faa - path: output/ampir/test.faa
md5sum: 0435609144022c55ac196db053f0df89 md5sum: 0435609144022c55ac196db053f0df89
- path: output/ampir/versions.yml - path: output/ampir/versions.yml