Transdecoder longorf (#1230)

* create transdecoder/longorf module * module transdecoder * main.nf * transdecoder/longorf ready to PR * review comments update * transdecoder_longorf reviewed main.nf * removed predict * test.yml updated * pytest run Co-authored-by: Danilo Di Leo <danilo.dileo@lnu.se>
2024-12-22 02:58:17 +00:00 · 2022-01-31 13:34:38 +01:00 · 2022-01-31 13:34:38 +01:00 · a74e8436cc
commit a74e8436cc
parent 7026122ec8
6 changed files with 134 additions and 0 deletions
--- a/modules/transdecoder/longorf/main.nf
+++ b/modules/transdecoder/longorf/main.nf
@ -0,0 +1,37 @@
+process TRANSDECODER_LONGORF { // runs TransDecoder.LongOrfs once per [ meta, fasta ] input
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "bioconda::transdecoder=5.5.0" : null) // keep in step with the 5.5.0 container tags below
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/transdecoder:5.5.0--pl5262hdfd78af_4' :
+        'quay.io/biocontainers/transdecoder:5.5.0--pl5262hdfd78af_4' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("${prefix}/*.pep") , emit: pep
+    tuple val(meta), path("${prefix}/*.gff3"), emit: gff3
+    tuple val(meta), path("${prefix}/*.cds") , emit: cds
+    tuple val(meta), path("${prefix}/*.dat") , emit: dat
+    path("${prefix}/")                       , emit: folder
+    path "versions.yml"                      , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}" // no `def`: the output: globs above resolve against this same -O directory
+
+    """
+    TransDecoder.LongOrfs \\
+        $args \\
+        -O $prefix \\
+        -t \\
+        $fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        transdecoder: \$(echo \$(TransDecoder.LongOrfs --version) | sed -e "s/TransDecoder.LongOrfs //g")
+    END_VERSIONS
+    """
+}
--- a/modules/transdecoder/longorf/meta.yml
+++ b/modules/transdecoder/longorf/meta.yml
@ -0,0 +1,57 @@
+name: transdecoder_longorf
+description: TransDecoder identifies candidate coding regions within transcript sequences. It is used to build a GFF file.
+keywords:
+  - eucaryotes
+  - gff
+tools:
+  - transdecoder:
+      description: TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
+      homepage: https://github.com/TransDecoder
+      documentation: https://github.com/TransDecoder/TransDecoder/wiki
+      tool_dev_url: https://github.com/TransDecoder/TransDecoder
+      doi: ""
+      licence: ['Broad Institute']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: fasta file
+      pattern: "*.{fasta}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - pep:
+      type: file
+      description: Amino acid FASTA file with all ORFs meeting the minimum length criteria, regardless of coding potential
+      pattern: "*.{pep}"
+  - gff3:
+      type: file
+      description: GFF3 file with the positions of all ORFs as found in the target transcripts
+      pattern: "*.{gff3}"
+  - cds:
+      type: file
+      description: FASTA file with the nucleotide coding sequence for all detected ORFs
+      pattern: "*.{cds}"
+  - dat:
+      type: file
+      description: TSV file with nucleotide frequencies
+      pattern: "*.{dat}"
+  - folder:
+      type: directory
+      description: Directory containing all the files from the run
+
+authors:
+  - "@Danilo2771"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -1457,6 +1457,10 @@ tiddit/sv:
  - modules/tiddit/sv/**
  - tests/modules/tiddit/sv/**

+transdecoder/longorf:  # same name as the `transdecoder/longorf` tag in tests/modules/transdecoder/longorf/test.yml
+  - modules/transdecoder/longorf/**
+  - tests/modules/transdecoder/longorf/**
+
 trimgalore:
  - modules/trimgalore/**
  - tests/modules/trimgalore/**
--- a/tests/modules/transdecoder/longorf/main.nf
+++ b/tests/modules/transdecoder/longorf/main.nf
@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { TRANSDECODER_LONGORF } from '../../../../modules/transdecoder/longorf/main.nf'
+
+workflow test_transdecoder_longorf {
+
+    // Sample metadata plus the 'sarscov2' genome FASTA entry of params.test_data.
+    def meta  = [ id:'test', single_end:false ]
+    def fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    // The process takes a single [ meta, fasta ] tuple.
+    TRANSDECODER_LONGORF ( [ meta, fasta ] )
+}
--- a/tests/modules/transdecoder/longorf/nextflow.config
+++ b/tests/modules/transdecoder/longorf/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish under <outdir>/<name>, where <name> is the last ':'-separated segment of
+    // the process name, cut at its first '_' and lower-cased.
+    publishDir = { "${params.outdir}/${task.process.tokenize(':').last().tokenize('_').first().toLowerCase()}" }
+}
--- a/tests/modules/transdecoder/longorf/test.yml
+++ b/tests/modules/transdecoder/longorf/test.yml
@ -0,0 +1,16 @@
+- name: transdecoder longorf test_transdecoder_longorf
+  command: nextflow run tests/modules/transdecoder/longorf -entry test_transdecoder_longorf -c tests/config/nextflow.config
+  tags:
+    - transdecoder
+    - transdecoder/longorf
+  files:  # 'transdecoder' is the directory name produced by the publishDir rule in this test's nextflow.config
+    - path: output/transdecoder/versions.yml  # NOTE(review): 'output' is presumably params.outdir from tests/config/nextflow.config - confirm
+      md5sum: 15cffd5b1119e63d52dc754caf0f2d3c
+    - path: output/transdecoder/test/base_freqs.dat  # 'test' is the meta.id set in the test workflow, used as the -O directory
+      md5sum: 5226ef383532e9b16c16b03e35ce181e
+    - path: output/transdecoder/test/longest_orfs.cds
+      md5sum: 041ac609e4c4ffb38676b1f88365be0f
+    - path: output/transdecoder/test/longest_orfs.gff3
+      md5sum: 96062ef88364f6bf1368d65cd1aad350
+    - path: output/transdecoder/test/longest_orfs.pep
+      md5sum: 0b903d36040676efeb2c50ab5ba46bbd