Transdecoder longorf (#1230)

* create transdecoder/longorf module

* module transdecoder

* main.nf

* transdecoder/longorf ready to PR

* review comments update

* transdecoder_longorf reviewed main.nf

* removed predict

* test.yml updated

* pytest run

Co-authored-by: Danilo Di Leo <danilo.dileo@lnu.se>
This commit is contained in:
Danilo Di Leo 2022-01-31 13:34:38 +01:00 committed by GitHub
parent 7026122ec8
commit a74e8436cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 134 additions and 0 deletions

View file

@ -0,0 +1,37 @@
// Runs TransDecoder.LongOrfs on a FASTA file and collects the files it writes
// into its output directory.
//
// Input : tuple of a meta map (must provide `id`) and a FASTA file.
// Output: the .pep, .gff3, .cds and .dat files found in the output directory,
//         the output directory itself, and a versions.yml file.
process TRANSDECODER_LONGORF {
    tag "$meta.id"
    label 'process_medium'

    // All three software sources are pinned to the same 5.5.0 build so that the
    // conda, Singularity and Docker routes run the same tool version.
    conda (params.enable_conda ? "bioconda::transdecoder=5.5.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/transdecoder:5.5.0--pl5262hdfd78af_4' :
        'quay.io/biocontainers/transdecoder:5.5.0--pl5262hdfd78af_4' }"

    input:
    tuple val(meta), path(fasta)

    output:
    // The globs follow `prefix` because that is the directory passed to `-O`;
    // using meta.id here would miss every file when task.ext.prefix is set.
    tuple val(meta), path("${prefix}/*.pep") , emit: pep
    tuple val(meta), path("${prefix}/*.gff3"), emit: gff3
    tuple val(meta), path("${prefix}/*.cds") , emit: cds
    tuple val(meta), path("${prefix}/*.dat") , emit: dat
    path("${prefix}/")                       , emit: folder
    path "versions.yml"                      , emit: versions

    script:
    def args = task.ext.args ?: ''
    // Deliberately declared without `def`: the variable has to stay visible to
    // the output block above, which is evaluated after the script section.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    TransDecoder.LongOrfs \\
        $args \\
        -O $prefix \\
        -t \\
        $fasta

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        transdecoder: \$(echo \$(TransDecoder.LongOrfs --version) | sed -e "s/TransDecoder.LongOrfs //g")
    END_VERSIONS
    """
}

View file

@ -0,0 +1,57 @@
# Metadata for the transdecoder/longorf module: tool references plus a
# description of every input and output channel.
name: transdecoder_longorf
description: TransDecoder identifies candidate coding regions within transcript sequences. It is used to build a GFF file.
keywords:
  - eukaryotes
  - gff
tools:
  - transdecoder:
      description: TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
      homepage: https://github.com/TransDecoder
      documentation: https://github.com/TransDecoder/TransDecoder/wiki
      tool_dev_url: https://github.com/TransDecoder/TransDecoder
      doi: ""
      licence: ['Broad Institute']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - fasta:
      type: file
      description: FASTA file
      pattern: "*.{fasta}"

# `type` is restricted to generic kinds (map, file, directory); the concrete
# file format of each output is stated in its description instead.
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - pep:
      type: file
      description: Amino acid FASTA file with all ORFs meeting the minimum length criteria, regardless of coding potential
      pattern: "*.{pep}"
  - gff3:
      type: file
      description: GFF file with the positions of all ORFs as found in the target transcripts
      pattern: "*.{gff3}"
  - cds:
      type: file
      description: FASTA file with the nucleotide coding sequence for all detected ORFs
      pattern: "*.{cds}"
  - dat:
      type: file
      description: TSV file with nucleotide frequencies
      pattern: "*.{dat}"
  - folder:
      type: directory
      description: Directory containing all the files from the run

authors:
  - "@Danilo2771"

View file

@ -1457,6 +1457,10 @@ tiddit/sv:
- modules/tiddit/sv/**
- tests/modules/tiddit/sv/**
# Path globs grouped under the transdecoder/longorf tag: the module sources and their tests.
transdecoder/longorf:
- modules/transdecoder/longorf/**
- tests/modules/transdecoder/longorf/**
# Path globs grouped under the trimgalore tag: the module sources and their tests.
trimgalore:
- modules/trimgalore/**
- tests/modules/trimgalore/**

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { TRANSDECODER_LONGORF } from '../../../../modules/transdecoder/longorf/main.nf'
// Test entry point: feeds the SARS-CoV-2 genome FASTA from the test-data map
// to TRANSDECODER_LONGORF as a single [ meta, fasta ] tuple.
workflow test_transdecoder_longorf {

    // Sample metadata map passed alongside the file.
    def meta  = [ id:'test', single_end:false ]

    // checkIfExists makes the run fail early when the test file is missing.
    def fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)

    TRANSDECODER_LONGORF ( [ meta, fasta ] )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's results under <outdir>/<tool>, where <tool> is
    // derived from the process name: keep the part after the last ':' and
    // then the part before the first '_', lower-cased.
    publishDir = {
        def simpleName = task.process.tokenize(':')[-1]
        def toolName   = simpleName.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${toolName}"
    }

}

View file

@ -0,0 +1,16 @@
# Test case for the test_transdecoder_longorf entry workflow.
- name: transdecoder longorf test_transdecoder_longorf
  command: nextflow run tests/modules/transdecoder/longorf -entry test_transdecoder_longorf -c tests/config/nextflow.config
  tags:
    - transdecoder
    - transdecoder/longorf
  # Expected output files, each listed with its md5 checksum.
  files:
    - path: output/transdecoder/versions.yml
      md5sum: 15cffd5b1119e63d52dc754caf0f2d3c
    - path: output/transdecoder/test/base_freqs.dat
      md5sum: 5226ef383532e9b16c16b03e35ce181e
    - path: output/transdecoder/test/longest_orfs.cds
      md5sum: 041ac609e4c4ffb38676b1f88365be0f
    - path: output/transdecoder/test/longest_orfs.gff3
      md5sum: 96062ef88364f6bf1368d65cd1aad350
    - path: output/transdecoder/test/longest_orfs.pep
      md5sum: 0b903d36040676efeb2c50ab5ba46bbd