Added module arriba (#611)

* Updated the version of STAR in align and genomegenerate modules

* Changes in test.yml

* Changes in test.yml

* Added module arriba

* Changes in test configs

* Added module Arriba for fusion detection

* Fixed review comments

* Added an output option for discarded fusions

* Resolved some conflicts

* conflicts

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
praveenraj2018 2021-07-22 16:19:42 +02:00 committed by GitHub
parent 553f51d6bf
commit 3cabc95d0e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 302 additions and 0 deletions

View file

@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Derive the software tool name from a process name such as $task.process:
// take the last ':'-separated component, then its first '_'-separated token, lower-cased
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_token   = process_name.tokenize('_')[0]
    return tool_token.toLowerCase()
}
//
// Build the Groovy Map of options available to nf-core modules,
// substituting a default for every key that is unset (or otherwise falsy) in `args`
//
def initOptions(Map args) {
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        // Copied as-is (no default): saveFiles() distinguishes null from other values
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}
//
// Tidy up and join elements of a list to return a path string
//
// Null, empty and whitespace-only entries are dropped; every remaining entry is
// trimmed and stripped of leading and trailing slashes before being joined with '/'.
//
def getPathFromList(path_list) {
    // `item?.trim()` is falsy for null, '' and blank strings alike, so null entries are
    // skipped instead of raising a NullPointerException on a chained `.isEmpty()` call
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace and remove leading/trailing slashes from each entry
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
//
// Function to save/publish module results
//
// Works out the relative path under which `args.filename` is published:
//   <publish_dir>/<one segment per publish_by_meta key>/[<publish_files sub-directory>/]<filename>
// Returns null for '*.version.txt' files, when `publish_files` is a Map with no
// matching suffix, or when `publish_files` is neither a Map nor null.
//
def saveFiles(Map args) {
    // Version files never get a publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts     = initOptions(args.options)
    // The module-level option wins over the directory supplied by the caller
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    if (opts.publish_by_meta) {
        // A List given in the options is used directly; any other truthy value
        // falls back to the key list supplied by the caller
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!(args.meta && meta_key instanceof String)) {
                continue
            }
            // Default to the key itself when meta has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                // Booleans become '<key>_<value>'; anything else is taken as-is
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Only plain Strings are kept as path segments
            segments.add(segment instanceof String ? segment : '')
        }
    }

    if (opts.publish_files instanceof Map) {
        // The first entry whose key is a suffix of the filename decides the sub-directory
        for (rule in opts.publish_files) {
            if (args.filename.endsWith(rule.key)) {
                def target = segments.collect()
                target.add(rule.value)
                return "${getPathFromList(target)}/$args.filename"
            }
        }
        return null
    }

    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    return null
}

47
modules/arriba/main.nf Normal file
View file

@ -0,0 +1,47 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
// Module options default to an empty map; initOptions() fills in a default for every unset key
params.options = [:]
options = initOptions(params.options)
//
// Run arriba on an alignment file together with a genome FASTA and a GTF annotation.
// Emits the fusions table, the discarded-fusions table and a version file.
//
process ARRIBA {
tag "$meta.id"
label 'process_medium'
// Publish under params.outdir; saveFiles() computes the relative path, using the
// software name as the default directory and the meta 'id' as a publish_by_meta key
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
conda (params.enable_conda ? "bioconda::arriba=2.1.0" : null)
// Use the Singularity image URL when running under Singularity, unless
// params.singularity_pull_docker_container is set; otherwise use the quay.io image
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/arriba:2.1.0--h3198e80_1"
} else {
container "quay.io/biocontainers/arriba:2.1.0--h3198e80_1"
}
input:
tuple val(meta), path(bam)
path fasta
path gtf
output:
tuple val(meta), path("*.fusions.tsv") , emit: fusions
tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail
path "*.version.txt" , emit: version
script:
def software = getSoftwareName(task.process)
// Output file prefix: the sample id, with options.suffix appended when one is set
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
// Add '-f blacklist' unless the user-supplied args already contain '-b'.
// NOTE(review): presumably '-b' supplies a blacklist file and '-f blacklist' disables the
// blacklist filter when none is given — confirm against the arriba command-line docs.
// The check is a plain substring match, so any '-b' inside args (e.g. within a path) also counts.
def blacklist = (options.args.contains('-b')) ? '' : '-f blacklist'
// The final command writes the 'Version:' line of `arriba -h`, with the label stripped by sed,
// to <software>.version.txt
"""
arriba \\
-x $bam \\
-a $fasta \\
-g $gtf \\
-o ${prefix}.fusions.tsv \\
-O ${prefix}.fusions.discarded.tsv \\
$blacklist \\
$options.args
echo \$(arriba -h | grep 'Version:' 2>&1) | sed 's/Version:\s//' > ${software}.version.txt
"""
}

54
modules/arriba/meta.yml Normal file
View file

@ -0,0 +1,54 @@
name: arriba
description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data.
keywords:
- fusion
- arriba
tools:
- arriba:
description: Fast and accurate gene fusion detection from RNA-Seq data
homepage: https://github.com/suhrig/arriba
documentation: https://arriba.readthedocs.io/en/latest/
tool_dev_url: https://github.com/suhrig/arriba
doi: "10.1101/gr.257246.119"
licence: ['MIT']
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
- fasta:
type: file
description: Assembly FASTA file
pattern: "*.{fasta}"
- gtf:
type: file
description: Annotation GTF file
pattern: "*.{gtf}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
- fusions:
type: file
description: File contains fusions which pass all of Arriba's filters.
pattern: "*.{fusions.tsv}"
- fusions_fail:
type: file
description: File contains fusions that Arriba classified as an artifact or that are also observed in healthy tissue.
pattern: "*.{fusions.discarded.tsv}"
authors:
- "@praveenraj2018"

View file

@ -10,6 +10,10 @@ allelecounter:
- modules/allelecounter/**
- tests/modules/allelecounter/**
arriba:
- modules/arriba/**
- tests/modules/arriba/**
artic/guppyplex:
- modules/artic/guppyplex/**
- tests/modules/artic/guppyplex/**

View file

@ -0,0 +1,36 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { STAR_GENOMEGENERATE } from '../../../modules/star/genomegenerate/main.nf' addParams( options: [args: '--genomeSAindexNbases 11'] )
include { STAR_ALIGN } from '../../../modules/star/align/main.nf' addParams( options: [args: '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'] )
include { ARRIBA } from '../../../modules/arriba/main.nf' addParams( options: [:] )
//
// Single-end test: build a STAR index, align the reads, then run ARRIBA on the resulting BAM
//
workflow test_arriba_single_end {
    input = [ [ id:'test', single_end:true ], // meta map
              [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ]
            ]
    // Reference files shared by index generation, alignment and fusion calling
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    STAR_GENOMEGENERATE ( fasta, gtf )
    STAR_ALIGN ( input, STAR_GENOMEGENERATE.out.index, gtf )
    ARRIBA ( STAR_ALIGN.out.bam, fasta, gtf )
}
//
// Paired-end test: build a STAR index, align both read files, then run ARRIBA on the resulting BAM
//
workflow test_arriba_paired_end {
    input = [ [ id:'test', single_end:false ], // meta map
              [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
                file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) ]
            ]
    // Reference files shared by index generation, alignment and fusion calling
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    STAR_GENOMEGENERATE ( fasta, gtf )
    STAR_ALIGN ( input, STAR_GENOMEGENERATE.out.index, gtf )
    ARRIBA ( STAR_ALIGN.out.bam, fasta, gtf )
}

View file

@ -0,0 +1,93 @@
- name: arriba test_arriba_single_end
command: nextflow run tests/modules/arriba -entry test_arriba_single_end -c tests/config/nextflow.config
tags:
- arriba
files:
- path: output/arriba/test.fusions.discarded.tsv
md5sum: cad8c215b938d1e45b747a5b7898a4c2
- path: output/arriba/test.fusions.tsv
md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70
- path: output/index/star/Genome
md5sum: a654229fbca6071dcb6b01ce7df704da
- path: output/index/star/Log.out
- path: output/index/star/SA
md5sum: 8c3edc46697b72c9e92440d4cf43506c
- path: output/index/star/SAindex
md5sum: 9f085c626553b1c52f2827421972ac10
- path: output/index/star/chrLength.txt
md5sum: c81f40f27e72606d7d07097c1d56a5b5
- path: output/index/star/chrName.txt
md5sum: 5ae68a67b70976ee95342a7451cb5af1
- path: output/index/star/chrNameLength.txt
md5sum: b190587cae0531f3cf25552d8aa674db
- path: output/index/star/chrStart.txt
md5sum: 8d3291e6bcdbe9902fbd7c887494173f
- path: output/index/star/exonGeTrInfo.tab
md5sum: d04497f69d6ef889efd4d34fe63edcc4
- path: output/index/star/exonInfo.tab
md5sum: 0d560290fab688b7268d88d5494bf9fe
- path: output/index/star/geneInfo.tab
md5sum: 8b608537307443ffaee4927d2b428805
- path: output/index/star/genomeParameters.txt
md5sum: 9e42067b1ec70b773257529230dd7b3a
- path: output/index/star/sjdbInfo.txt
md5sum: 5690ea9d9f09f7ff85b7fd47bd234903
- path: output/index/star/sjdbList.fromGTF.out.tab
md5sum: 8760c33e966dad0b39f440301ebbdee4
- path: output/index/star/sjdbList.out.tab
md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1
- path: output/index/star/transcriptInfo.tab
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36
- path: output/star/test.Aligned.out.bam
md5sum: 29c99195dcc79ff4df1f754ff16aac78
- path: output/star/test.Log.final.out
- path: output/star/test.Log.out
- path: output/star/test.Log.progress.out
- path: output/star/test.SJ.out.tab
- name: arriba test_arriba_paired_end
command: nextflow run tests/modules/arriba -entry test_arriba_paired_end -c tests/config/nextflow.config
tags:
- arriba
files:
- path: output/arriba/test.fusions.discarded.tsv
md5sum: 85e36c887464e4deaa65f45174d3b8fd
- path: output/arriba/test.fusions.tsv
md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70
- path: output/index/star/Genome
md5sum: a654229fbca6071dcb6b01ce7df704da
- path: output/index/star/Log.out
- path: output/index/star/SA
md5sum: 8c3edc46697b72c9e92440d4cf43506c
- path: output/index/star/SAindex
md5sum: 9f085c626553b1c52f2827421972ac10
- path: output/index/star/chrLength.txt
md5sum: c81f40f27e72606d7d07097c1d56a5b5
- path: output/index/star/chrName.txt
md5sum: 5ae68a67b70976ee95342a7451cb5af1
- path: output/index/star/chrNameLength.txt
md5sum: b190587cae0531f3cf25552d8aa674db
- path: output/index/star/chrStart.txt
md5sum: 8d3291e6bcdbe9902fbd7c887494173f
- path: output/index/star/exonGeTrInfo.tab
md5sum: d04497f69d6ef889efd4d34fe63edcc4
- path: output/index/star/exonInfo.tab
md5sum: 0d560290fab688b7268d88d5494bf9fe
- path: output/index/star/geneInfo.tab
md5sum: 8b608537307443ffaee4927d2b428805
- path: output/index/star/genomeParameters.txt
md5sum: 9e42067b1ec70b773257529230dd7b3a
- path: output/index/star/sjdbInfo.txt
md5sum: 5690ea9d9f09f7ff85b7fd47bd234903
- path: output/index/star/sjdbList.fromGTF.out.tab
md5sum: 8760c33e966dad0b39f440301ebbdee4
- path: output/index/star/sjdbList.out.tab
md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1
- path: output/index/star/transcriptInfo.tab
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36
- path: output/star/test.Aligned.out.bam
md5sum: d724ca90a102347b9c5052a33ea4d308
- path: output/star/test.Log.final.out
- path: output/star/test.Log.out
- path: output/star/test.Log.progress.out
- path: output/star/test.SJ.out.tab