mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Added module arriba (#611)
* Updated the version of STAR in align and genomegenerate modules * Changes in test.yml * Changes in test.yml * Added module arriba * Changes in test configs * Added module Arriba for fusion detection * Fixed review comments * Added an output option for discarded fusions * Resolved some conflicts * conflicts * Apply suggestions from code review Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
553f51d6bf
commit
3cabc95d0e
6 changed files with 302 additions and 0 deletions
68
modules/arriba/functions.nf
Normal file
68
modules/arriba/functions.nf
Normal file
|
@ -0,0 +1,68 @@
|
|||
//
|
||||
// Utility functions used in nf-core DSL2 module files
|
||||
//
|
||||
|
||||
//
|
||||
// Extract name of software tool from process name using $task.process
|
||||
//
|
||||
def getSoftwareName(task_process) {
    // task_process is a ':'-separated process name; only the last segment is used.
    // From that segment, the text before the first '_' is returned in lower case,
    // e.g. 'WORKFLOW:STAR_ALIGN' -> 'star'.
    def process_name = task_process.tokenize(':')[-1]
    def tool_name    = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
|
||||
|
||||
//
|
||||
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
|
||||
//
|
||||
def initOptions(Map args) {
    // Build the module options map, substituting a default for every key that is
    // missing or falsy in `args`. `publish_files` is copied through as-is (it may
    // stay null), because it has no default here.
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,
        suffix          : args.suffix ?: ''
    ]
}
|
||||
|
||||
//
|
||||
// Tidy up and join elements of a list to return a path string
|
||||
//
|
||||
def getPathFromList(path_list) {
    // Joins the usable entries of `path_list` into a single '/'-separated path string.
    //
    // Entries that are null, empty or whitespace-only are dropped. Groovy truth is used
    // for the filter because `item?.trim().isEmpty()` still calls isEmpty() on null when
    // `item` is null ('?.' only guards the first call), which throws instead of skipping.
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim surrounding whitespace, then strip leading and trailing slashes from each entry
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
|
||||
|
||||
//
|
||||
// Function to save/publish module results
|
||||
//
|
||||
def saveFiles(Map args) {
    // Computes the relative publish path for `args.filename`, or returns null.
    //
    // Keys read from `args`: filename, options, publish_dir, publish_by_meta, meta.
    // Returns null when the filename ends with '.version.txt', when `publish_files`
    // is a Map with no key matching the filename's ending, or when `publish_files`
    // is set to something that is neither a Map nor null.
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def ioptions  = initOptions(args.options)
    // Module-level publish_dir takes precedence over the one supplied by the caller
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    if (ioptions.publish_by_meta) {
        // A List given in the options names the meta keys; any other truthy value
        // falls back to the key list supplied by the caller
        def meta_keys = args.publish_by_meta
        if (ioptions.publish_by_meta instanceof List) {
            meta_keys = ioptions.publish_by_meta
        }
        for (key in meta_keys) {
            if (!(args.meta && key instanceof String)) {
                continue
            }
            // A key absent from meta contributes its own name as the path segment
            def segment = key
            if (args.meta.containsKey(key)) {
                if (args.meta[key] instanceof Boolean) {
                    segment = "${key}_${args.meta[key]}".toString()
                } else {
                    segment = args.meta[key]
                }
            }
            // Anything that is not a plain String becomes an empty segment
            path_list.add(segment instanceof String ? segment : '')
        }
    }

    if (ioptions.publish_files instanceof Map) {
        // First entry whose key matches the end of the filename wins;
        // its value is appended as an extra sub-directory
        for (ext in ioptions.publish_files) {
            if (args.filename.endsWith(ext.key)) {
                def ext_list = path_list.collect()
                ext_list.add(ext.value)
                return "${getPathFromList(ext_list)}/$args.filename"
            }
        }
        return null
    }

    if (ioptions.publish_files == null) {
        return "${getPathFromList(path_list)}/$args.filename"
    }
    return null
}
|
47
modules/arriba/main.nf
Normal file
47
modules/arriba/main.nf
Normal file
|
@ -0,0 +1,47 @@
|
|||
// Import generic module functions
|
||||
include { initOptions; saveFiles; getSoftwareName } from './functions'
|
||||
|
||||
params.options = [:]
|
||||
options = initOptions(params.options)
|
||||
|
||||
process ARRIBA {
    // Runs arriba on one BAM per sample and emits the fusion table, the
    // discarded-fusion table and a version file.
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::arriba=2.1.0" : null)
    // The Singularity image is used only when the engine is singularity AND the
    // docker-pull override is not set; every other combination uses the quay.io image.
    if (workflow.containerEngine != 'singularity' || params.singularity_pull_docker_container) {
        container "quay.io/biocontainers/arriba:2.1.0--h3198e80_1"
    } else {
        container "https://depot.galaxyproject.org/singularity/arriba:2.1.0--h3198e80_1"
    }

    input:
    tuple val(meta), path(bam)
    path fasta
    path gtf

    output:
    tuple val(meta), path("*.fusions.tsv")          , emit: fusions
    tuple val(meta), path("*.fusions.discarded.tsv"), emit: fusions_fail
    path "*.version.txt"                            , emit: version

    script:
    def software = getSoftwareName(task.process)

    // Output file prefix: sample id, plus the optional module suffix
    def prefix = "${meta.id}"
    if (options.suffix) {
        prefix = "${meta.id}${options.suffix}"
    }

    // '-f blacklist' is added unless the user-supplied args contain '-b'.
    // NOTE(review): presumably this disables arriba's blacklist filter when no
    // blacklist file is given — confirm against the arriba documentation.
    def blacklist = '-f blacklist'
    if (options.args.contains('-b')) {
        blacklist = ''
    }
    """
    arriba \\
        -x $bam \\
        -a $fasta \\
        -g $gtf \\
        -o ${prefix}.fusions.tsv \\
        -O ${prefix}.fusions.discarded.tsv \\
        $blacklist \\
        $options.args

    echo \$(arriba -h | grep 'Version:' 2>&1) | sed 's/Version:\s//' > ${software}.version.txt
    """
}
|
54
modules/arriba/meta.yml
Normal file
54
modules/arriba/meta.yml
Normal file
|
@ -0,0 +1,54 @@
|
|||
name: arriba
|
||||
description: Arriba is a command-line tool for the detection of gene fusions from RNA-Seq data.
|
||||
keywords:
|
||||
- fusion
|
||||
- arriba
|
||||
tools:
|
||||
- arriba:
|
||||
description: Fast and accurate gene fusion detection from RNA-Seq data
|
||||
homepage: https://github.com/suhrig/arriba
|
||||
documentation: https://arriba.readthedocs.io/en/latest/
|
||||
tool_dev_url: https://github.com/suhrig/arriba
|
||||
doi: "10.1101/gr.257246.119"
|
||||
licence: ['MIT']
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: BAM/CRAM/SAM file
|
||||
pattern: "*.{bam,cram,sam}"
|
||||
- fasta:
|
||||
type: file
|
||||
description: Assembly FASTA file
|
||||
pattern: "*.{fasta}"
|
||||
- gtf:
|
||||
type: file
|
||||
description: Annotation GTF file
|
||||
pattern: "*.{gtf}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.{version.txt}"
|
||||
- fusions:
|
||||
type: file
|
||||
description: File contains fusions which pass all of Arriba's filters.
|
||||
pattern: "*.{fusions.tsv}"
|
||||
- fusions_fail:
|
||||
type: file
|
||||
description: File contains fusions that Arriba classified as an artifact or that are also observed in healthy tissue.
|
||||
pattern: "*.{fusions.discarded.tsv}"
|
||||
|
||||
authors:
|
||||
- "@praveenraj2018"
|
|
@ -10,6 +10,10 @@ allelecounter:
|
|||
- modules/allelecounter/**
|
||||
- tests/modules/allelecounter/**
|
||||
|
||||
arriba:
|
||||
- modules/arriba/**
|
||||
- tests/modules/arriba/**
|
||||
|
||||
artic/guppyplex:
|
||||
- modules/artic/guppyplex/**
|
||||
- tests/modules/artic/guppyplex/**
|
||||
|
|
36
tests/modules/arriba/main.nf
Normal file
36
tests/modules/arriba/main.nf
Normal file
|
@ -0,0 +1,36 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { STAR_GENOMEGENERATE } from '../../../modules/star/genomegenerate/main.nf' addParams( options: [args: '--genomeSAindexNbases 11'] )
|
||||
include { STAR_ALIGN } from '../../../modules/star/align/main.nf' addParams( options: [args: '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'] )
|
||||
include { ARRIBA } from '../../../modules/arriba/main.nf' addParams( options: [:] )
|
||||
|
||||
workflow test_arriba_single_end {
    // Single-end test: build a STAR index, align one FASTQ, then run ARRIBA on the BAM.
    // The FASTA index (fai) lookup was dropped: no process in this workflow consumed it.

    input = [ [ id:'test', single_end:true ], // meta map
              [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ]
            ]
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    STAR_GENOMEGENERATE ( fasta, gtf )
    STAR_ALIGN ( input, STAR_GENOMEGENERATE.out.index, gtf )
    ARRIBA ( STAR_ALIGN.out.bam, fasta, gtf )
}
|
||||
|
||||
workflow test_arriba_paired_end {
    // Paired-end test: build a STAR index, align both FASTQ mates, then run ARRIBA on the BAM.
    // The FASTA index (fai) lookup was dropped: no process in this workflow consumed it.

    input = [ [ id:'test', single_end:false ], // meta map
              [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
                file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true) ]
            ]
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    STAR_GENOMEGENERATE ( fasta, gtf )
    STAR_ALIGN ( input, STAR_GENOMEGENERATE.out.index, gtf )
    ARRIBA ( STAR_ALIGN.out.bam, fasta, gtf )
}
|
93
tests/modules/arriba/test.yml
Normal file
93
tests/modules/arriba/test.yml
Normal file
|
@ -0,0 +1,93 @@
|
|||
- name: arriba test_arriba_single_end
|
||||
command: nextflow run tests/modules/arriba -entry test_arriba_single_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- arriba
|
||||
files:
|
||||
- path: output/arriba/test.fusions.discarded.tsv
|
||||
md5sum: cad8c215b938d1e45b747a5b7898a4c2
|
||||
- path: output/arriba/test.fusions.tsv
|
||||
md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70
|
||||
- path: output/index/star/Genome
|
||||
md5sum: a654229fbca6071dcb6b01ce7df704da
|
||||
- path: output/index/star/Log.out
|
||||
- path: output/index/star/SA
|
||||
md5sum: 8c3edc46697b72c9e92440d4cf43506c
|
||||
- path: output/index/star/SAindex
|
||||
md5sum: 9f085c626553b1c52f2827421972ac10
|
||||
- path: output/index/star/chrLength.txt
|
||||
md5sum: c81f40f27e72606d7d07097c1d56a5b5
|
||||
- path: output/index/star/chrName.txt
|
||||
md5sum: 5ae68a67b70976ee95342a7451cb5af1
|
||||
- path: output/index/star/chrNameLength.txt
|
||||
md5sum: b190587cae0531f3cf25552d8aa674db
|
||||
- path: output/index/star/chrStart.txt
|
||||
md5sum: 8d3291e6bcdbe9902fbd7c887494173f
|
||||
- path: output/index/star/exonGeTrInfo.tab
|
||||
md5sum: d04497f69d6ef889efd4d34fe63edcc4
|
||||
- path: output/index/star/exonInfo.tab
|
||||
md5sum: 0d560290fab688b7268d88d5494bf9fe
|
||||
- path: output/index/star/geneInfo.tab
|
||||
md5sum: 8b608537307443ffaee4927d2b428805
|
||||
- path: output/index/star/genomeParameters.txt
|
||||
md5sum: 9e42067b1ec70b773257529230dd7b3a
|
||||
- path: output/index/star/sjdbInfo.txt
|
||||
md5sum: 5690ea9d9f09f7ff85b7fd47bd234903
|
||||
- path: output/index/star/sjdbList.fromGTF.out.tab
|
||||
md5sum: 8760c33e966dad0b39f440301ebbdee4
|
||||
- path: output/index/star/sjdbList.out.tab
|
||||
md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1
|
||||
- path: output/index/star/transcriptInfo.tab
|
||||
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36
|
||||
- path: output/star/test.Aligned.out.bam
|
||||
md5sum: 29c99195dcc79ff4df1f754ff16aac78
|
||||
- path: output/star/test.Log.final.out
|
||||
- path: output/star/test.Log.out
|
||||
- path: output/star/test.Log.progress.out
|
||||
- path: output/star/test.SJ.out.tab
|
||||
|
||||
- name: arriba test_arriba_paired_end
|
||||
command: nextflow run tests/modules/arriba -entry test_arriba_paired_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- arriba
|
||||
files:
|
||||
- path: output/arriba/test.fusions.discarded.tsv
|
||||
md5sum: 85e36c887464e4deaa65f45174d3b8fd
|
||||
- path: output/arriba/test.fusions.tsv
|
||||
md5sum: 7c3383f7eb6d79b84b0bd30a7ef02d70
|
||||
- path: output/index/star/Genome
|
||||
md5sum: a654229fbca6071dcb6b01ce7df704da
|
||||
- path: output/index/star/Log.out
|
||||
- path: output/index/star/SA
|
||||
md5sum: 8c3edc46697b72c9e92440d4cf43506c
|
||||
- path: output/index/star/SAindex
|
||||
md5sum: 9f085c626553b1c52f2827421972ac10
|
||||
- path: output/index/star/chrLength.txt
|
||||
md5sum: c81f40f27e72606d7d07097c1d56a5b5
|
||||
- path: output/index/star/chrName.txt
|
||||
md5sum: 5ae68a67b70976ee95342a7451cb5af1
|
||||
- path: output/index/star/chrNameLength.txt
|
||||
md5sum: b190587cae0531f3cf25552d8aa674db
|
||||
- path: output/index/star/chrStart.txt
|
||||
md5sum: 8d3291e6bcdbe9902fbd7c887494173f
|
||||
- path: output/index/star/exonGeTrInfo.tab
|
||||
md5sum: d04497f69d6ef889efd4d34fe63edcc4
|
||||
- path: output/index/star/exonInfo.tab
|
||||
md5sum: 0d560290fab688b7268d88d5494bf9fe
|
||||
- path: output/index/star/geneInfo.tab
|
||||
md5sum: 8b608537307443ffaee4927d2b428805
|
||||
- path: output/index/star/genomeParameters.txt
|
||||
md5sum: 9e42067b1ec70b773257529230dd7b3a
|
||||
- path: output/index/star/sjdbInfo.txt
|
||||
md5sum: 5690ea9d9f09f7ff85b7fd47bd234903
|
||||
- path: output/index/star/sjdbList.fromGTF.out.tab
|
||||
md5sum: 8760c33e966dad0b39f440301ebbdee4
|
||||
- path: output/index/star/sjdbList.out.tab
|
||||
md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1
|
||||
- path: output/index/star/transcriptInfo.tab
|
||||
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36
|
||||
- path: output/star/test.Aligned.out.bam
|
||||
md5sum: d724ca90a102347b9c5052a33ea4d308
|
||||
- path: output/star/test.Log.final.out
|
||||
- path: output/star/test.Log.out
|
||||
- path: output/star/test.Log.progress.out
|
||||
- path: output/star/test.SJ.out.tab
|
Loading…
Reference in a new issue