Added kallistobustools/ref (#408)

* Added kallistobustools/ref. Local tests all passing with Docker. Linting passed. Test data currently in /tests/data/delete_me

* Removed trailing whitespace line 29

* Moved workflow from meta to options.

* Update main.nf

* Forgot to remove previous testing input channel for workflow.

* Apply suggestions from code review

Applied changes suggested by @drpatelh

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Added gtf to meta.yml.

* Apply suggestions from code review

Adding @drpatelh suggested changes.

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Moved workflow to input value. Fixed tests.

* Update tests/software/kallistobustools/ref/test.yml

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Florian Wuennemann 2021-04-09 04:41:41 -04:00 committed by GitHub
parent 16e8d21477
commit 0f53c3b96b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 268 additions and 4 deletions

View file

@ -0,0 +1,60 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the software tool name from a Nextflow process name ($task.process).
 *
 * The process name is split on ':' and its last component is kept; that
 * component is split on '_' and its first piece is returned in lower case.
 */
def getSoftwareName(task_process) {
    def scope_parts = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def tool_name = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}
/*
 * Build the Groovy Map of module options, filling in a default for every
 * key that is missing (or falsy) in the supplied map.
 *
 * Note that publish_files is copied through untouched, so it stays null
 * when the caller did not provide it.
 */
def initOptions(Map args) {
    return [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        args3         : args.args3 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,
        suffix        : args.suffix ?: ''
    ]
}
/*
 * Tidy up and join elements of a list to return a path string.
 *
 * Entries that are null, empty or whitespace-only are dropped. Each remaining
 * entry is trimmed of surrounding whitespace and of leading AND trailing
 * slashes, then the entries are joined with '/'.
 */
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false, so this drops null, empty
    // and blank entries. The previous `!item?.trim().isEmpty()` threw a
    // NullPointerException on a null entry because `?.` does not short-circuit
    // the following `.isEmpty()` call.
    def paths = path_list.findAll { item -> item?.trim() }
    // Strip surrounding whitespace plus any leading/trailing slashes
    paths = paths.collect { it.trim().replaceAll('^/+|/+$', '') }
    return paths.join('/')
}
/*
 * Decide where (and whether) a module result file gets published.
 *
 * Expects args.filename, args.options, args.publish_dir and args.publish_id.
 * Returns the relative publish path for the file, or null when the file is
 * not to be published:
 *   - files ending in '.version.txt' are never published;
 *   - if options.publish_files is a Map, the first entry whose key is a
 *     suffix of the filename wins and its value is appended as a sub-directory;
 *     with no matching entry the file is not published;
 *   - if options.publish_files is null, every file is published;
 *   - any other publish_files value publishes nothing.
 */
def saveFiles(Map args) {
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)

    // Base directory: the module option takes precedence over the caller's default
    def segments = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        segments << args.publish_id
    }

    def rules = opts.publish_files
    if (rules instanceof Map) {
        def hit = rules.find { entry -> args.filename.endsWith(entry.key) }
        if (hit != null) {
            def target = segments + [ hit.value ]
            return "${getPathFromList(target)}/$args.filename"
        }
        return null
    }
    if (rules == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    return null
}

View file

@ -0,0 +1,67 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
options = initOptions(params.options)
/*
 * Run `kb ref` on a FASTA + GTF pair to build the index and companion files.
 *
 * Inputs (positional): fasta, gtf, and a string that is passed to `--workflow`.
 * When that string is "standard" only the index, t2g and cdna files are
 * requested; for any other value the intron and t2c files are requested too.
 */
process KALLISTOBUSTOOLS_REF {
tag "$fasta"
label 'process_medium'
// Publish results under params.outdir; saveFiles() decides the sub-path per file
// and the default sub-directory is the tool name derived from the process name.
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') }
// Software environment: conda package, or a Singularity/Docker image of the same version
conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0"
} else {
container "quay.io/biocontainers/kb-python:0.25.1--py_0"
}
input:
path fasta
path gtf
// NOTE(review): this input shares its name with the `workflow` object read in the
// container check above - confirm the shadowing is intended.
val workflow
output:
// The intron/t2c outputs are optional because the "standard" branch does not request them
path "*.version.txt" , emit: version
path "kb_ref_out.idx" , emit: index
path "t2g.txt" , emit: t2g
path "cdna.fa" , emit: cdna
path "intron.fa" , optional:true, emit: intron
path "cdna_t2c.txt" , optional:true, emit: cdna_t2c
path "intron_t2c.txt" , optional:true, emit: intron_t2c
script:
// NOTE(review): the file-level `options` value is not referenced in this block, so
// options.args does not appear in either command below - confirm that is intended.
def software = getSoftwareName(task.process)
// Both branches end by writing <software>.version.txt: the output of a bare `kb` call
// with the leading "kb_python " and everything from "Usage" onwards removed.
if (workflow == "standard") {
"""
kb \\
ref \\
-i kb_ref_out.idx \\
-g t2g.txt \\
-f1 cdna.fa \\
--workflow $workflow \\
$fasta \\
$gtf
echo \$(kb 2>&1) | sed 's/^kb_python //; s/Usage.*\$//' > ${software}.version.txt
"""
} else {
// Any value other than "standard" additionally requests -f2, -c1 and -c2
"""
kb \\
ref \\
-i kb_ref_out.idx \\
-g t2g.txt \\
-f1 cdna.fa \\
-f2 intron.fa \\
-c1 cdna_t2c.txt \\
-c2 intron_t2c.txt \\
--workflow $workflow \\
$fasta \\
$gtf
echo \$(kb 2>&1) | sed 's/^kb_python //; s/Usage.*\$//' > ${software}.version.txt
"""
}
}

View file

@ -0,0 +1,60 @@
name: kallistobustools_ref
description: index creation for kb count quantification of single-cell data.
keywords:
- kallisto-bustools
- index
tools:
- kb:
description: kallisto|bustools (kb) is a tool developed for fast and efficient processing of single-cell OMICS data.
homepage: https://www.kallistobus.tools/
documentation: https://kb-python.readthedocs.io/en/latest/index.html
tool_dev_url: https://github.com/pachterlab/kb_python
doi: "https://doi.org/10.22002/D1.1876"
licence: MIT License
input:
- fasta:
type: file
description: Genomic DNA fasta file
pattern: "*.{fasta,fasta.gz}"
- gtf:
type: file
description: Genomic gtf file
pattern: "*.{gtf,gtf.gz}"
- workflow:
type: value
description: String value defining the workflow to use; one of "standard", "lamanno" or "nucleus"
pattern: "{standard,lamanno,nucleus}"
output:
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
- kb_ref_idx:
type: file
description: Index file from kb ref.
pattern: "*.{idx}"
- t2g:
type: file
description: Transcript to gene table
pattern: "*t2g.{txt}"
- cdna:
type: file
description: cDNA fasta file
pattern: "*cdna.{fa}"
- intron:
type: file
description: intron fasta file
pattern: "*intron.{fa}"
- cdna_t2c:
type: file
description: cdna transcript to capture file
pattern: "*cdna_t2c.{txt}"
- intron_t2c:
type: file
description: intron transcript to capture file
pattern: "*intron_t2c.{txt}"
authors:
- "@flowuenne"

View file

@ -182,11 +182,11 @@ fasttree:
- software/fasttree/**
- tests/software/fasttree/**
fgbio_callmolecularconsensusreads:
fgbio/callmolecularconsensusreads:
- software/fgbio/callmolecularconsensusreads/**
- tests/software/fgbio/callmolecularconsensusreads/**
fgbio_sortbam:
fgbio/sortbam:
- software/fgbio/sortbam/**
- tests/software/fgbio/sortbam/**
@ -293,6 +293,10 @@ kallisto/index:
- software/kallisto/index/**
- tests/software/kallisto/index/**
kallistobustools/ref:
- software/kallistobustools/ref/**
- tests/software/kallistobustools/ref/**
kraken2/run:
- software/kraken2/run/**
- tests/software/kraken2/run/**
@ -317,11 +321,11 @@ mosdepth:
- software/mosdepth/**
- tests/software/mosdepth/**
msisensor_msi:
msisensor/msi:
- software/msisensor/msi/**
- tests/software/msisensor/msi/**
msisensor_scan:
msisensor/scan:
- software/msisensor/scan/**
- tests/software/msisensor/scan/**

View file

@ -0,0 +1,32 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { KALLISTOBUSTOOLS_REF } from '../../../../software/kallistobustools/ref/main.nf' addParams( options: [:] )
// Runs KALLISTOBUSTOOLS_REF on the test FASTA/GTF pair with the "standard" workflow
workflow test_kallistobustools_ref_standard {

    def genome_fasta = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
    def annotation_gtf = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
    def kb_workflow = "standard"

    KALLISTOBUSTOOLS_REF ( genome_fasta, annotation_gtf, kb_workflow )
}
// Runs KALLISTOBUSTOOLS_REF on the test FASTA/GTF pair with the "lamanno" workflow.
// This test previously passed "standard" (copy-paste slip), so the non-standard
// branch of the process was never exercised.
// NOTE(review): expected files/checksums recorded for this test were presumably
// produced in standard mode - regenerate them after this change.
workflow test_kallistobustools_ref_lamanno {

    def genome_fasta = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
    def annotation_gtf = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
    def kb_workflow = "lamanno"

    KALLISTOBUSTOOLS_REF ( genome_fasta, annotation_gtf, kb_workflow )
}
// Runs KALLISTOBUSTOOLS_REF on the test FASTA/GTF pair with the "nucleus" workflow.
// This test previously passed "standard" (copy-paste slip), so the non-standard
// branch of the process was never exercised.
// NOTE(review): expected files/checksums recorded for this test were presumably
// produced in standard mode - regenerate them after this change.
workflow test_kallistobustools_ref_nucleus {

    def genome_fasta = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
    def annotation_gtf = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
    def kb_workflow = "nucleus"

    KALLISTOBUSTOOLS_REF ( genome_fasta, annotation_gtf, kb_workflow )
}

View file

@ -0,0 +1,41 @@
- name: kallistobustools ref test_kallistobustools_ref_standard
command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_standard -c tests/config/nextflow.config
tags:
- kallistobustools
- kallistobustools_ref_standard
- kallistobustools/ref
files:
- path: output/kallistobustools/cdna.fa
md5sum: da194221c883656c68e2f90d8f77a56b
- path: output/kallistobustools/kb_ref_out.idx
md5sum: c0ffe9554cb6fdcc7cbb4a1285ca8f29
- path: output/kallistobustools/t2g.txt
md5sum: 3089d303d9cd1a39a27969eca2dcaba7
- name: kallistobustools ref test_kallistobustools_ref_lamanno
command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_lamanno -c tests/config/nextflow.config
tags:
- kallistobustools
- kallistobustools_ref_lamanno
- kallistobustools/ref
files:
- path: output/kallistobustools/cdna.fa
md5sum: da194221c883656c68e2f90d8f77a56b
- path: output/kallistobustools/kb_ref_out.idx
md5sum: c0ffe9554cb6fdcc7cbb4a1285ca8f29
- path: output/kallistobustools/t2g.txt
md5sum: 3089d303d9cd1a39a27969eca2dcaba7
- name: kallistobustools ref test_kallistobustools_ref_nucleus
command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_nucleus -c tests/config/nextflow.config
tags:
- kallistobustools_ref_nucleus
- kallistobustools
- kallistobustools/ref
files:
- path: output/kallistobustools/cdna.fa
md5sum: da194221c883656c68e2f90d8f77a56b
- path: output/kallistobustools/kb_ref_out.idx
md5sum: c0ffe9554cb6fdcc7cbb4a1285ca8f29
- path: output/kallistobustools/t2g.txt
md5sum: 3089d303d9cd1a39a27969eca2dcaba7