diff --git a/software/kallistobustools/ref/functions.nf b/software/kallistobustools/ref/functions.nf
new file mode 100644
index 00000000..f177f0c8
--- /dev/null
+++ b/software/kallistobustools/ref/functions.nf
@@ -0,0 +1,60 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args          = args.args ?: ''
+    options.args2         = args.args2 ?: ''
+    options.args3         = args.args3 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir   = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix        = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string (null, empty and blank entries are dropped)
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                   // Remove null/empty/blank entries; null-safe, unlike calling isEmpty() on a null result
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }  // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results: returns the publish path for a file, or nothing when it must not be published (e.g. *.version.txt)
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions  = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/software/kallistobustools/ref/main.nf b/software/kallistobustools/ref/main.nf
new file mode 100644
index 00000000..7f304109
--- /dev/null
+++ b/software/kallistobustools/ref/main.nf
@@ -0,0 +1,71 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+/*
+ * Build a kallisto|bustools reference with `kb ref`: an index, a transcript-to-gene table and a cDNA FASTA.
+ * For any workflow mode other than "standard", the intron FASTA and the two transcript-to-capture files are requested too.
+ */
+process KALLISTOBUSTOOLS_REF {
+    tag "$fasta"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') }
+
+    conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0"
+    } else {
+        container "quay.io/biocontainers/kb-python:0.25.1--py_0"
+    }
+
+    input:
+    path fasta
+    path gtf
+    val  workflow_mode // "standard", "lamanno" or "nucleus"; kept distinct from Nextflow's implicit `workflow` object read by the container selection above
+
+    output:
+    path "*.version.txt"  , emit: version
+    path "kb_ref_out.idx" , emit: index
+    path "t2g.txt"        , emit: t2g
+    path "cdna.fa"        , emit: cdna
+    path "intron.fa"      , optional:true, emit: intron
+    path "cdna_t2c.txt"   , optional:true, emit: cdna_t2c
+    path "intron_t2c.txt" , optional:true, emit: intron_t2c
+
+    script:
+    def software = getSoftwareName(task.process)
+    if (workflow_mode == "standard") {
+        """
+        kb \\
+            ref \\
+            -i kb_ref_out.idx \\
+            -g t2g.txt \\
+            -f1 cdna.fa \\
+            --workflow $workflow_mode \\
+            $fasta \\
+            $gtf
+
+        echo \$(kb 2>&1) | sed 's/^kb_python //; s/Usage.*\$//' > ${software}.version.txt
+        """
+    } else {
+        """
+        kb \\
+            ref \\
+            -i kb_ref_out.idx \\
+            -g t2g.txt \\
+            -f1 cdna.fa \\
+            -f2 intron.fa \\
+            -c1 cdna_t2c.txt \\
+            -c2 intron_t2c.txt \\
+            --workflow $workflow_mode \\
+            $fasta \\
+            $gtf
+
+        echo \$(kb 2>&1) | sed 's/^kb_python //; s/Usage.*\$//' > ${software}.version.txt
+        """
+    }
+}
diff --git a/software/kallistobustools/ref/meta.yml b/software/kallistobustools/ref/meta.yml
new file mode 100644
index 00000000..c2a85b37
--- /dev/null
+++ b/software/kallistobustools/ref/meta.yml
@@ -0,0 +1,60 @@
+name: kallistobustools_ref
+description: index creation for kb count quantification of single-cell data.
+keywords:
+  - kallisto-bustools
+  - index
+tools:
+  - kb:
+      description: kallisto|bustools (kb) is a tool developed for fast and efficient processing of single-cell OMICS data.
+      homepage: https://www.kallistobus.tools/
+      documentation: https://kb-python.readthedocs.io/en/latest/index.html
+      tool_dev_url: https://github.com/pachterlab/kb_python
+      doi: "https://doi.org/10.22002/D1.1876"
+      licence: MIT License
+
+input:
+  - fasta:
+      type: file
+      description: Genomic DNA fasta file
+      pattern: "*.{fasta,fasta.gz}"
+  - gtf:
+      type: file
+      description: Genomic gtf file
+      pattern: "*.{gtf,gtf.gz}"
+  - workflow_mode:
+      type: value
+      description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus"
+      pattern: "{standard,lamanno,nucleus}"
+
+output:
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+  - index:
+      type: file
+      description: Index file from kb ref.
+      pattern: "*.{idx}"
+  - t2g:
+      type: file
+      description: Transcript to gene table
+      pattern: "*t2g.{txt}"
+  - cdna:
+      type: file
+      description: Cdna fasta file
+      pattern: "*cdna.{fa}"
+  - intron:
+      type: file
+      description: intron fasta file
+      pattern: "*intron.{fa}"
+  - cdna_t2c:
+      type: file
+      description: cdna transcript to capture file
+      pattern: "*cdna_t2c.{txt}"
+  - intron_t2c:
+      type: file
+      description: intron transcript to capture file
+      pattern: "*intron_t2c.{txt}"
+
+authors:
+  - "@flowuenne"
diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml
index e154e08a..8541a0cb 100644
--- a/tests/config/pytest_software.yml
+++ b/tests/config/pytest_software.yml
@@ -182,11 +182,11 @@ fasttree:
   - software/fasttree/**
   - tests/software/fasttree/**
 
-fgbio_callmolecularconsensusreads:
+fgbio/callmolecularconsensusreads:
   - software/fgbio/callmolecularconsensusreads/**
   - tests/software/fgbio/callmolecularconsensusreads/**
 
-fgbio_sortbam:
+fgbio/sortbam:
   - software/fgbio/sortbam/**
   - tests/software/fgbio/sortbam/**
 
@@ -293,6 +293,10 @@ kallisto/index:
   - software/kallisto/index/**
   - tests/software/kallisto/index/**
 
+kallistobustools/ref:
+  - software/kallistobustools/ref/**
+  - tests/software/kallistobustools/ref/**
+
 kraken2/run:
   - software/kraken2/run/**
   - tests/software/kraken2/run/**
@@ -317,11 +321,11 @@ mosdepth:
   - software/mosdepth/**
   - tests/software/mosdepth/**
 
-msisensor_msi:
+msisensor/msi:
   - software/msisensor/msi/**
   - tests/software/msisensor/msi/**
 
-msisensor_scan:
+msisensor/scan:
   - software/msisensor/scan/**
   - tests/software/msisensor/scan/**
 
diff --git a/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz b/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz
new file mode 100644
index 00000000..54dda05e
Binary files /dev/null and b/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz differ
diff --git a/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz b/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz
new file mode 100644
index 00000000..42500d16
Binary files /dev/null and b/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz differ
diff --git a/tests/software/kallistobustools/ref/main.nf b/tests/software/kallistobustools/ref/main.nf
new file mode 100644
index 00000000..8c430bb7
--- /dev/null
+++ b/tests/software/kallistobustools/ref/main.nf
@@ -0,0 +1,32 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { KALLISTOBUSTOOLS_REF } from '../../../../software/kallistobustools/ref/main.nf' addParams( options: [:] )
+
+workflow test_kallistobustools_ref_standard {
+
+    fasta         = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
+    gtf           = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
+    workflow_mode = "standard" // third positional input of KALLISTOBUSTOOLS_REF; not named `workflow` to stay clear of Nextflow's implicit workflow object
+
+    KALLISTOBUSTOOLS_REF(fasta, gtf, workflow_mode)
+}
+
+workflow test_kallistobustools_ref_lamanno {
+
+    fasta         = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
+    gtf           = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
+    workflow_mode = "lamanno" // must differ from "standard" so the non-standard branch of the module is actually run
+
+    KALLISTOBUSTOOLS_REF(fasta, gtf, workflow_mode)
+}
+
+workflow test_kallistobustools_ref_nucleus {
+
+    fasta         = file("${launchDir}/tests/data/delete_me/kallistobustools/GRCm39.chr19_100k.fa.gz", checkIfExists: true)
+    gtf           = file("${launchDir}/tests/data/delete_me/kallistobustools/gencode.VM26.chr19_10k.gtf.gz", checkIfExists: true)
+    workflow_mode = "nucleus" // must differ from "standard" so the non-standard branch of the module is actually run
+
+    KALLISTOBUSTOOLS_REF(fasta, gtf, workflow_mode)
+}
diff --git a/tests/software/kallistobustools/ref/test.yml b/tests/software/kallistobustools/ref/test.yml
new file mode 100644
index 00000000..aeeef936
--- /dev/null
+++ b/tests/software/kallistobustools/ref/test.yml
@@ -0,0 +1,43 @@
+- name: kallistobustools ref test_kallistobustools_ref_standard
+  command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_standard -c tests/config/nextflow.config
+  tags:
+    - kallistobustools
+    - kallistobustools_ref_standard
+    - kallistobustools/ref
+  files:
+    - path: output/kallistobustools/cdna.fa
+      md5sum: da194221c883656c68e2f90d8f77a56b
+    - path: output/kallistobustools/kb_ref_out.idx
+      md5sum: c0ffe9554cb6fdcc7cbb4a1285ca8f29
+    - path: output/kallistobustools/t2g.txt
+      md5sum: 3089d303d9cd1a39a27969eca2dcaba7
+
+- name: kallistobustools ref test_kallistobustools_ref_lamanno
+  command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_lamanno -c tests/config/nextflow.config
+  tags:
+    - kallistobustools
+    - kallistobustools_ref_lamanno
+    - kallistobustools/ref
+  files:
+    # Existence checks only - TODO pin md5sums generated from an actual "lamanno" run
+    - path: output/kallistobustools/cdna.fa
+    - path: output/kallistobustools/kb_ref_out.idx
+    - path: output/kallistobustools/t2g.txt
+    - path: output/kallistobustools/intron.fa
+    - path: output/kallistobustools/cdna_t2c.txt
+    - path: output/kallistobustools/intron_t2c.txt
+
+- name: kallistobustools ref test_kallistobustools_ref_nucleus
+  command: nextflow run tests/software/kallistobustools/ref -entry test_kallistobustools_ref_nucleus -c tests/config/nextflow.config
+  tags:
+    - kallistobustools_ref_nucleus
+    - kallistobustools
+    - kallistobustools/ref
+  files:
+    # Existence checks only - TODO pin md5sums generated from an actual "nucleus" run
+    - path: output/kallistobustools/cdna.fa
+    - path: output/kallistobustools/kb_ref_out.idx
+    - path: output/kallistobustools/t2g.txt
+    - path: output/kallistobustools/intron.fa
+    - path: output/kallistobustools/cdna_t2c.txt
+    - path: output/kallistobustools/intron_t2c.txt