new module cellranger mkref (#896)

* add cellranger mkref module

* add cellranger mkref tests

* update test yml chksum

* fix module linting

* fix test yml

* fix getprocessname

* fix versions typo

* fix cellranger test.yml

* fix versions.yml

* test versions.yml

* fix grep version

* fix cellranger version

* add dockerfile and readme

* review container statement

* Update modules/cellranger/mkref/meta.yml

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>

* add disclaimers

* change location dockerfile

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
This commit is contained in:
Gisela Gabernet 2021-10-28 13:33:57 +02:00 committed by GitHub
parent 0b0f87c2f7
commit d5183a7fec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 259 additions and 0 deletions

View file

@ -0,0 +1,21 @@
FROM continuumio/miniconda3:4.8.2
LABEL authors="Gisela Gabernet <gisela.gabernet@gmail.com>" \
      description="Docker image containing Cell Ranger"
# Disclaimer: this container is neither provided nor supported by 10x Genomics.

# Install procps and clean apt cache
RUN apt-get update \
    && apt-get install -y procps \
    && apt-get clean -y && rm -rf /var/lib/apt/lists/*

# Cell Ranger version to install; it must match the name of the pre-downloaded
# tarball (cellranger-<version>.tar.gz) placed next to this Dockerfile.
ENV CELLRANGER_VER 6.0.2

# Copy pre-downloaded cellranger file
COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz

# Install cellranger: unpack it under /opt, expose the launcher through a symlink
# in /usr/bin, then delete the tarball.
# No `export PATH=...` here: a variable exported inside a RUN instruction only
# lives for that single shell and would not be visible in the final image.
RUN cd /opt \
    && tar -xzvf cellranger-$CELLRANGER_VER.tar.gz \
    && ln -s /opt/cellranger-$CELLRANGER_VER/cellranger /usr/bin/cellranger \
    && rm -rf /opt/cellranger-$CELLRANGER_VER.tar.gz

View file

@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//
//
// Derive the lower-cased software tool name from $task.process:
// take the last ':'-separated token, then its first '_'-separated token
//
def getSoftwareName(task_process) {
    def process_token = task_process.tokenize(':')[-1]
    def tool_token    = process_token.tokenize('_')[0]
    return tool_token.toLowerCase()
}
//
// Derive the module (process) name from $task.process:
// the last ':'-separated token, with its case left untouched
//
def getProcessName(task_process) {
    def name_tokens = task_process.tokenize(':')
    return name_tokens[-1]
}
//
// Build the Groovy Map of options available to nf-core modules, filling in a
// default for every key the caller left unset (publish_files has no default
// and is passed through as given, including null)
//
def initOptions(Map args) {
    return [
        'args'            : args.args            ?: '',
        'args2'           : args.args2           ?: '',
        'args3'           : args.args3           ?: '',
        'publish_by_meta' : args.publish_by_meta ?: [],
        'publish_dir'     : args.publish_dir     ?: '',
        'publish_files'   : args.publish_files,
        'suffix'          : args.suffix          ?: ''
    ]
}
//
// Tidy up and join elements of a list to return a path string.
// Null, empty and whitespace-only entries are dropped; the remaining entries
// are stripped of surrounding whitespace and of leading/trailing slashes,
// then joined with '/'.
//
def getPathFromList(path_list) {
    // `item?.trim()` yields null for a null entry and '' for a blank one; both are
    // false under Groovy truth. The earlier `!item?.trim().isEmpty()` still called
    // isEmpty() on null for null entries, because `?.` only guards the call it is on.
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then remove leading and trailing slashes
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
//
// Function to save/publish module results: maps an output filename to the path
// it should be published under, or returns null when it should not be published
//
def saveFiles(Map args) {
    def opts      = initOptions(args.options)
    def file_name = args.filename

    // Do not publish versions.yml unless running from pytest workflow
    def is_versions_file = file_name.equals('versions.yml')
    if (is_versions_file && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // First path element: publish_dir from the options if set, else the caller's publish_dir
    def dir_parts = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one path element per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!(args.meta && meta_key instanceof String)) {
                continue
            }
            // Falls back to the key itself when meta has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Values that are not plain Strings contribute an empty element
            dir_parts.add(segment instanceof String ? segment : '')
        }
    }

    if (opts.publish_files instanceof Map) {
        // Publish only files whose name ends with one of the map keys; the first
        // matching entry's value is appended as a sub-directory
        def matched = opts.publish_files.find { entry -> file_name.endsWith(entry.key) }
        if (matched != null) {
            def parts_with_ext = dir_parts + [ matched.value ]
            return "${getPathFromList(parts_with_ext)}/${file_name}"
        }
        return null
    }

    if (opts.publish_files == null) {
        // No filter configured: publish every file
        return "${getPathFromList(dir_parts)}/${file_name}"
    }

    // publish_files is set but is not a Map: publish nothing
    return null
}

View file

@ -0,0 +1,40 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Module options default to an empty map; an including workflow can supply its
// own via addParams( options: ... )
params.options = [:]
// Copy of the options with per-key defaults filled in by initOptions
options = initOptions(params.options)
// Builds a Cell Ranger reference by running `cellranger mkref` on a FASTA and a
// GTF file, and records the Cell Ranger version in versions.yml.
process CELLRANGER_MKREF {
tag 'mkref'
label 'process_high'
// Publish under params.outdir; saveFiles() picks the relative path for each file
// (or returns null to skip it), using the lower-cased tool name as directory
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
// Only a container is declared for this module, so stop early when conda is enabled
if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
}
container "nfcore/cellranger:6.0.2"
input:
// Genome sequence, passed to --fasta
path fasta
// Gene annotation, passed to --genes
path gtf
// Value passed to --genome; also the name of the path collected as `reference`
val(reference_name)
output:
path "versions.yml" , emit: versions
path "${reference_name}", emit: reference
// The script runs mkref, then writes versions.yml through a heredoc: the process
// name and the tool name as keys, with the x.y.z version that sed extracts from
// the output of `cellranger --version`.
script:
"""
cellranger mkref \\
--genome=${reference_name} \\
--fasta=${fasta} \\
--genes=${gtf}
cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' )
END_VERSIONS
"""
}

View file

@ -0,0 +1,39 @@
# Metadata for the cellranger mkref module: tool description, inputs, outputs and authors.
name: cellranger_mkref
description: Module to build the reference needed by the 10x Genomics Cell Ranger tool. Uses the cellranger mkref command.
keywords:
- reference
- mkref
- index
tools:
- cellranger:
description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more.
homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger
documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov
# NOTE(review): tool_dev_url is the same URL as documentation - confirm whether a separate development URL exists
tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov
# No DOI is recorded for this tool
doi: ""
licence: 10x Genomics EULA
# Inputs, in the order the process declares them: fasta, gtf, reference_name
input:
- fasta:
type: file
description: fasta genome file
pattern: "*.{fasta,fa}"
- gtf:
type: file
description: gtf transcriptome file
pattern: "*.gtf"
- reference_name:
type: val
description: name to give the reference folder
pattern: str
# Outputs emitted by the process
output:
- versions:
type: file
description: File containing software version
pattern: "versions.yml"
- reference:
type: folder
description: Folder containing all the reference indices needed by Cell Ranger
authors:
- "@ggabernet"

View file

@ -0,0 +1,18 @@
# Updating the docker container and making a new module release
Cell Ranger is a commercial tool by 10x Genomics. The container used by the cellranger nf-core module is neither provided nor supported by 10x Genomics. Updating the Cell Ranger version in the container and pushing the new image to Docker Hub must be done manually.
1. Navigate to the [Cell Ranger download page](https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest) and download the tarball of the desired Cell Ranger version with `curl` or `wget`. Place this file in the same folder as the Dockerfile.
2. Edit the Dockerfile: update the Cell Ranger version in this line:
```bash
ENV CELLRANGER_VER <VERSION>
```
3. Build the container image and push it to Docker Hub:
```bash
docker build . -t nfcore/cellranger:<VERSION>
docker push nfcore/cellranger:<VERSION>
```

View file

@ -254,6 +254,10 @@ cat/fastq:
- modules/cat/fastq/**
- tests/modules/cat/fastq/**
cellranger/mkref:
- modules/cellranger/mkref/**
- tests/modules/cellranger/mkref/**
checkm/lineagewf:
- modules/checkm/lineagewf/**
- tests/modules/checkm/lineagewf/**

View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CELLRANGER_MKREF } from '../../../../modules/cellranger/mkref/main.nf' addParams( options: [:] )
// Test workflow: builds a Cell Ranger reference from the homo_sapiens genome
// FASTA and GTF listed in params.test_data
workflow test_cellranger_mkref {
    genome_files = params.test_data['homo_sapiens']['genome']

    genome_fasta = file(genome_files['genome_fasta'], checkIfExists: true)
    genome_gtf   = file(genome_files['genome_gtf'], checkIfExists: true)
    ref_name     = "homo_sapiens_chr22_reference"

    CELLRANGER_MKREF ( genome_fasta, genome_gtf, ref_name )
}

View file

@ -0,0 +1,43 @@
# Expected outputs of the test_cellranger_mkref workflow. Each listed path must be
# produced under output/cellranger/; entries with an md5sum are also checksum-verified.
- name: cellranger mkref test_cellranger_mkref
command: nextflow run tests/modules/cellranger/mkref -entry test_cellranger_mkref -c tests/config/nextflow.config
tags:
- cellranger
- cellranger/mkref
files:
- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
md5sum: f315020d899597c1b57e5fe9f60f4c3e
- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai
md5sum: 3520cd30e1b100e55f578db9c855f685
- path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
md5sum: 6d9b5f409bfea95022bc25b9590e194e
- path: output/cellranger/homo_sapiens_chr22_reference/reference.json
md5sum: a4e2b9bbf016c55b0d4d7bc1fa53896f
- path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
md5sum: 22102926fadf5890e905ca71b2da3f35
- path: output/cellranger/homo_sapiens_chr22_reference/star/SA
md5sum: bcf3e1a855783105150b46c905465333
- path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex
md5sum: b93fb07d342e6c32a00ebc4311c0ad38
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt
md5sum: c81f40f27e72606d7d07097c1d56a5b5
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt
md5sum: 5ae68a67b70976ee95342a7451cb5af1
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt
md5sum: b190587cae0531f3cf25552d8aa674db
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt
md5sum: bc73df776dd3d5bb9cfcbcba60880519
- path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab
md5sum: d04497f69d6ef889efd4d34fe63edcc4
- path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab
md5sum: 0d560290fab688b7268d88d5494bf9fe
- path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab
md5sum: 8b608537307443ffaee4927d2b428805
# Existence check only: this entry has no md5sum.
# NOTE(review): presumably the file content differs between runs - confirm before adding a checksum
- path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbInfo.txt
md5sum: 5690ea9d9f09f7ff85b7fd47bd234903
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.fromGTF.out.tab
md5sum: 8760c33e966dad0b39f440301ebbdee4
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.out.tab
md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1
- path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36