diff --git a/modules/cellranger/.gitignore b/modules/cellranger/.gitignore new file mode 100644 index 00000000..9f8cb0f5 --- /dev/null +++ b/modules/cellranger/.gitignore @@ -0,0 +1 @@ +cellranger-*.tar.gz diff --git a/modules/cellranger/Dockerfile b/modules/cellranger/Dockerfile index aced4233..e9437bf6 100644 --- a/modules/cellranger/Dockerfile +++ b/modules/cellranger/Dockerfile @@ -4,7 +4,7 @@ LABEL authors="Gisela Gabernet " \ # Disclaimer: this container is not provided nor supported by 10x Genomics. # Install procps and clean apt cache -RUN apt-get update \ +RUN apt-get update --allow-releaseinfo-change \ && apt-get install -y procps \ && apt-get clean -y && rm -rf /var/lib/apt/lists/* diff --git a/modules/cellranger/readme.md b/modules/cellranger/README.md similarity index 100% rename from modules/cellranger/readme.md rename to modules/cellranger/README.md diff --git a/modules/cellranger/count/main.nf b/modules/cellranger/count/main.nf new file mode 100644 index 00000000..be3f512a --- /dev/null +++ b/modules/cellranger/count/main.nf @@ -0,0 +1,49 @@ +process CELLRANGER_COUNT { + tag "$meta.gem" + label 'process_high' + + if (params.enable_conda) { + exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." + } + container "nfcore/cellranger:6.0.2" + + input: + tuple val(meta), path(reads) + path reference + + output: + path("sample-${meta.gem}/outs/*"), emit: outs + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def sample_arg = meta.samples.unique().join(",") + def reference_name = reference.name + """ + cellranger \\ + count \\ + --id='sample-${meta.gem}' \\ + --fastqs=. 
\\ + --transcriptome=$reference_name \\ + --sample=$sample_arg \\ + --localcores=$task.cpus \\ + --localmem=${task.memory.toGiga()} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ + + stub: + """ + mkdir -p "sample-${meta.gem}/outs/" + touch sample-${meta.gem}/outs/fake_file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/cellranger/count/meta.yml b/modules/cellranger/count/meta.yml new file mode 100644 index 00000000..e4647c98 --- /dev/null +++ b/modules/cellranger/count/meta.yml @@ -0,0 +1,40 @@ +name: cellranger_count +description: Module to use Cell Ranger's pipelines to analyze sequencing data produced from Chromium Single Cell Gene Expression. +keywords: + - align + - count + - reference +tools: + - cellranger: + description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. + homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + doi: "" + licence: 10x Genomics EULA +input: + - meta: + type: map + description: | + Groovy Map containing sample information; the process reads the gem and samples keys + e.g. [ id:'test', gem:'123', samples:['test_10x'] ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - reference: + type: folder + description: Folder containing all the reference indices needed by Cell Ranger +output: + - outs: + type: file + description: Files containing the outputs of Cell Ranger + pattern: "sample-${meta.gem}/outs/*" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" diff --git a/modules/cellranger/mkfastq/main.nf b/modules/cellranger/mkfastq/main.nf new file mode 100644 index 00000000..14d68665 --- /dev/null +++ b/modules/cellranger/mkfastq/main.nf @@ -0,0 +1,33 @@ +// Runs `cellranger mkfastq` on the BCL input (bcl, passed as --run) with a sample sheet (csv); declares *.fastq.gz and versions.yml as outputs. +process CELLRANGER_MKFASTQ { + tag "mkfastq" + label 'process_medium' + + if (params.enable_conda) { + exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." + } + container "litd/docker-cellranger:v6.1.1" // FIXME Add bcl2fastq to nf-core docker image + + input: + path bcl + path csv + + output: + path "versions.yml", emit: versions + path "*.fastq.gz" , emit: fastq + + script: + def args = task.ext.args ?: '' + // Continuations are written as '\\' so a literal backslash reaches bash; every option line needs one, otherwise extra args would be run as a separate command. + """ + cellranger mkfastq --id=${bcl.getSimpleName()} \\ + --run=$bcl \\ + --csv=$csv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/cellranger/mkfastq/meta.yml b/modules/cellranger/mkfastq/meta.yml new file mode 100644 index 00000000..e288fb8c --- /dev/null +++ b/modules/cellranger/mkfastq/meta.yml @@ -0,0 +1,38 @@ +name: cellranger_mkfastq +description: Module to create fastqs needed by the 10x Genomics Cell Ranger tool. Uses the cellranger mkfastq command. 
+keywords: + - reference + - mkfastq + - fastq + - illumina + - bcl2fastq +tools: + - cellranger: + description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. + homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + doi: "" + licence: 10x Genomics EULA +input: + - bcl: + type: file + description: Base call files + pattern: "*.bcl.bgzf" + - csv: + type: file + description: Sample sheet + pattern: "*.csv" +output: + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" + - "@RHReynolds" diff --git a/modules/cellranger/mkgtf/main.nf b/modules/cellranger/mkgtf/main.nf new file mode 100644 index 00000000..4db274d7 --- /dev/null +++ b/modules/cellranger/mkgtf/main.nf @@ -0,0 +1,31 @@ +process CELLRANGER_MKGTF { + tag "$gtf" + label 'process_low' + + if (params.enable_conda) { + exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." 
+ } + container "nfcore/cellranger:6.0.2" + + input: + path gtf + + output: + path "*.filtered.gtf", emit: gtf + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + cellranger \\ + mkgtf \\ + $gtf \\ + ${gtf.baseName}.filtered.gtf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ +} diff --git a/modules/cellranger/mkgtf/meta.yml b/modules/cellranger/mkgtf/meta.yml new file mode 100644 index 00000000..c160072f --- /dev/null +++ b/modules/cellranger/mkgtf/meta.yml @@ -0,0 +1,31 @@ +name: cellranger_mkgtf +description: Module to build a filtered gtf needed by the 10x Genomics Cell Ranger tool. Uses the cellranger mkgtf command. +keywords: + - reference + - mkref + - index +tools: + - cellranger: + description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. 
+ homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + doi: "" + licence: 10x Genomics EULA +input: + - gtf: + type: file + description: gtf transcriptome file to filter + pattern: "*.gtf" +output: + - gtf: + type: file + description: filtered gtf transcriptome file + pattern: "*.filtered.gtf" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@ggabernet" + - "@Emiller88" diff --git a/modules/cellranger/mkref/meta.yml b/modules/cellranger/mkref/meta.yml index 9b849af7..06bf5b93 100644 --- a/modules/cellranger/mkref/meta.yml +++ b/modules/cellranger/mkref/meta.yml @@ -1,39 +1,37 @@ name: cellranger_mkref description: Module to build the reference needed by the 10x Genomics Cell Ranger tool. Uses the cellranger mkref command. keywords: - - reference - - mkref - - index + - reference + - mkref + - index tools: - - cellranger: - description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. 
- homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger - documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - doi: "" - licence: 10x Genomics EULA - + - cellranger: + description: Cell Ranger by 10x Genomics is a set of analysis pipelines that process Chromium single-cell data to align reads, generate feature-barcode matrices, perform clustering and other secondary analysis, and more. + homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger + documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov + doi: "" + licence: 10x Genomics EULA input: - - fasta: - type: file - description: fasta genome file - pattern: "*.{fasta,fa}" - - gtf: - type: file - description: gtf transcriptome file - pattern: "*.gtf" - - reference_name: - type: val - description: name to give the reference folder - pattern: str - + - fasta: + type: file + description: fasta genome file + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: gtf transcriptome file + pattern: "*.gtf" + - reference_name: + type: val + description: name to give the reference folder + pattern: str output: - - versions: - type: file - description: File containing software version - pattern: "versions.yml" - - reference: - type: folder - description: Folder containing all the reference indices needed by Cell Ranger + - reference: + type: folder + description: Folder containing all the reference indices needed by Cell Ranger + - versions: + type: file + description: File containing software version + pattern: "versions.yml" 
authors: - "@ggabernet" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index aa59b7c9..85689d8b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -270,9 +270,25 @@ cat/fastq: - modules/cat/fastq/** - tests/modules/cat/fastq/** -cellranger/mkref: +cellranger/mkgtf: # &cellranger/mkgtf + - modules/cellranger/mkgtf/** + - tests/modules/cellranger/mkgtf/** + +cellranger/mkref: # &cellranger/mkref - modules/cellranger/mkref/** - tests/modules/cellranger/mkref/** + # - *cellranger/mkgtf + - modules/cellranger/mkgtf/** + - tests/modules/cellranger/mkgtf/** + +cellranger/count: + - modules/cellranger/count/** + - tests/modules/cellranger/count/** + # - *cellranger/mkref + - modules/cellranger/mkref/** + - tests/modules/cellranger/mkref/** + - modules/cellranger/mkgtf/** + - tests/modules/cellranger/mkgtf/** checkm/lineagewf: - modules/checkm/lineagewf/** diff --git a/tests/modules/cellranger/count/main.nf b/tests/modules/cellranger/count/main.nf new file mode 100644 index 00000000..bb9e11d1 --- /dev/null +++ b/tests/modules/cellranger/count/main.nf @@ -0,0 +1,33 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CELLRANGER_MKGTF } from '../../../../modules/cellranger/mkgtf/main.nf' +include { CELLRANGER_MKREF } from '../../../../modules/cellranger/mkref/main.nf' +include { CELLRANGER_COUNT } from '../../../../modules/cellranger/count/main.nf' + +workflow test_cellranger_count { + + input = [ [ id:'test', single_end:true, strandedness:'forward', gem: '123', samples: ["test_10x"] ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_10x_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_10x_2_fastq_gz'], checkIfExists: true) + ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + reference_name = 
"homo_sapiens_chr22_reference" + + CELLRANGER_MKGTF ( gtf ) + + CELLRANGER_MKREF ( + fasta, + CELLRANGER_MKGTF.out.gtf, + reference_name + ) + + CELLRANGER_COUNT( + input, + CELLRANGER_MKREF.out.reference + ) +} diff --git a/tests/modules/cellranger/count/nextflow.config b/tests/modules/cellranger/count/nextflow.config new file mode 100644 index 00000000..16419fce --- /dev/null +++ b/tests/modules/cellranger/count/nextflow.config @@ -0,0 +1,31 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: CELLRANGER_MKGTF { + ext.args = '--attribute=gene_biotype:protein_coding \ + --attribute=gene_biotype:lincRNA \ + --attribute=gene_biotype:antisense \ + --attribute=gene_biotype:IG_LV_gene \ + --attribute=gene_biotype:IG_V_gene \ + --attribute=gene_biotype:IG_V_pseudogene \ + --attribute=gene_biotype:IG_D_gene \ + --attribute=gene_biotype:IG_J_gene \ + --attribute=gene_biotype:IG_J_pseudogene \ + --attribute=gene_biotype:IG_C_gene \ + --attribute=gene_biotype:IG_C_pseudogene \ + --attribute=gene_biotype:TR_V_gene \ + --attribute=gene_biotype:TR_V_pseudogene \ + --attribute=gene_biotype:TR_D_gene \ + --attribute=gene_biotype:TR_J_gene \ + --attribute=gene_biotype:TR_J_pseudogene \ + --attribute=gene_biotype:TR_C_gene' + + + } + + withName: CELLRANGER_COUNT { + ext.args = '--chemistry SC3Pv3' + } + +} diff --git a/tests/modules/cellranger/count/test.yml b/tests/modules/cellranger/count/test.yml new file mode 100644 index 00000000..6b151a2a --- /dev/null +++ b/tests/modules/cellranger/count/test.yml @@ -0,0 +1,19 @@ +- name: cellranger count test_cellranger_count + command: nextflow run tests/modules/cellranger/count -entry test_cellranger_count -c tests/config/nextflow.config -c tests/modules/cellranger/count/nextflow.config + tags: + - cellranger + - cellranger/count + files: + - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5 + - path: 
output/cellranger/sample-123/outs/metrics_summary.csv + md5sum: 707df0f101d479d93f412ca74f9c4131 + - path: output/cellranger/sample-123/outs/molecule_info.h5 + md5sum: cf03b2b3ca776a1c37aa3518e91268ba + - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam + md5sum: 15441da9cfceea0bb48c8b66b1b860df + - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai + md5sum: 7c3d49c77016a09535aff61a027f750c + - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix + - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5 + md5sum: 40c8df814eb8723b7317b234dc8222e9 + - path: output/cellranger/sample-123/outs/web_summary.html diff --git a/tests/modules/cellranger/mkfastq/main.nf b/tests/modules/cellranger/mkfastq/main.nf new file mode 100644 index 00000000..5e594fd1 --- /dev/null +++ b/tests/modules/cellranger/mkfastq/main.nf @@ -0,0 +1,26 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { UNTAR } from '../../../../modules/untar/main.nf' +include { CELLRANGER_MKFASTQ } from '../../../../modules/cellranger/mkfastq/main.nf' + +workflow test_cellranger_mkfastq_simple { + + simple_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-simple-1.2.0.csv", checkIfExists: true) + tiny_bcl = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) + + UNTAR ( tiny_bcl ) + + CELLRANGER_MKFASTQ ( UNTAR.out.untar, simple_csv) +} + +workflow test_cellranger_mkfastq_illumina { + + samplesheet_csv = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-samplesheet-1.2.0.csv", checkIfExists: true) + tiny_bcl = file("https://cf.10xgenomics.com/supp/cell-exp/cellranger-tiny-bcl-1.2.0.tar.gz", checkIfExists: true) + + UNTAR ( tiny_bcl ) + + CELLRANGER_MKFASTQ ( UNTAR.out.untar, samplesheet_csv) +} diff --git a/tests/modules/cellranger/mkfastq/nextflow.config b/tests/modules/cellranger/mkfastq/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- 
/dev/null +++ b/tests/modules/cellranger/mkfastq/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/cellranger/mkfastq/test.yml b/tests/modules/cellranger/mkfastq/test.yml new file mode 100644 index 00000000..bdd32187 --- /dev/null +++ b/tests/modules/cellranger/mkfastq/test.yml @@ -0,0 +1,13 @@ +- name: cellranger mkfastq test_cellranger_mkfastq_simple + command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_simple -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config + tags: + - cellranger + - cellranger/mkfastq + # files: + # - path: output/cellranger/genome.filtered.gtf + # md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b +- name: cellranger mkfastq test_cellranger_mkfastq_illumina + command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config + tags: + - cellranger + - cellranger/mkfastq diff --git a/tests/modules/cellranger/mkgtf/main.nf b/tests/modules/cellranger/mkgtf/main.nf new file mode 100644 index 00000000..19e2cba0 --- /dev/null +++ b/tests/modules/cellranger/mkgtf/main.nf @@ -0,0 +1,11 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { CELLRANGER_MKGTF } from '../../../../modules/cellranger/mkgtf/main.nf' + +workflow test_cellranger_mkgtf { + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + + CELLRANGER_MKGTF ( gtf ) +} diff --git a/tests/modules/cellranger/mkgtf/nextflow.config b/tests/modules/cellranger/mkgtf/nextflow.config new file mode 100644 index 00000000..03fd9e09 --- /dev/null +++ b/tests/modules/cellranger/mkgtf/nextflow.config @@ -0,0 +1,27 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 
CELLRANGER_MKGTF { + ext.args = '--attribute=gene_biotype:protein_coding \ + --attribute=gene_biotype:lincRNA \ + --attribute=gene_biotype:antisense \ + --attribute=gene_biotype:IG_LV_gene \ + --attribute=gene_biotype:IG_V_gene \ + --attribute=gene_biotype:IG_V_pseudogene \ + --attribute=gene_biotype:IG_D_gene \ + --attribute=gene_biotype:IG_J_gene \ + --attribute=gene_biotype:IG_J_pseudogene \ + --attribute=gene_biotype:IG_C_gene \ + --attribute=gene_biotype:IG_C_pseudogene \ + --attribute=gene_biotype:TR_V_gene \ + --attribute=gene_biotype:TR_V_pseudogene \ + --attribute=gene_biotype:TR_D_gene \ + --attribute=gene_biotype:TR_J_gene \ + --attribute=gene_biotype:TR_J_pseudogene \ + --attribute=gene_biotype:TR_C_gene' + + + } + +} diff --git a/tests/modules/cellranger/mkgtf/test.yml b/tests/modules/cellranger/mkgtf/test.yml new file mode 100644 index 00000000..2130afd2 --- /dev/null +++ b/tests/modules/cellranger/mkgtf/test.yml @@ -0,0 +1,8 @@ +- name: cellranger mkgtf test_cellranger_mkgtf + command: nextflow run tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c tests/config/nextflow.config -c tests/modules/cellranger/mkgtf/nextflow.config + tags: + - cellranger + - cellranger/mkgtf + files: + - path: output/cellranger/genome.filtered.gtf + md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b