From fce4e60864e43a2ffdbf7d3e4790fe1a68c2eb03 Mon Sep 17 00:00:00 2001 From: Kevin Menden Date: Fri, 2 Jul 2021 13:09:48 +0200 Subject: [PATCH] Fix rsem (#555) * bump bcftools filter * fix bcftools * rsem/preparereference * added calculateexpression * meta.yml and pytest_software * paired end * add test.yml --- software/rsem/calculateexpression/meta.yml | 63 ++++++++++++++++++ software/rsem/preparereference/meta.yml | 37 +++++++++++ tests/config/pytest_software.yml | 8 +++ tests/config/test_data.config | 2 + .../software/rsem/calculateexpression/main.nf | 21 ++++++ .../rsem/calculateexpression/test.yml | 66 +++++++++++++++++++ tests/software/rsem/preparereference/main.nf | 13 ++++ tests/software/rsem/preparereference/test.yml | 22 +++++++ 8 files changed, 232 insertions(+) create mode 100644 software/rsem/calculateexpression/meta.yml create mode 100644 software/rsem/preparereference/meta.yml create mode 100644 tests/software/rsem/calculateexpression/main.nf create mode 100644 tests/software/rsem/calculateexpression/test.yml create mode 100644 tests/software/rsem/preparereference/main.nf create mode 100644 tests/software/rsem/preparereference/test.yml diff --git a/software/rsem/calculateexpression/meta.yml b/software/rsem/calculateexpression/meta.yml new file mode 100644 index 00000000..f8577085 --- /dev/null +++ b/software/rsem/calculateexpression/meta.yml @@ -0,0 +1,63 @@ +name: rsem_calculateexpression +description: Calculate expression with RSEM +keywords: + - rsem + - expression + - quantification +tools: + - rsem: + description: | + RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome + homepage: https://github.com/deweylab/RSEM + documentation: https://github.com/deweylab/RSEM + doi: https://doi.org/10.1186/1471-2105-12-323 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Input reads for quantification + pattern: "*.fastq.gz" + - index: + type: file + description: RSEM index + pattern: "rsem/*" +output: + - counts_gene: + type: file + description: Expression counts on gene level + pattern: "*.genes.results" + - counts_transcript: + type: file + description: Expression counts on transcript level + pattern: "*.isoforms.results" + - stat: + type: file + description: RSEM statistics + pattern: "*.stat" + - logs: + type: file + description: RSEM logs + pattern: "*.log" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - bam_star: + type: file + description: BAM file generated by STAR (optional) + pattern: "*.STAR.genome.bam" + - bam_genome: + type: file + description: Genome BAM file (optional) + pattern: "*.genome.bam" + - bam_transcript: + type: file + description: Transcript BAM file (optional) + pattern: "*.transcript.bam" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/software/rsem/preparereference/meta.yml b/software/rsem/preparereference/meta.yml new file mode 100644 index 00000000..d7c02154 --- /dev/null +++ b/software/rsem/preparereference/meta.yml @@ -0,0 +1,37 @@ +name: rsem_preparereference +description: Prepare a reference genome for RSEM +keywords: + - rsem + - reference +tools: + - rsem: + description: | + RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome + homepage: https://github.com/deweylab/RSEM + documentation: https://github.com/deweylab/RSEM + doi: https://doi.org/10.1186/1471-2105-12-323 +input: + - fasta: + type: file + description: The Fasta file of the reference genome + pattern: "*.{fasta,fa}" + - gtf: + type: file + description: The GTF file of the reference genome + pattern: "*.gtf" +output: + - rsem: + type: directory + description: RSEM index directory + pattern: "rsem" + - transcript_fasta: + type: file + description: Fasta 
file of transcripts + pattern: "rsem/*transcripts.fa" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml index 658d5442..6a32058f 100644 --- a/tests/config/pytest_software.yml +++ b/tests/config/pytest_software.yml @@ -579,6 +579,14 @@ raxmlng: - software/raxmlng/** - tests/software/raxmlng/** +rsem/preparereference: + - software/rsem/preparereference/** + - tests/software/rsem/preparereference/** + +rsem/calculateexpression: + - software/rsem/calculateexpression/** + - tests/software/rsem/calculateexpression/** + rseqc/bamstat: - software/rseqc/bamstat/** - tests/software/rseqc/bamstat/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 228a327f..314a2329 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -140,6 +140,8 @@ params { test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" + test_rnaseq_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.rnaseq_1.fastq.gz" + test_rnaseq_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.rnaseq_2.fastq.gz" test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" diff --git a/tests/software/rsem/calculateexpression/main.nf b/tests/software/rsem/calculateexpression/main.nf new file mode 100644 index 00000000..57177596 --- /dev/null +++ b/tests/software/rsem/calculateexpression/main.nf @@ -0,0 +1,21 @@ +#!/usr/bin/env nextflow + 
+nextflow.enable.dsl = 2 + +include { RSEM_PREPAREREFERENCE } from '../../../../software/rsem/preparereference/main.nf' addParams(options: [args: "--star"]) +include { RSEM_CALCULATEEXPRESSION } from '../../../../software/rsem/calculateexpression/main.nf' addParams(options: [args: "--star --star-gzipped-read-file"]) + +workflow test_rsem_calculateexpression { + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + + input = [ [ id:'test', single_end:false, strandedness: 'forward' ], // meta map + [file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)] + ] + + RSEM_PREPAREREFERENCE ( fasta, gtf ) + + RSEM_CALCULATEEXPRESSION( input, RSEM_PREPAREREFERENCE.out.index ) +} diff --git a/tests/software/rsem/calculateexpression/test.yml b/tests/software/rsem/calculateexpression/test.yml new file mode 100644 index 00000000..b4bb2e65 --- /dev/null +++ b/tests/software/rsem/calculateexpression/test.yml @@ -0,0 +1,66 @@ +- name: rsem calculateexpression test_rsem_calculateexpression + command: nextflow run tests/software/rsem/calculateexpression -entry test_rsem_calculateexpression -c tests/config/nextflow.config + tags: + - rsem/calculateexpression + - rsem + files: + - path: output/index/rsem/Genome + md5sum: a654229fbca6071dcb6b01ce7df704da + - path: output/index/rsem/Log.out + - path: output/index/rsem/SA + md5sum: 8c3edc46697b72c9e92440d4cf43506c + - path: output/index/rsem/SAindex + md5sum: fd05c149960e72642a8d7c860528ae81 + - path: output/index/rsem/chrLength.txt + md5sum: c81f40f27e72606d7d07097c1d56a5b5 + - path: output/index/rsem/chrName.txt + md5sum: 5ae68a67b70976ee95342a7451cb5af1 + - path: output/index/rsem/chrNameLength.txt + md5sum: b190587cae0531f3cf25552d8aa674db + - path: 
output/index/rsem/chrStart.txt + md5sum: 8d3291e6bcdbe9902fbd7c887494173f + - path: output/index/rsem/exonGeTrInfo.tab + md5sum: d04497f69d6ef889efd4d34fe63edcc4 + - path: output/index/rsem/exonInfo.tab + md5sum: 0d560290fab688b7268d88d5494bf9fe + - path: output/index/rsem/geneInfo.tab + md5sum: 8b608537307443ffaee4927d2b428805 + - path: output/index/rsem/genome.chrlist + md5sum: b190587cae0531f3cf25552d8aa674db + - path: output/index/rsem/genome.fasta + md5sum: f315020d899597c1b57e5fe9f60f4c3e + - path: output/index/rsem/genome.grp + md5sum: c2848a8b6d495956c11ec53efc1de67e + - path: output/index/rsem/genome.idx.fa + md5sum: 050c521a2719c2ae48267c1e65218f29 + - path: output/index/rsem/genome.n2g.idx.fa + md5sum: 050c521a2719c2ae48267c1e65218f29 + - path: output/index/rsem/genome.seq + md5sum: 94da0c6b88c33e63c9a052a11f4f57c1 + - path: output/index/rsem/genome.ti + md5sum: c9e4ae8d4d13a504eec2acf1b8589a66 + - path: output/index/rsem/genome.transcripts.fa + md5sum: 050c521a2719c2ae48267c1e65218f29 + - path: output/index/rsem/genomeParameters.txt + md5sum: 2fe3a030e1706c3e8cd4df3818e6dd2f + - path: output/index/rsem/sjdbInfo.txt + md5sum: 5690ea9d9f09f7ff85b7fd47bd234903 + - path: output/index/rsem/sjdbList.fromGTF.out.tab + md5sum: 8760c33e966dad0b39f440301ebbdee4 + - path: output/index/rsem/sjdbList.out.tab + md5sum: 9e4f991abbbfeb3935a2bb21b9e258f1 + - path: output/index/rsem/transcriptInfo.tab + md5sum: 0c3a5adb49d15e5feff81db8e29f2e36 + - path: output/rsem/test.genes.results + md5sum: c7ec226f76736ea805771e73553ae359 + - path: output/rsem/test.isoforms.results + md5sum: 99f7f80aa505b44ca429fdebbd7dd5d8 + - path: output/rsem/test.log + - path: output/rsem/test.stat/test.cnt + md5sum: 76249e6b2f3c104f414aae596ba2c2f4 + - path: output/rsem/test.stat/test.model + md5sum: a7a4bc1734918ef5848604e3362b83e2 + - path: output/rsem/test.stat/test.theta + md5sum: de2e4490c98cc5383a86ae8225fd0a28 + - path: output/rsem/test.transcript.bam + md5sum: 
7846491086c478858419667d60f18edd diff --git a/tests/software/rsem/preparereference/main.nf b/tests/software/rsem/preparereference/main.nf new file mode 100644 index 00000000..9e38acdf --- /dev/null +++ b/tests/software/rsem/preparereference/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RSEM_PREPAREREFERENCE } from '../../../../software/rsem/preparereference/main.nf' addParams(options: [:]) + +workflow test_rsem_preparereference { + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + + RSEM_PREPAREREFERENCE ( fasta, gtf ) +} diff --git a/tests/software/rsem/preparereference/test.yml b/tests/software/rsem/preparereference/test.yml new file mode 100644 index 00000000..78c5ac67 --- /dev/null +++ b/tests/software/rsem/preparereference/test.yml @@ -0,0 +1,22 @@ +- name: rsem preparereference test_rsem_preparereference + command: nextflow run tests/software/rsem/preparereference -entry test_rsem_preparereference -c tests/config/nextflow.config + tags: + - rsem + - rsem/preparereference + files: + - path: output/index/rsem/genome.chrlist + md5sum: b190587cae0531f3cf25552d8aa674db + - path: output/index/rsem/genome.fasta + md5sum: f315020d899597c1b57e5fe9f60f4c3e + - path: output/index/rsem/genome.grp + md5sum: c2848a8b6d495956c11ec53efc1de67e + - path: output/index/rsem/genome.idx.fa + md5sum: 050c521a2719c2ae48267c1e65218f29 + - path: output/index/rsem/genome.n2g.idx.fa + md5sum: 050c521a2719c2ae48267c1e65218f29 + - path: output/index/rsem/genome.seq + md5sum: 94da0c6b88c33e63c9a052a11f4f57c1 + - path: output/index/rsem/genome.ti + md5sum: c9e4ae8d4d13a504eec2acf1b8589a66 + - path: output/index/rsem/genome.transcripts.fa + md5sum: 050c521a2719c2ae48267c1e65218f29