* bump bcftools filter

* fix bcftools

* rsem/preparereference

* added calculateexpression

* meta.yml and pytest_software

* paired end

* add test.yml
This commit is contained in:
Kevin Menden 2021-07-02 13:09:48 +02:00 committed by GitHub
parent b5f6a1da79
commit fce4e60864
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 232 additions and 0 deletions

View file

@ -0,0 +1,63 @@
# Module metadata for rsem_calculateexpression: describes the wrapped tool,
# the inputs the module takes and the outputs it produces.
name: rsem_calculateexpression
description: Calculate expression with RSEM
keywords:
  - rsem
  - expression
  - quantification
tools:
  # The tool entry is keyed by the tool being wrapped (RSEM, per the
  # description and homepage below).
  - rsem:
      description: |
        RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome
      homepage: https://github.com/deweylab/RSEM
      documentation: https://github.com/deweylab/RSEM
      doi: https://doi.org/10.1186/1471-2105-12-323
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: Input reads for quantification
      pattern: "*.fastq.gz"
  - index:
      type: file
      description: RSEM index
      pattern: "rsem/*"
output:
  - counts_gene:
      type: file
      description: Expression counts on gene level
      pattern: "*.genes.results"
  # NOTE(review): key corrected from the misspelled "counts_transctips";
  # confirm it matches the emit name declared in the module's main.nf.
  - counts_transcript:
      type: file
      description: Expression counts on transcript level
      pattern: "*.isoforms.results"
  - stat:
      type: file
      description: RSEM statistics
      pattern: "*.stat"
  - logs:
      type: file
      description: RSEM logs
      pattern: "*.log"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
  - bam_star:
      type: file
      description: BAM file generated by STAR (optional)
      pattern: "*.STAR.genome.bam"
  - bam_genome:
      type: file
      description: Genome BAM file (optional)
      pattern: "*.genome.bam"
  - bam_transcript:
      type: file
      description: Transcript BAM file (optional)
      pattern: "*.transcript.bam"
authors:
  - "@drpatelh"
  - "@kevinmenden"

View file

@ -0,0 +1,37 @@
# Module metadata for rsem_preparereference: describes the wrapped tool,
# the inputs the module takes and the outputs it produces.
name: rsem_preparereference
description: Prepare a reference genome for RSEM
keywords:
  - rsem
  - reference
tools:
  # The tool entry is keyed by the tool being wrapped (RSEM, per the
  # description and homepage below).
  - rsem:
      description: |
        RSEM: accurate transcript quantification from RNA-Seq data with or without a reference genome
      homepage: https://github.com/deweylab/RSEM
      documentation: https://github.com/deweylab/RSEM
      doi: https://doi.org/10.1186/1471-2105-12-323
input:
  - fasta:
      type: file
      description: The Fasta file of the reference genome
      pattern: "*.{fasta,fa}"
  - gtf:
      type: file
      description: The GTF file of the reference genome
      pattern: "*.gtf"
output:
  # NOTE(review): renamed from "rsem" because the calculateexpression test
  # workflow consumes this output as `RSEM_PREPAREREFERENCE.out.index`;
  # confirm against the emit name declared in the module's main.nf.
  - index:
      type: directory
      description: RSEM index directory
      pattern: "rsem"
  - transcript_fasta:
      type: file
      description: Fasta file of transcripts
      pattern: "rsem/*transcripts.fa"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@drpatelh"
  - "@kevinmenden"

View file

@ -579,6 +579,14 @@ raxmlng:
- software/raxmlng/** - software/raxmlng/**
- tests/software/raxmlng/** - tests/software/raxmlng/**
# Path globs associated with the rsem/preparereference module and its tests.
rsem/preparereference:
  - software/rsem/preparereference/**
  - tests/software/rsem/preparereference/**

# Path globs associated with the rsem/calculateexpression module and its tests.
rsem/calculateexpression:
  - software/rsem/calculateexpression/**
  - tests/software/rsem/calculateexpression/**

rseqc/bamstat: rseqc/bamstat:
- software/rseqc/bamstat/** - software/rseqc/bamstat/**
- tests/software/rseqc/bamstat/** - tests/software/rseqc/bamstat/**

View file

@ -140,6 +140,8 @@ params {
test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz" test2_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2_2.fastq.gz"
test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz" test2_umi_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_1.fastq.gz"
test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz" test2_umi_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test2.umi_2.fastq.gz"
test_rnaseq_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.rnaseq_1.fastq.gz"
test_rnaseq_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/fastq/test.rnaseq_2.fastq.gz"
test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table" test_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table"
test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table" test2_baserecalibrator_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table"

View file

@ -0,0 +1,21 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEM_PREPAREREFERENCE } from '../../../../software/rsem/preparereference/main.nf' addParams(options: [args: "--star"])
include { RSEM_CALCULATEEXPRESSION } from '../../../../software/rsem/calculateexpression/main.nf' addParams(options: [args: "--star --star-gzipped-read-file"])
// Test workflow: prepares a reference with RSEM_PREPAREREFERENCE and passes
// its `index` output, together with one paired-end sample, to
// RSEM_CALCULATEEXPRESSION.
workflow test_rsem_calculateexpression {

    // Genome FASTA and GTF annotation, resolved from the shared test-data map.
    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    genome_gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    // Sample description (meta map) and its two FASTQ files.
    sample_meta = [ id:'test', single_end:false, strandedness: 'forward' ]
    fastq_pair  = [
        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
    ]

    RSEM_PREPAREREFERENCE ( genome_fasta, genome_gtf )
    RSEM_CALCULATEEXPRESSION ( [ sample_meta, fastq_pair ], RSEM_PREPAREREFERENCE.out.index )
}

View file

@ -0,0 +1,66 @@
# Test case for the test_rsem_calculateexpression workflow entry point.
- name: 'rsem calculateexpression test_rsem_calculateexpression'
  command: 'nextflow run tests/software/rsem/calculateexpression -entry test_rsem_calculateexpression -c tests/config/nextflow.config'
  tags:
    - 'rsem/calculateexpression'
    - 'rsem'
  files:
    # Reference index files under output/index/rsem.
    # Entries without an md5sum list a path only.
    - path: 'output/index/rsem/Genome'
      md5sum: 'a654229fbca6071dcb6b01ce7df704da'
    - path: 'output/index/rsem/Log.out'
    - path: 'output/index/rsem/SA'
      md5sum: '8c3edc46697b72c9e92440d4cf43506c'
    - path: 'output/index/rsem/SAindex'
      md5sum: 'fd05c149960e72642a8d7c860528ae81'
    - path: 'output/index/rsem/chrLength.txt'
      md5sum: 'c81f40f27e72606d7d07097c1d56a5b5'
    - path: 'output/index/rsem/chrName.txt'
      md5sum: '5ae68a67b70976ee95342a7451cb5af1'
    - path: 'output/index/rsem/chrNameLength.txt'
      md5sum: 'b190587cae0531f3cf25552d8aa674db'
    - path: 'output/index/rsem/chrStart.txt'
      md5sum: '8d3291e6bcdbe9902fbd7c887494173f'
    - path: 'output/index/rsem/exonGeTrInfo.tab'
      md5sum: 'd04497f69d6ef889efd4d34fe63edcc4'
    - path: 'output/index/rsem/exonInfo.tab'
      md5sum: '0d560290fab688b7268d88d5494bf9fe'
    - path: 'output/index/rsem/geneInfo.tab'
      md5sum: '8b608537307443ffaee4927d2b428805'
    - path: 'output/index/rsem/genome.chrlist'
      md5sum: 'b190587cae0531f3cf25552d8aa674db'
    - path: 'output/index/rsem/genome.fasta'
      md5sum: 'f315020d899597c1b57e5fe9f60f4c3e'
    - path: 'output/index/rsem/genome.grp'
      md5sum: 'c2848a8b6d495956c11ec53efc1de67e'
    - path: 'output/index/rsem/genome.idx.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'
    - path: 'output/index/rsem/genome.n2g.idx.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'
    - path: 'output/index/rsem/genome.seq'
      md5sum: '94da0c6b88c33e63c9a052a11f4f57c1'
    - path: 'output/index/rsem/genome.ti'
      md5sum: 'c9e4ae8d4d13a504eec2acf1b8589a66'
    - path: 'output/index/rsem/genome.transcripts.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'
    - path: 'output/index/rsem/genomeParameters.txt'
      md5sum: '2fe3a030e1706c3e8cd4df3818e6dd2f'
    - path: 'output/index/rsem/sjdbInfo.txt'
      md5sum: '5690ea9d9f09f7ff85b7fd47bd234903'
    - path: 'output/index/rsem/sjdbList.fromGTF.out.tab'
      md5sum: '8760c33e966dad0b39f440301ebbdee4'
    - path: 'output/index/rsem/sjdbList.out.tab'
      md5sum: '9e4f991abbbfeb3935a2bb21b9e258f1'
    - path: 'output/index/rsem/transcriptInfo.tab'
      md5sum: '0c3a5adb49d15e5feff81db8e29f2e36'
    # Quantification outputs under output/rsem.
    - path: 'output/rsem/test.genes.results'
      md5sum: 'c7ec226f76736ea805771e73553ae359'
    - path: 'output/rsem/test.isoforms.results'
      md5sum: '99f7f80aa505b44ca429fdebbd7dd5d8'
    - path: 'output/rsem/test.log'
    - path: 'output/rsem/test.stat/test.cnt'
      md5sum: '76249e6b2f3c104f414aae596ba2c2f4'
    - path: 'output/rsem/test.stat/test.model'
      md5sum: 'a7a4bc1734918ef5848604e3362b83e2'
    - path: 'output/rsem/test.stat/test.theta'
      md5sum: 'de2e4490c98cc5383a86ae8225fd0a28'
    - path: 'output/rsem/test.transcript.bam'
      md5sum: '7846491086c478858419667d60f18edd'

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEM_PREPAREREFERENCE } from '../../../../software/rsem/preparereference/main.nf' addParams(options: [:])
// Test workflow: runs RSEM_PREPAREREFERENCE on the test genome FASTA and GTF.
workflow test_rsem_preparereference {

    // Both inputs are resolved from the shared test-data map; a missing
    // file fails immediately because of checkIfExists.
    genome_fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    genome_gtf   = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

    RSEM_PREPAREREFERENCE ( genome_fasta, genome_gtf )
}

View file

@ -0,0 +1,22 @@
# Test case for the test_rsem_preparereference workflow entry point.
- name: 'rsem preparereference test_rsem_preparereference'
  command: 'nextflow run tests/software/rsem/preparereference -entry test_rsem_preparereference -c tests/config/nextflow.config'
  tags:
    - 'rsem'
    - 'rsem/preparereference'
  files:
    # Reference files expected under output/index/rsem, each with its checksum.
    - path: 'output/index/rsem/genome.chrlist'
      md5sum: 'b190587cae0531f3cf25552d8aa674db'
    - path: 'output/index/rsem/genome.fasta'
      md5sum: 'f315020d899597c1b57e5fe9f60f4c3e'
    - path: 'output/index/rsem/genome.grp'
      md5sum: 'c2848a8b6d495956c11ec53efc1de67e'
    - path: 'output/index/rsem/genome.idx.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'
    - path: 'output/index/rsem/genome.n2g.idx.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'
    - path: 'output/index/rsem/genome.seq'
      md5sum: '94da0c6b88c33e63c9a052a11f4f57c1'
    - path: 'output/index/rsem/genome.ti'
      md5sum: 'c9e4ae8d4d13a504eec2acf1b8589a66'
    - path: 'output/index/rsem/genome.transcripts.fa'
      md5sum: '050c521a2719c2ae48267c1e65218f29'