Finish rseqc modules (#439)

* initial 'modules create' of minia

* fixed tests

* finished meta.yml

* fixed filters.yml

* resolved issues in pytest_software.yml

* add newline

* Update software/minia/main.nf

* fixing a bunch of module tests

* remove vscode

* fixed minia

* added rseqc/bamstat tests

* added tests/meta.yml for rseqc/inferexperiment

* added test/meta.yml for rseqc/inner_distances

* added meta.yml and tests/main.nf for junctionannotation (test not running)

* added test/meta.yml for rseqc/readdistribution

* finished test/meta.yml for rseqc/readduplication

* added entries to pytest_software.yml

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Kevin Menden 2021-04-13 09:49:32 +02:00 committed by GitHub
parent c736817598
commit 1d874bb0d0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 440 additions and 0 deletions

View file

@ -0,0 +1,36 @@
# Module metadata for rseqc_bamstat: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_bamstat
description: Generate statistics from a bam file
keywords:
- bam
- qc
- bamstat
# Upstream tool this module wraps, with its project links and DOI.
tools:
- rseqc:
description: |
RSeQC package provides a number of useful modules that can comprehensively evaluate
high throughput sequence data especially RNA-seq data.
homepage: http://rseqc.sourceforge.net/
documentation: http://rseqc.sourceforge.net/
doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map and the bam file to summarise.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: the bam file to calculate statistics of
pattern: "*.{bam}"
# Outputs: the bam statistics report and a software version file.
output:
- txt:
type: file
description: bam statistics report
pattern: "*.bam_stat.txt"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
# Module authors.
authors:
- "@drpatelh"
- "@kevinmenden"

View file

@ -0,0 +1,39 @@
# Module metadata for rseqc_inferexperiment: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_inferexperiment
description: Infer strandedness from sequencing reads
keywords:
- rnaseq
- experiment
# Upstream tool this module wraps, with its project links and DOI.
tools:
- rseqc:
description: |
RSeQC package provides a number of useful modules that can comprehensively evaluate
high throughput sequence data especially RNA-seq data.
homepage: http://rseqc.sourceforge.net/
documentation: http://rseqc.sourceforge.net/
doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map, a bam file and a bed file for the reference gene model.
# NOTE(review): the bam description below reads like it was copied from the bamstat
# module ("calculate statistics of") — confirm the wording is intended here.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: the bam file to calculate statistics of
pattern: "*.{bam}"
- bed:
type: file
description: a bed file for the reference gene model
pattern: "*.{bed}"
# Outputs: the infer_experiment report and a software version file.
output:
- txt:
type: file
description: infer_experiment results report
pattern: "*.infer_experiment.txt"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
# Module authors.
authors:
- "@drpatelh"
- "@kevinmenden"

View file

@ -0,0 +1,55 @@
# Module metadata for rseqc_innerdistance: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_innerdistance
description: Calculate inner distance between read pairs.
keywords:
  - fragment_size
  - inner_distance
# Upstream tool this module wraps, with its project links and DOI.
tools:
  - rseqc:
      description: |
        RSeQC package provides a number of useful modules that can comprehensively evaluate
        high throughput sequence data especially RNA-seq data.
      homepage: http://rseqc.sourceforge.net/
      documentation: http://rseqc.sourceforge.net/
      doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map, the alignment and a bed file for the reference gene model.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: the alignment in bam format
      pattern: "*.{bam}"
  - bed:
      type: file
      description: a bed file for the reference gene model
      pattern: "*.{bed}"
# Outputs: raw distances, their frequency and mean tables, the plot, the plotting
# script and a software version file.
output:
  - distance:
      type: file
      description: the inner distances
      pattern: "*.inner_distance.txt"
  - freq:
      type: file
      description: frequencies of different insert sizes
      pattern: "*.inner_distance_freq.txt"
  - mean:
      type: file
      description: mean/median values of inner distances
      pattern: "*.inner_distance_mean.txt"
  - pdf:
      type: file
      description: distribution plot of inner distances
      pattern: "*.inner_distance_plot.pdf"
  - rscript:
      type: file
      description: script to reproduce the plot
      # Lower-case extension: the module test expects test.inner_distance_plot.r,
      # which an upper-case "*.R" glob would not match on a case-sensitive filesystem.
      pattern: "*.inner_distance_plot.r"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
# Module authors.
authors:
  - "@drpatelh"
  - "@kevinmenden"

View file

@ -0,0 +1,61 @@
# Module metadata for rseqc_junctionannotation: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_junctionannotation
description: compare detected splice junctions to reference gene model
keywords:
  - junctions
  - splicing
  - rnaseq
# Upstream tool this module wraps, with its project links and DOI.
tools:
  - rseqc:
      description: |
        RSeQC package provides a number of useful modules that can comprehensively evaluate
        high throughput sequence data especially RNA-seq data.
      homepage: http://rseqc.sourceforge.net/
      documentation: http://rseqc.sourceforge.net/
      doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map, the alignment and a bed file for the reference gene model.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: the alignment in bam format
      pattern: "*.{bam}"
  - bed:
      type: file
      description: a bed file for the reference gene model
      pattern: "*.{bed}"
# Outputs: annotated junction tables, plots, the plotting script, a log and a
# software version file.
output:
  - bed:
      type: file
      description: bed file of annotated junctions
      pattern: "*.junction.bed"
  - interact_bed:
      type: file
      description: Interact bed file
      pattern: "*.Interact.bed"
  - xls:
      type: file
      description: xls file with junction information
      pattern: "*.xls"
  - pdf:
      type: file
      description: junction plot
      pattern: "*.junction.pdf"
  - events_pdf:
      type: file
      description: events plot
      pattern: "*.events.pdf"
  - rscript:
      type: file
      description: Rscript to reproduce the plots
      pattern: "*.r"
  - log:
      type: file
      description: Log output of the junction annotation run
      # NOTE(review): glob assumed from the output name; confirm against the module's main.nf.
      pattern: "*.log"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
# Module authors.
authors:
  - "@drpatelh"
  - "@kevinmenden"

View file

@ -0,0 +1,40 @@
# Module metadata for rseqc_readdistribution: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_readdistribution
description: Calculate how mapped reads are distributed over genomic features
keywords:
- read distribution
- genomics
- rnaseq
# Upstream tool this module wraps, with its project links and DOI.
tools:
- rseqc:
description: |
RSeQC package provides a number of useful modules that can comprehensively evaluate
high throughput sequence data especially RNA-seq data.
homepage: http://rseqc.sourceforge.net/
documentation: http://rseqc.sourceforge.net/
doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map, the alignment and a bed file for the reference gene model.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: the alignment in bam format
pattern: "*.{bam}"
- bed:
type: file
description: a bed file for the reference gene model
pattern: "*.{bed}"
# Outputs: the read distribution report and a software version file.
output:
- txt:
type: file
description: the read distribution report
pattern: "*.read_distribution.txt"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
# Module authors.
authors:
- "@drpatelh"
- "@kevinmenden"

View file

@ -0,0 +1,51 @@
# Module metadata for rseqc_readduplication: names the module, the wrapped tool,
# and the files it consumes and produces.
name: rseqc_readduplication
description: Calculate read duplication rate
keywords:
  - rnaseq
  - duplication
# Upstream tool this module wraps, with its project links and DOI.
tools:
  - rseqc:
      description: |
        RSeQC package provides a number of useful modules that can comprehensively evaluate
        high throughput sequence data especially RNA-seq data.
      homepage: http://rseqc.sourceforge.net/
      documentation: http://rseqc.sourceforge.net/
      doi: 10.1093/bioinformatics/bts356
# Inputs: a sample metadata map and the alignment. No bed file is listed because the
# module's test workflow invokes it with the [ meta, bam ] tuple only.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: the alignment in bam format
      pattern: "*.{bam}"
# Outputs: sequence- and position-based duplication tables, the plot, the plotting
# script and a software version file.
output:
  - seq_xls:
      type: file
      description: Read duplication rate determined from sequence of read
      pattern: "*seq.DupRate.xls"
  - pos_xls:
      type: file
      description: Read duplication rate determined from mapping position of read
      pattern: "*pos.DupRate.xls"
  - pdf:
      type: file
      description: plot of duplication rate
      pattern: "*.pdf"
  - rscript:
      type: file
      description: script to reproduce the plot
      # Lower-case extension: the module test expects test.DupRate_plot.r.
      pattern: "*.r"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
# Module authors.
authors:
  - "@drpatelh"
  - "@kevinmenden"

View file

@ -411,6 +411,26 @@ rasusa:
- software/rasusa/**
- tests/software/rasusa/**
# One entry per rseqc sub-module: the key is the module tag and the list holds the
# module and test path globs associated with that tag.
# NOTE(review): there is no rseqc/junctionannotation entry, and no test.yml for that
# module is visible either — confirm the omission is intentional.
rseqc/bamstat:
- software/rseqc/bamstat/**
- tests/software/rseqc/bamstat/**
rseqc/inferexperiment:
- software/rseqc/inferexperiment/**
- tests/software/rseqc/inferexperiment/**
rseqc/innerdistance:
- software/rseqc/innerdistance/**
- tests/software/rseqc/innerdistance/**
rseqc/readdistribution:
- software/rseqc/readdistribution/**
- tests/software/rseqc/readdistribution/**
rseqc/readduplication:
- software/rseqc/readduplication/**
- tests/software/rseqc/readduplication/**
salmon/index:
- software/salmon/index/**
- tests/software/salmon/index/**

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_BAMSTAT } from '../../../../software/rseqc/bamstat/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_BAMSTAT on the SARS-CoV-2 paired-end sorted BAM test file.
workflow test_rseqc_bamstat {
    sample_meta = [ id:'test' ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)

    // Hand the module one [ meta, bam ] list.
    RSEQC_BAMSTAT ( [ sample_meta, sorted_bam ] )
}

View file

@ -0,0 +1,8 @@
# Test case for the bamstat module: runs the test_rseqc_bamstat entry workflow and
# lists the report file expected under output/rseqc with its md5 checksum.
- name: rseqc bamstat test_rseqc_bamstat
command: nextflow run tests/software/rseqc/bamstat -entry test_rseqc_bamstat -c tests/config/nextflow.config
tags:
- rseqc
- rseqc/bamstat
files:
- path: output/rseqc/test.bam_stat.txt
md5sum: 2675857864c1d1139b2a19d25dc36b09

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_INFEREXPERIMENT } from '../../../../software/rseqc/inferexperiment/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_INFEREXPERIMENT on the SARS-CoV-2 paired-end sorted BAM
// together with the genome test bed file.
workflow test_rseqc_inferexperiment {
    sample_meta = [ id:'test' ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    gene_model  = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)

    // First argument is the [ meta, bam ] list, second is the bed file.
    RSEQC_INFEREXPERIMENT ( [ sample_meta, sorted_bam ], gene_model )
}

View file

@ -0,0 +1,8 @@
# Test case for the inferexperiment module: runs the test_rseqc_inferexperiment entry
# workflow and lists the report file expected under output/rseqc with its md5 checksum.
- name: rseqc inferexperiment test_rseqc_inferexperiment
command: nextflow run tests/software/rseqc/inferexperiment -entry test_rseqc_inferexperiment -c tests/config/nextflow.config
tags:
- rseqc
- rseqc/inferexperiment
files:
- path: output/rseqc/test.infer_experiment.txt
md5sum: f9d0bfc239df637cd8aeda40ade3c59a

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_INNERDISTANCE } from '../../../../software/rseqc/innerdistance/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_INNERDISTANCE on the SARS-CoV-2 paired-end sorted BAM
// together with the genome test bed12 file.
workflow test_rseqc_innerdistance {
    sample_meta = [ id:'test', single_end: false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    gene_model  = file(params.test_data['sarscov2']['genome']['test_bed12'], checkIfExists: true)

    // First argument is the [ meta, bam ] list, second is the bed file.
    RSEQC_INNERDISTANCE ( [ sample_meta, sorted_bam ], gene_model )
}

View file

@ -0,0 +1,15 @@
# Test case for the innerdistance module: runs the test_rseqc_innerdistance entry
# workflow and lists the files expected under output/rseqc.
- name: rseqc innerdistance test_rseqc_innerdistance
command: nextflow run tests/software/rseqc/innerdistance -entry test_rseqc_innerdistance -c tests/config/nextflow.config
tags:
- rseqc
- rseqc/innerdistance
files:
- path: output/rseqc/test.inner_distance.txt
md5sum: a1acc9def0f64a5500d4c4cb47cbe32b
- path: output/rseqc/test.inner_distance_freq.txt
md5sum: 3fc037501f5899b5da009c8ce02fc25e
- path: output/rseqc/test.inner_distance_mean.txt
md5sum: 58398b7d5a29a5e564f9e3c50b55996c
# The PDF plot is listed by path only; no md5sum is given for it.
- path: output/rseqc/test.inner_distance_plot.pdf
- path: output/rseqc/test.inner_distance_plot.r
md5sum: 5859fbd5b42046d47e8b9aa85077f4ea

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_JUNCTIONANNOTATION } from '../../../../software/rseqc/junctionannotation/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_JUNCTIONANNOTATION on the SARS-CoV-2 paired-end sorted BAM
// together with the genome test bed12 file.
workflow test_rseqc_junctionannotation {
    sample_meta = [ id:'test', single_end: false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    gene_model  = file(params.test_data['sarscov2']['genome']['test_bed12'], checkIfExists: true)

    // First argument is the [ meta, bam ] list, second is the bed file.
    RSEQC_JUNCTIONANNOTATION ( [ sample_meta, sorted_bam ], gene_model )
}

View file

@ -0,0 +1,15 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_READDISTRIBUTION } from '../../../../software/rseqc/readdistribution/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_READDISTRIBUTION on the SARS-CoV-2 paired-end sorted BAM
// together with the genome test bed12 file.
workflow test_rseqc_readdistribution {
    sample_meta = [ id:'test', single_end: false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
    gene_model  = file(params.test_data['sarscov2']['genome']['test_bed12'], checkIfExists: true)

    // First argument is the [ meta, bam ] list, second is the bed file.
    RSEQC_READDISTRIBUTION ( [ sample_meta, sorted_bam ], gene_model )
}

View file

@ -0,0 +1,8 @@
# Test case for the readdistribution module: runs the test_rseqc_readdistribution entry
# workflow and lists the report file expected under output/rseqc with its md5 checksum.
- name: rseqc readdistribution test_rseqc_readdistribution
command: nextflow run tests/software/rseqc/readdistribution -entry test_rseqc_readdistribution -c tests/config/nextflow.config
tags:
- rseqc
- rseqc/readdistribution
files:
- path: output/rseqc/test.read_distribution.txt
md5sum: 56893fdc0809d968629a363551a1655f

View file

@ -0,0 +1,13 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { RSEQC_READDUPLICATION } from '../../../../software/rseqc/readduplication/main.nf' addParams(options: [:])
// Entry workflow that runs RSEQC_READDUPLICATION on the SARS-CoV-2 paired-end sorted BAM test file.
workflow test_rseqc_readduplication {
    sample_meta = [ id:'test', single_end: false ]
    sorted_bam  = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)

    // Hand the module one [ meta, bam ] list.
    RSEQC_READDUPLICATION ( [ sample_meta, sorted_bam ] )
}

View file

@ -0,0 +1,13 @@
# Test case for the readduplication module: runs the test_rseqc_readduplication entry
# workflow and lists the files expected under output/rseqc.
- name: rseqc readduplication test_rseqc_readduplication
command: nextflow run tests/software/rseqc/readduplication -entry test_rseqc_readduplication -c tests/config/nextflow.config
tags:
- rseqc/readduplication
- rseqc
files:
# The PDF plot is listed by path only; no md5sum is given for it.
- path: output/rseqc/test.DupRate_plot.pdf
- path: output/rseqc/test.DupRate_plot.r
md5sum: 3c0325095cee4835b921e57d61c23dca
- path: output/rseqc/test.pos.DupRate.xls
md5sum: a859bc2031d46bf1cc4336205847caa3
- path: output/rseqc/test.seq.DupRate.xls
md5sum: ee8783399eec5a18522a6f08bece338b