Merge pull request #121 from KevinMenden/master

Adding test for STAR
This commit is contained in:
Harshil Patel 2021-01-26 13:01:27 +00:00 committed by GitHub
commit 3ef16b7cfd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 76885 additions and 13 deletions

40
.github/workflows/star_align.yml vendored Normal file
View file

@ -0,0 +1,40 @@
# CI workflow for the STAR_ALIGN module.
# Runs the pytest-workflow cases tagged `star_align` whenever the module,
# this workflow file, or the shared STAR tests change.
# The workflow name matches this file (star_align.yml) so it is not confused
# with the separate star_genomegenerate workflow in the Actions UI.
name: star_align
on:
  push:
    paths:
      - software/star/align/**
      - .github/workflows/star_align.yml
      - tests/software/star/**
  pull_request:
    paths:
      - software/star/align/**
      - .github/workflows/star_align.yml
      - tests/software/star/**
jobs:
  ci_test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the version is always read as a string
        nxf_version: ["20.11.0-edge"]
    env:
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2
      - name: Install Nextflow
        env:
          # Nextflow version to install, taken from the build matrix
          NXF_VER: ${{ matrix.nxf_version }}
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: python -m pip install --upgrade pip pytest-workflow
      # Test the module
      - run: pytest --tag star_align --symlink --wt 2

View file

@ -0,0 +1,40 @@
# GitHub Actions workflow that exercises the STAR_GENOMEGENERATE module tests.
name: star_genomegenerate

# Triggered only when the module, this workflow file, or the STAR tests change.
on:
  push:
    paths:
      - 'software/star/genomegenerate/**'
      - '.github/workflows/star_genomegenerate.yml'
      - 'tests/software/star/**'
  pull_request:
    paths:
      - 'software/star/genomegenerate/**'
      - '.github/workflows/star_genomegenerate.yml'
      - 'tests/software/star/**'

jobs:
  ci_test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        nxf_version:
          - '20.11.0-edge'
    env:
      NXF_ANSI_LOG: false
    steps:
      - uses: actions/checkout@v2

      - name: Install Nextflow
        env:
          # Version to install comes from the build matrix
          NXF_VER: ${{ matrix.nxf_version }}
        run: |
          wget -qO- get.nextflow.io | bash
          sudo mv nextflow /usr/local/bin/

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: python -m pip install --upgrade pip pytest-workflow

      # Run the pytest-workflow cases tagged for this module
      - run: pytest --tag star_genomegenerate --symlink --wt 2

View file

@ -12,46 +12,51 @@ process STAR_ALIGN {
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
// Note: 2.7X indices incompatible with AWS iGenomes. // Note: 2.7X indices incompatible with AWS iGenomes.
conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) conda (params.enable_conda ? 'bioconda::star=2.6.1d' : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/star:2.6.1d--0" container 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0'
} else { } else {
container "quay.io/biocontainers/star:2.6.1d--0" container 'quay.io/biocontainers/star:2.6.1d--0'
} }
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)
path index path index
path gtf path gtf
output: output:
tuple val(meta), path("*Aligned.out.bam") , emit: bam tuple val(meta), path('*d.out.bam') , emit: bam
tuple val(meta), path("*Log.final.out") , emit: log_final tuple val(meta), path('*Log.final.out') , emit: log_final
tuple val(meta), path("*Log.out") , emit: log_out tuple val(meta), path('*Log.out') , emit: log_out
tuple val(meta), path("*Log.progress.out"), emit: log_progress tuple val(meta), path('*Log.progress.out'), emit: log_progress
path "*.version.txt" , emit: version path '*.version.txt' , emit: version
tuple val(meta), path("*sortedByCoord.out.bam") , optional:true, emit: bam_sorted tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted
tuple val(meta), path("*toTranscriptome.out.bam"), optional:true, emit: bam_transcript tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript
tuple val(meta), path("*fastq.gz") , optional:true, emit: fastq tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted
tuple val(meta), path("*.tab") , optional:true, emit: tab tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq
tuple val(meta), path('*.tab') , optional:true, emit: tab
script: script:
def software = getSoftwareName(task.process) def software = getSoftwareName(task.process)
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def ignore_gtf = params.star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" def ignore_gtf = params.star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf"
def seq_center = params.seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" def seq_center = params.seq_center ? "--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'"
def out_sam_type = (options.args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted'
def mv_unsorted_bam = (options.args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : ''
""" """
STAR \\ STAR \\
--genomeDir $index \\ --genomeDir $index \\
--readFilesIn $reads \\ --readFilesIn $reads \\
--runThreadN $task.cpus \\ --runThreadN $task.cpus \\
--outFileNamePrefix $prefix. \\ --outFileNamePrefix $prefix. \\
$out_sam_type \\
$ignore_gtf \\ $ignore_gtf \\
$seq_center \\ $seq_center \\
$options.args $options.args
$mv_unsorted_bam
if [ -f ${prefix}.Unmapped.out.mate1 ]; then if [ -f ${prefix}.Unmapped.out.mate1 ]; then
mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq
gzip ${prefix}.unmapped_1.fastq gzip ${prefix}.unmapped_1.fastq

View file

@ -0,0 +1,97 @@
# Module metadata for STAR_ALIGN: describes the tool, the pipeline parameters
# the module reads, and every input/output channel of the process.
name: star_align
description: Align reads to a reference genome using STAR
keywords:
  - align
  - fasta
  - genome
  - reference
tools:
  - star:
      description: |
        STAR is a software package for mapping DNA sequences against
        a large reference genome, such as the human genome.
      homepage: https://github.com/alexdobin/STAR
      manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf
      doi: 10.1093/bioinformatics/bts635
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - enable_conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
  - singularity_pull_docker_container:
      type: boolean
      description: |
        Instead of directly downloading Singularity images for use with Singularity,
        force the workflow to pull and convert Docker containers instead.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
  - index:
      type: directory
      description: STAR genome index
      pattern: "star"
  # Third process input; previously undocumented here
  - gtf:
      type: file
      description: |
        GTF annotation file. Passed to STAR via `--sjdbGTFfile` unless
        `params.star_ignore_sjdbgtf` is set.
output:
  - bam:
      type: file
      description: Output BAM file containing read alignments
      pattern: "*.{bam}"
  - log_final:
      type: file
      description: STAR final log file
      pattern: "*Log.final.out"
  - log_out:
      type: file
      description: STAR log out file
      pattern: "*Log.out"
  - log_progress:
      type: file
      description: STAR log progress file
      pattern: "*Log.progress.out"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
  - bam_sorted:
      type: file
      description: Sorted BAM file of read alignments (optional)
      pattern: "*sortedByCoord.out.bam"
  - bam_transcript:
      type: file
      description: Output BAM file of transcriptome alignment (optional)
      pattern: "*toTranscriptome.out.bam"
  - bam_unsorted:
      type: file
      description: Unsorted BAM file of read alignments (optional)
      pattern: "*Aligned.unsort.out.bam"
  - fastq:
      type: file
      description: Unmapped FastQ files (optional)
      pattern: "*fastq.gz"
  - tab:
      type: file
      description: STAR output tab file(s) (optional)
      pattern: "*.tab"
authors:
  - "@kevinmenden"
  - "@drpatelh"

View file

@ -0,0 +1,58 @@
# Module metadata for STAR_GENOMEGENERATE: tool details, the pipeline
# parameters the module reads, and its input/output channels.
name: 'star_genomegenerate'
description: 'Create index for STAR'

keywords:
  - 'index'
  - 'fasta'
  - 'genome'
  - 'reference'

tools:
  - star:
      description: |
        STAR is a software package for mapping DNA sequences against
        a large reference genome, such as the human genome.
      homepage: 'https://github.com/alexdobin/STAR'
      manual: 'https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf'
      doi: '10.1093/bioinformatics/bts635'

# Pipeline-level parameters consumed by the module
params:
  - outdir:
      type: 'string'
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: 'string'
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - enable_conda:
      type: 'boolean'
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
  - singularity_pull_docker_container:
      type: 'boolean'
      description: |
        Instead of directly downloading Singularity images for use with Singularity,
        force the workflow to pull and convert Docker containers instead.

input:
  - fasta:
      type: 'file'
      description: 'Fasta file of the reference genome'
  - gtf:
      type: 'file'
      description: 'GTF file of the reference genome'

output:
  - index:
      type: 'directory'
      description: 'Folder containing the star index files'
      pattern: 'star'
  - version:
      type: 'file'
      description: 'File containing software version'
      pattern: '*.{version.txt}'

authors:
  - '@kevinmenden'
  - '@drpatelh'

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
// Extra STAR arguments for the alignment tests: the FASTQ inputs in this
// script are gzipped (.fastq.gz), so STAR is told to read them through zcat.
def options_align = [args: '--readFilesCommand zcat']
// Extra STAR arguments for index generation.
// NOTE(review): presumably lowered from STAR's default to suit the small
// E. coli test genome — confirm against the STAR manual.
def options_gg = [args: '--genomeSAindexNbases 9']
// Import the modules under test, injecting the option maps above as `params.options`.
include { STAR_ALIGN } from '../../../software/star/align/main.nf' addParams( options: options_align )
include { STAR_GENOMEGENERATE } from '../../../software/star/genomegenerate/main.nf' addParams( options: options_gg )
/*
 * Test entry point: build a STAR index from the E. coli test reference
 * (FASTA) and its GTF annotation. Both files must exist at launch.
 */
workflow test_star_genomegenerate {
    STAR_GENOMEGENERATE (
        file("${launchDir}/tests/data/fasta/E_coli/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true),
        file("${launchDir}/tests/data/gff/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true)
    )
}
/*
 * Test entry point: build a STAR index from the E. coli test reference,
 * then align one single-end gzipped FASTQ against it.
 */
workflow test_star_alignment_single_end {
    // Reference inputs; the GTF is reused by the alignment step
    genome_fasta   = file("${launchDir}/tests/data/fasta/E_coli/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true)
    annotation_gtf = file("${launchDir}/tests/data/gff/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true)

    STAR_GENOMEGENERATE ( genome_fasta, annotation_gtf )
    star_index = STAR_GENOMEGENERATE.out.index

    // Sample tuple: [ meta map, list of read files ]
    sample_meta = [ id:'test', single_end:true ]
    fastq_files = [ file("${launchDir}/tests/data/fastq/rna/test_single_end.fastq.gz", checkIfExists: true) ]

    STAR_ALIGN ( [ sample_meta, fastq_files ], star_index, annotation_gtf )
}
/*
 * Test entry point: build a STAR index from the E. coli test reference,
 * then align a pair of gzipped FASTQ files (R1/R2) against it.
 */
workflow test_star_alignment_paired_end {
    // Reference inputs; the GTF is reused by the alignment step
    genome_fasta   = file("${launchDir}/tests/data/fasta/E_coli/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true)
    annotation_gtf = file("${launchDir}/tests/data/gff/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true)

    STAR_GENOMEGENERATE ( genome_fasta, annotation_gtf )
    star_index = STAR_GENOMEGENERATE.out.index

    // Sample tuple: [ meta map, list of read files (mate 1, mate 2) ]
    sample_meta = [ id:'test', single_end:false ]
    fastq_files = [
        file("${launchDir}/tests/data/fastq/rna/test_R1.fastq.gz", checkIfExists: true),
        file("${launchDir}/tests/data/fastq/rna/test_R2.fastq.gz", checkIfExists: true)
    ]

    STAR_ALIGN ( [ sample_meta, fastq_files ], star_index, annotation_gtf )
}

View file

@ -0,0 +1,108 @@
# Index generation only: checks every file of the published STAR index.
- name: Run star genomegenerate
  command: >-
    nextflow run ./tests/software/star
    -profile docker
    -entry test_star_genomegenerate
    -c tests/config/nextflow.config
  tags: [star, star_genomegenerate]
  files:
    - path: output/star/star/Genome
      md5sum: '323c992bac354f93073ce0fc43f222f8'
    - path: output/star/star/SA
      md5sum: '3e70e4fc6d031e1915bb510727f2c559'
    - path: output/star/star/SAindex
      md5sum: 'a94198b95a245d4f64af2a7133b6ec7b'
    - path: output/star/star/chrLength.txt
      md5sum: 'f2bea3725fe1c01420c57fb73bdeb31a'
    - path: output/star/star/chrNameLength.txt
      md5sum: 'c7ceb0a8827b2ea91c386933bee48742'
    - path: output/star/star/chrStart.txt
      md5sum: 'faf5c55020c99eceeef3e34188ac0d2f'
    - path: output/star/star/exonGeTrInfo.tab
      md5sum: 'aec6e7a1ae3fc8c638ce5a9ce9c886b6'
    - path: output/star/star/exonInfo.tab
      md5sum: '42eca6ebc2dc72d9d6e6b3acd3714343'
    - path: output/star/star/genomeParameters.txt
      md5sum: 'ed47b8b034cae2fefcdb39321aea47cd'
    - path: output/star/star/sjdbInfo.txt
      md5sum: '1082ab459363b3f2f7aabcef0979c1ed'
    # d41d8cd9...427e is the MD5 of zero-length content: these files are expected to be empty
    - path: output/star/star/sjdbList.fromGTF.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/sjdbList.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/transcriptInfo.tab
      md5sum: '8fbe69abbbef4f89da3854873984dbac'
# Single-end alignment: checks the STAR index files plus the alignment outputs.
- name: Run star single-end alignment
  command: >-
    nextflow run ./tests/software/star
    -profile docker
    -entry test_star_alignment_single_end
    -c tests/config/nextflow.config
  tags: [star, star_align, star_alignment_single_end]
  files:
    # STAR index produced by the genome-generate step
    - path: output/star/star/Genome
      md5sum: '323c992bac354f93073ce0fc43f222f8'
    - path: output/star/star/SA
      md5sum: '3e70e4fc6d031e1915bb510727f2c559'
    - path: output/star/star/SAindex
      md5sum: 'a94198b95a245d4f64af2a7133b6ec7b'
    - path: output/star/star/chrLength.txt
      md5sum: 'f2bea3725fe1c01420c57fb73bdeb31a'
    - path: output/star/star/chrNameLength.txt
      md5sum: 'c7ceb0a8827b2ea91c386933bee48742'
    - path: output/star/star/chrStart.txt
      md5sum: 'faf5c55020c99eceeef3e34188ac0d2f'
    - path: output/star/star/exonGeTrInfo.tab
      md5sum: 'aec6e7a1ae3fc8c638ce5a9ce9c886b6'
    - path: output/star/star/exonInfo.tab
      md5sum: '42eca6ebc2dc72d9d6e6b3acd3714343'
    - path: output/star/star/genomeParameters.txt
      md5sum: 'ed47b8b034cae2fefcdb39321aea47cd'
    - path: output/star/star/sjdbInfo.txt
      md5sum: '1082ab459363b3f2f7aabcef0979c1ed'
    # d41d8cd9...427e is the MD5 of zero-length content: these files are expected to be empty
    - path: output/star/star/sjdbList.fromGTF.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/sjdbList.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/transcriptInfo.tab
      md5sum: '8fbe69abbbef4f89da3854873984dbac'
    # Alignment outputs
    - path: output/star/test.Aligned.out.bam
      md5sum: 'd7f59c1728482e76a18e3f6eb9c66c25'
    # Expected to be empty (MD5 of zero-length content)
    - path: output/star/test.SJ.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
# Paired-end alignment: checks the STAR index files plus the alignment outputs.
- name: Run star paired-end alignment
  command: >-
    nextflow run ./tests/software/star
    -profile docker
    -entry test_star_alignment_paired_end
    -c tests/config/nextflow.config
  tags: [star, star_align, star_alignment_paired_end]
  files:
    # STAR index produced by the genome-generate step
    - path: output/star/star/Genome
      md5sum: '323c992bac354f93073ce0fc43f222f8'
    - path: output/star/star/SA
      md5sum: '3e70e4fc6d031e1915bb510727f2c559'
    - path: output/star/star/SAindex
      md5sum: 'a94198b95a245d4f64af2a7133b6ec7b'
    - path: output/star/star/chrLength.txt
      md5sum: 'f2bea3725fe1c01420c57fb73bdeb31a'
    - path: output/star/star/chrNameLength.txt
      md5sum: 'c7ceb0a8827b2ea91c386933bee48742'
    - path: output/star/star/chrStart.txt
      md5sum: 'faf5c55020c99eceeef3e34188ac0d2f'
    - path: output/star/star/exonGeTrInfo.tab
      md5sum: 'aec6e7a1ae3fc8c638ce5a9ce9c886b6'
    - path: output/star/star/exonInfo.tab
      md5sum: '42eca6ebc2dc72d9d6e6b3acd3714343'
    - path: output/star/star/genomeParameters.txt
      md5sum: 'ed47b8b034cae2fefcdb39321aea47cd'
    - path: output/star/star/sjdbInfo.txt
      md5sum: '1082ab459363b3f2f7aabcef0979c1ed'
    # d41d8cd9...427e is the MD5 of zero-length content: these files are expected to be empty
    - path: output/star/star/sjdbList.fromGTF.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/sjdbList.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'
    - path: output/star/star/transcriptInfo.tab
      md5sum: '8fbe69abbbef4f89da3854873984dbac'
    # Alignment outputs
    - path: output/star/test.Aligned.out.bam
      md5sum: '5c52bcaa15e86914ad6b895638235166'
    # Expected to be empty (MD5 of zero-length content)
    - path: output/star/test.SJ.out.tab
      md5sum: 'd41d8cd98f00b204e9800998ecf8427e'