mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 02:58:17 +00:00
Add tests for hisat2 (#366)
* initial commit hisat2/build * initial commit hisat2/build * changed names for hisat2 * fixed directory structure and args * added splice site test data * added splice site inputs * replaced list with individual args * fixed removed commas * added test yml file * updated hisat2 conda version * added meta.yml * added meta.yml description * added meta.yml inputs * added meta.yml outputs * update conda version for hisat2 * removed trailing whitespace meta.yml * fixed version number for containers * added test data to test config * updated for new test logic * fix pytest issue? * fix pytest issue * fixed wrong tool in meta.yaml * updated tets.yaml name * handle build bug for testing * handle build bug for testing in yaml * moved test folder to fix build bug * use old hisat2 version to avoid conda giving inconsistent md5sum * initial commit * removed temp file * added meta yaml * add to pytest * added tests * added test yml * add align meta yaml * add hisat2 align to pytest * remove need for splice data by calling process * add hisat2 align se test * add hisat2 align pe test * update names hisat2 align * update software pytest for using mutiple modules * remove splice site test data since using module instead * remove splice site from config since using module instead * fixed extra brace * added hisat2 align test.yml * removed md5sum for bam files * updated build md5sums * Apply suggestions from code review Co-authored-by: Nicholas TODA <nicholas.toda@mnhn.fr> Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
parent
fead37d57a
commit
e526eae472
11 changed files with 295 additions and 1 deletions
57
software/hisat2/align/meta.yml
Normal file
57
software/hisat2/align/meta.yml
Normal file
|
@ -0,0 +1,57 @@
|
|||
name: hisat2_align
|
||||
description: Align RNA-Seq reads to a reference with HISAT2
|
||||
keywords:
|
||||
- align
|
||||
- fasta
|
||||
- genome
|
||||
- reference
|
||||
|
||||
tools:
|
||||
- hisat2:
|
||||
description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome.
|
||||
homepage: https://daehwankimlab.github.io/hisat2/
|
||||
documentation: https://daehwankimlab.github.io/hisat2/manual/
|
||||
doi: "10.1038/s41587-019-0201-4"
|
||||
licence: ['MIT']
|
||||
|
||||
input:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
- index:
|
||||
type: file
|
||||
description: HISAT2 genome index file
|
||||
pattern: "*.ht2"
|
||||
- splicesites:
|
||||
type: file
|
||||
description: Splice sites file extracted from the gtf annotation
|
||||
pattern: "*.{txt}"
|
||||
|
||||
output:
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- bam:
|
||||
type: file
|
||||
description: Output BAM file containing read alignments
|
||||
pattern: "*.{bam}"
|
||||
- summary:
|
||||
type: file
|
||||
description: Alignment log
|
||||
pattern: "*.log"
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.{version.txt}"
|
||||
|
||||
authors:
|
||||
- "@ntoda03"
|
42
software/hisat2/build/meta.yml
Normal file
42
software/hisat2/build/meta.yml
Normal file
|
@ -0,0 +1,42 @@
|
|||
name: hisat2_build
|
||||
description: Builds HISAT2 index for reference genome
|
||||
keywords:
|
||||
- build
|
||||
- index
|
||||
- fasta
|
||||
- genome
|
||||
- reference
|
||||
tools:
|
||||
- hisat2:
|
||||
description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome.
|
||||
homepage: https://daehwankimlab.github.io/hisat2/
|
||||
documentation: https://daehwankimlab.github.io/hisat2/manual/
|
||||
doi: "10.1038/s41587-019-0201-4"
|
||||
licence: ['MIT']
|
||||
|
||||
input:
|
||||
- fasta:
|
||||
type: file
|
||||
description: Reference fasta file
|
||||
pattern: "*.{fa,fasta,fna}"
|
||||
- gtf:
|
||||
type: file
|
||||
description: Reference gtf annotation file
|
||||
pattern: "*.{gtf}"
|
||||
- splicesites:
|
||||
type: file
|
||||
description: Splice sites file extracted from the gtf annotation
|
||||
pattern: "*.{txt}"
|
||||
|
||||
output:
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.{version.txt}"
|
||||
- index:
|
||||
type: file
|
||||
description: HISAT2 genome index file
|
||||
pattern: "*.ht2"
|
||||
|
||||
authors:
|
||||
- "@ntoda03"
|
34
software/hisat2/extractsplicesites/meta.yml
Normal file
34
software/hisat2/extractsplicesites/meta.yml
Normal file
|
@ -0,0 +1,34 @@
|
|||
name: hisat2_extractsplicesites
|
||||
description: Extracts splice sites from a gtf file
|
||||
keywords:
|
||||
- splicing
|
||||
- gtf
|
||||
- genome
|
||||
- reference
|
||||
|
||||
tools:
|
||||
- hisat2:
|
||||
description: HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome.
|
||||
homepage: https://daehwankimlab.github.io/hisat2/
|
||||
documentation: https://daehwankimlab.github.io/hisat2/manual/
|
||||
doi: "10.1038/s41587-019-0201-4"
|
||||
licence: ['MIT']
|
||||
|
||||
input:
|
||||
- gtf:
|
||||
type: file
|
||||
description: Reference gtf annotation file
|
||||
pattern: "*.{gtf}"
|
||||
|
||||
output:
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.{version.txt}"
|
||||
- splicesites:
|
||||
type: file
|
||||
description: Splice sites file extracted from the gtf annotation
|
||||
pattern: "*.{splice_sites.txt}"
|
||||
|
||||
authors:
|
||||
- "@ntoda03"
|
|
@ -234,6 +234,21 @@ gunzip:
|
|||
- software/gunzip/**
|
||||
- tests/software/gunzip/**
|
||||
|
||||
hisat2_align:
|
||||
- software/hisat2/align/**
|
||||
- software/hisat2/build/**
|
||||
- software/hisat2/extractsplicesites/**
|
||||
- tests/software/hisat2/align/**
|
||||
|
||||
hisat2_build:
|
||||
- software/hisat2/build/**
|
||||
- software/hisat2/extractsplicesites/**
|
||||
- tests/software/hisat2/build_test/**
|
||||
|
||||
hisat2_extractsplicesites:
|
||||
- software/hisat2/extractsplicesites/**
|
||||
- tests/software/hisat2/extractsplicesites/**
|
||||
|
||||
homer_annotatepeaks:
|
||||
- software/homer/annotatepeaks/**
|
||||
- tests/software/homer/annotatepeaks/**
|
||||
|
|
|
@ -67,4 +67,4 @@ params {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
32
tests/software/hisat2/align/main.nf
Normal file
32
tests/software/hisat2/align/main.nf
Normal file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { HISAT2_EXTRACTSPLICESITES } from '../../../../software/hisat2/extractsplicesites/main.nf' addParams( options: [:] )
|
||||
include { HISAT2_BUILD } from '../../../../software/hisat2/build/main.nf' addParams( options: [:] )
|
||||
include { HISAT2_ALIGN } from '../../../../software/hisat2/align/main.nf' addParams( options: [:] )
|
||||
|
||||
// Test entry point: HISAT2 alignment of single-end reads.
// Chains the three HISAT2 modules so the index and splice-site file are
// produced inside the test rather than supplied as pre-built test data.
workflow test_hisat2_align_single_end {
|
||||
input = [ [ id:'test', single_end:true ], // meta map
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
|
||||
]
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
|
||||
|
||||
// Step 1: derive the splice-site file from the GTF annotation.
HISAT2_EXTRACTSPLICESITES ( gtf )
|
||||
// Step 2: build the index from the FASTA, the GTF and the splice sites from step 1.
HISAT2_BUILD ( fasta, gtf, HISAT2_EXTRACTSPLICESITES.out.txt )
|
||||
// Step 3: align the reads against the index from step 2, reusing the same splice-site file.
HISAT2_ALIGN ( input, HISAT2_BUILD.out.index, HISAT2_EXTRACTSPLICESITES.out.txt )
|
||||
}
|
||||
|
||||
// Test entry point: HISAT2 alignment of paired-end reads.
// Same pipeline as the single-end test, but the meta map sets single_end:false
// and the reads list carries two FastQ files (test_1 and test_2).
workflow test_hisat2_align_paired_end {
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
|
||||
file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)]
|
||||
]
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
|
||||
|
||||
// Step 1: derive the splice-site file from the GTF annotation.
HISAT2_EXTRACTSPLICESITES ( gtf )
|
||||
// Step 2: build the index from the FASTA, the GTF and the splice sites from step 1.
HISAT2_BUILD ( fasta, gtf, HISAT2_EXTRACTSPLICESITES.out.txt )
|
||||
// Step 3: align both mates against the index from step 2, reusing the same splice-site file.
HISAT2_ALIGN ( input, HISAT2_BUILD.out.index, HISAT2_EXTRACTSPLICESITES.out.txt )
|
||||
}
|
57
tests/software/hisat2/align/test.yml
Normal file
57
tests/software/hisat2/align/test.yml
Normal file
|
@ -0,0 +1,57 @@
|
|||
- name: hisat2 align test_hisat2_align_single_end
|
||||
command: nextflow run tests/software/hisat2/align -entry test_hisat2_align_single_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- hisat2_align_single_end
|
||||
- hisat2_align
|
||||
- hisat2
|
||||
files:
|
||||
- path: output/hisat2/test.hisat2.summary.log
|
||||
md5sum: 7b8a9e61b7646da1089b041333c41a87
|
||||
- path: output/hisat2/genome.splice_sites.txt
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||
- path: output/hisat2/test.bam
|
||||
- path: output/index/hisat2/genome.5.ht2
|
||||
md5sum: 91198831aaba993acac1734138c5f173
|
||||
- path: output/index/hisat2/genome.7.ht2
|
||||
md5sum: 9013eccd91ad614d7893c739275a394f
|
||||
- path: output/index/hisat2/genome.1.ht2
|
||||
md5sum: 3ea3dc41304941ad8d047e4d71b4899e
|
||||
- path: output/index/hisat2/genome.2.ht2
|
||||
md5sum: 47b153cd1319abc88dda532462651fcf
|
||||
- path: output/index/hisat2/genome.6.ht2
|
||||
md5sum: 265e1284ce85686516fae5d35540994a
|
||||
- path: output/index/hisat2/genome.3.ht2
|
||||
md5sum: 4ed93abba181d8dfab2e303e33114777
|
||||
- path: output/index/hisat2/genome.8.ht2
|
||||
md5sum: 33cdeccccebe80329f1fdbee7f5874cb
|
||||
- path: output/index/hisat2/genome.4.ht2
|
||||
md5sum: c25be5f8b0378abf7a58c8a880b87626
|
||||
|
||||
- name: hisat2 align test_hisat2_align_paired_end
|
||||
command: nextflow run tests/software/hisat2/align -entry test_hisat2_align_paired_end -c tests/config/nextflow.config
|
||||
tags:
|
||||
- hisat2_align_paired_end
|
||||
- hisat2_align
|
||||
- hisat2
|
||||
files:
|
||||
- path: output/hisat2/test.hisat2.summary.log
|
||||
md5sum: 9839b31db795958cc4b70711a3414e9c
|
||||
- path: output/hisat2/genome.splice_sites.txt
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||
- path: output/hisat2/test.bam
|
||||
- path: output/index/hisat2/genome.5.ht2
|
||||
md5sum: 91198831aaba993acac1734138c5f173
|
||||
- path: output/index/hisat2/genome.7.ht2
|
||||
md5sum: 9013eccd91ad614d7893c739275a394f
|
||||
- path: output/index/hisat2/genome.1.ht2
|
||||
md5sum: 3ea3dc41304941ad8d047e4d71b4899e
|
||||
- path: output/index/hisat2/genome.2.ht2
|
||||
md5sum: 47b153cd1319abc88dda532462651fcf
|
||||
- path: output/index/hisat2/genome.6.ht2
|
||||
md5sum: 265e1284ce85686516fae5d35540994a
|
||||
- path: output/index/hisat2/genome.3.ht2
|
||||
md5sum: 4ed93abba181d8dfab2e303e33114777
|
||||
- path: output/index/hisat2/genome.8.ht2
|
||||
md5sum: 33cdeccccebe80329f1fdbee7f5874cb
|
||||
- path: output/index/hisat2/genome.4.ht2
|
||||
md5sum: c25be5f8b0378abf7a58c8a880b87626
|
14
tests/software/hisat2/build_test/main.nf
Normal file
14
tests/software/hisat2/build_test/main.nf
Normal file
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { HISAT2_EXTRACTSPLICESITES } from '../../../../software/hisat2/extractsplicesites/main.nf' addParams( options: [:] )
|
||||
include { HISAT2_BUILD } from '../../../../software/hisat2/build/main.nf' addParams( options: [:] )
|
||||
|
||||
// Test entry point: HISAT2 index build.
// The splice-site input for HISAT2_BUILD is generated by running
// HISAT2_EXTRACTSPLICESITES on the same GTF instead of using a stored test file.
workflow test_hisat2_build {
|
||||
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
|
||||
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
|
||||
|
||||
// Step 1: derive the splice-site file from the GTF annotation.
HISAT2_EXTRACTSPLICESITES ( gtf )
|
||||
// Step 2: build the index from the FASTA, the GTF and the splice sites from step 1.
HISAT2_BUILD ( fasta, gtf, HISAT2_EXTRACTSPLICESITES.out.txt )
|
||||
}
|
24
tests/software/hisat2/build_test/test.yml
Normal file
24
tests/software/hisat2/build_test/test.yml
Normal file
|
@ -0,0 +1,24 @@
|
|||
- name: hisat2 build test_hisat2_build
|
||||
command: nextflow run tests/software/hisat2/build_test -entry test_hisat2_build -c tests/config/nextflow.config
|
||||
tags:
|
||||
- hisat2
|
||||
- hisat2_build
|
||||
files:
|
||||
- path: output/hisat2/genome.splice_sites.txt
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
||||
- path: output/index/hisat2/genome.5.ht2
|
||||
md5sum: 91198831aaba993acac1734138c5f173
|
||||
- path: output/index/hisat2/genome.7.ht2
|
||||
md5sum: 9013eccd91ad614d7893c739275a394f
|
||||
- path: output/index/hisat2/genome.1.ht2
|
||||
md5sum: 3ea3dc41304941ad8d047e4d71b4899e
|
||||
- path: output/index/hisat2/genome.2.ht2
|
||||
md5sum: 47b153cd1319abc88dda532462651fcf
|
||||
- path: output/index/hisat2/genome.6.ht2
|
||||
md5sum: 265e1284ce85686516fae5d35540994a
|
||||
- path: output/index/hisat2/genome.3.ht2
|
||||
md5sum: 4ed93abba181d8dfab2e303e33114777
|
||||
- path: output/index/hisat2/genome.8.ht2
|
||||
md5sum: 33cdeccccebe80329f1fdbee7f5874cb
|
||||
- path: output/index/hisat2/genome.4.ht2
|
||||
md5sum: c25be5f8b0378abf7a58c8a880b87626
|
11
tests/software/hisat2/extractsplicesites/main.nf
Normal file
11
tests/software/hisat2/extractsplicesites/main.nf
Normal file
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env nextflow
|
||||
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
include { HISAT2_EXTRACTSPLICESITES } from '../../../../software/hisat2/extractsplicesites/main.nf' addParams( options: [:] )
|
||||
|
||||
// Test entry point: splice-site extraction only.
// Runs HISAT2_EXTRACTSPLICESITES on the sarscov2 genome GTF from params.test_data.
workflow test_hisat2_extractsplicesites {
|
||||
gtf = file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
|
||||
|
||||
HISAT2_EXTRACTSPLICESITES ( gtf )
|
||||
}
|
8
tests/software/hisat2/extractsplicesites/test.yml
Normal file
8
tests/software/hisat2/extractsplicesites/test.yml
Normal file
|
@ -0,0 +1,8 @@
|
|||
- name: hisat2 extractsplicesites test_hisat2_extractsplicesites
|
||||
command: nextflow run tests/software/hisat2/extractsplicesites -entry test_hisat2_extractsplicesites -c tests/config/nextflow.config
|
||||
tags:
|
||||
- hisat2
|
||||
- hisat2_extractsplicesites
|
||||
files:
|
||||
- path: output/hisat2/genome.splice_sites.txt
|
||||
md5sum: d41d8cd98f00b204e9800998ecf8427e
|
Loading…
Reference in a new issue