mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Added HISAT2 module and test workflow
This commit is contained in:
parent
a526551300
commit
a2104ca3b9
4 changed files with 133 additions and 0 deletions
60
tools/hisat2/main.nf
Normal file
60
tools/hisat2/main.nf
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
nextflow.preview.dsl=2

// Genome descriptor. The including workflow is expected to override this with a
// map: the process below reads the keys "hisat2" (index basename), "name" (used in
// output file names) and, optionally, "hisat2_splices" (known splice-site file).
params.genome = ''

/*
 * HISAT2 - align single-end or paired-end reads with hisat2 and write the
 * alignments as BAM via samtools.
 *
 * Inputs:
 *   name, reads  - sample name and its read file(s); a List of files is treated
 *                  as a read pair (-1/-2), anything else as single-end (-U)
 *   outdir       - base directory; results are published to <outdir>/hisat2
 *   hisat2_args  - extra command-line options passed through to hisat2
 *   verbose      - when true, the supplied hisat2 arguments are printed
 *
 * Outputs:
 *   bam    - files matching "*bam"
 *   stats  - files matching "*stats.txt" (hisat2's stderr)
 */
process HISAT2 {
    // depending on the genome used one might want/need to adjust the memory settings.
    // For the E. coli test data this is probably not required
    // label 'bigMem'
    // label 'multiCore'

    // Directives belong at the top of the process body, ahead of input:/output:.
    // NOTE(review): relies on the GString being resolved per task so that the
    // `outdir` input is available here (same mechanism as `tag "$name"`) - confirm.
    publishDir "$outdir/hisat2",
        mode: "copy", overwrite: true

    input:
        tuple val(name), path(reads)
        val (outdir)
        val (hisat2_args)
        val (verbose)

    output:
        path "*bam",       emit: bam
        path "*stats.txt", emit: stats

    script:

    if (verbose){
        println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
    }

    // All helper variables are declared with `def` so they stay local to the task;
    // undeclared variables are shared and can be clobbered by tasks running in parallel.
    def cores = 4
    def readString = ""
    def hisat_options = hisat2_args

    // Options we add are
    hisat_options = hisat_options + " --no-unal --no-softclip "

    if (reads instanceof List) {
        // paired-end: first file is mate 1, second file is mate 2
        readString = "-1 "+reads[0]+" -2 "+reads[1]
        hisat_options = hisat_options + " --no-mixed --no-discordant"
    }
    else {
        // single-end
        readString = "-U "+reads
    }

    def index = params.genome["hisat2"]

    // The known splice-site file is optional; without it the flag is simply omitted.
    def splices = ''
    if (params.genome.containsKey("hisat2_splices")){
        splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
    }
    else{
        println ("No key 'hisat2_splices' was supplied. Skipping...")
    }

    // Prefix shared by the stats file and the BAM file.
    def hisat_name = name + "_" + params.genome["name"]

    // hisat2's stderr is captured as the stats report; stdout (SAM) is piped into
    // samtools, which writes BAM and filters on flags 4, 8 and 256.
    """
    module load hisat2
    module load samtools
    hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
    """

}
|
37
tools/hisat2/meta.yml
Normal file
37
tools/hisat2/meta.yml
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# Module metadata for the HISAT2 alignment process (tools/hisat2/main.nf).
name: HISAT2
description: Graph-based alignment of next generation sequencing reads to a population of genomes
keywords:
    - Alignment
    - Short reads
    - graph FM Index (GFM)
    - RNA-seq
tools:
    # The key names the wrapped tool.
    - hisat2:
        description: |
          HISAT2 is a fast and sensitive alignment program for mapping next-generation
          sequencing reads (whole-genome, transcriptome, and exome sequencing data)
          against the general human population (as well as against a single reference genome).
          Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
          graph FM index (GFM).
        homepage: http://daehwankimlab.github.io/hisat2/
        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
input:
    -
      - sample_id:
          type: string
          description: Sample identifier
      - reads:
          type: file
          description: Input FastQ file, or pair of files
output:
    -
      - report:
          type: file
          description: mapping statistics report
          # Quoted: an unquoted leading '*' is parsed as a YAML alias.
          pattern: "*hisat2_stats.txt"
      - alignment:
          type: file
          description: alignment file in BAM format
          pattern: "*hisat2.bam"
authors:
    # Quoted: '@' is a reserved indicator and cannot start a plain scalar.
    - "@FelixKrueger"
|
34
tools/hisat2/test/main.nf
Executable file
34
tools/hisat2/test/main.nf
Executable file
|
@ -0,0 +1,34 @@
|
||||||
|
#!/usr/bin/env nextflow
nextflow.preview.dsl=2

// Test workflow for the HISAT2 module: collects the E. coli test reads and runs
// the HISAT2 process from ../main.nf on them.

params.outdir = "."
params.genome = ""
params.hisat2_args = ''
// HISAT2 arguments should be supplied in the following format to work:
// --hisat2_args="--score-min L,0,-0.8"

params.verbose = false

// Directory containing the HISAT2 indices; the index basename is params.genome.
// The default keeps the original location and can be overridden with
// --hisat2_index_dir on other machines.
params.hisat2_index_dir = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/hisat2/E_coli"

if (params.verbose){
    println ("[WORKFLOW] HISAT2 ARGS ARE: " + params.hisat2_args)
}
// for other genomes this needs to be handled somehow to return all possible genomes
genomeValues = ["name" : params.genome]
genomeValues["hisat2"] = "${params.hisat2_index_dir}/${params.genome}"

// Hand the genome map to the module, replacing its own params.genome default.
include '../main.nf' params(genome: genomeValues)

// size:-1 accepts any number of files per sample, so single-end and paired-end
// inputs both match.
ch_read_files = Channel
    .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
    // .view() // to check whether the input channel works

workflow {

    main:
        HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
2
tools/hisat2/test/nextflow.config
Normal file
2
tools/hisat2/test/nextflow.config
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
// Configuration for the HISAT2 test workflow.

// Container execution is currently switched off:
// docker.enabled = true

// Directory the test workflow passes to the module as its output location.
params {
    outdir = './results'
}
|
Loading…
Reference in a new issue