Merge pull request #31 from FelixKrueger/hisat2

Added HISAT2 module and test workflow
2024-12-22 11:08:17 +00:00 · 2020-07-11 13:29:02 +02:00 · 2020-07-11 13:29:02 +02:00 · 81b1d6081c
commit 81b1d6081c
parent edda7433d3 7ef2a97882
4 changed files with 131 additions and 0 deletions
--- a/tools/hisat2/main.nf
+++ b/tools/hisat2/main.nf
@ -0,0 +1,58 @@
+nextflow.preview.dsl=2
+params.genome = ''
+
+process HISAT2 {
+    // Aligns single-end or paired-end reads with HISAT2 and pipes the result
+    // through samtools into a BAM file. The text HISAT2 writes to stderr is
+    // redirected into a stats file.
+    //
+    // params.genome is indexed like a map below: keys "hisat2" (index) and
+    // "name" are read, and "hisat2_splices" is used when present.
+    //
+    // Memory/CPU requirements depend on the genome used; the labels below are
+    // left disabled because the E. coli test data probably does not need them.
+    // label 'bigMem'
+    // label 'multiCore'
+
+    input:
+        tuple val(name), path(reads)
+        val (outdir)
+        val (hisat2_args)
+        val (verbose)
+
+    output:
+        path "*bam",       emit: bam
+        path "*stats.txt", emit: stats
+
+    publishDir "$outdir/hisat2",
+        mode: "copy", overwrite: true
+
+    script:
+
+        if (verbose) {
+            println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
+        }
+
+        threads = 4
+
+        // a List of reads is treated as a pair (-1/-2), anything else as single-end (-U)
+        paired = reads instanceof List
+
+        // caller-supplied arguments first, followed by the options this module always adds
+        aligner_opts = hisat2_args + " --no-unal --no-softclip "
+        if (paired) {
+            aligner_opts += " --no-mixed --no-discordant"
+        }
+        read_args = paired ? ("-1 " + reads[0] + " -2 " + reads[1]) : ("-U " + reads)
+
+        genome_index = params.genome["hisat2"]
+
+        // known splice sites are optional; their absence is only reported
+        splice_arg = ''
+        if (!params.genome.containsKey("hisat2_splices")) {
+            println ("No key 'hisat2_splices' was supplied. Skipping...")
+        } else {
+            splice_arg = " --known-splicesite-infile " + params.genome["hisat2_splices"]
+        }
+
+        // output files are named <sample name>_<genome name>_hisat2*
+        out_prefix = name + "_" + params.genome["name"]
+
+        // samtools is given -F filters for SAM flags 4 (read unmapped),
+        // 8 (mate unmapped) and 256 (secondary alignment)
+        """
+        hisat2 -p ${threads} ${aligner_opts} -x ${genome_index} ${splice_arg} ${read_args}  2>${out_prefix}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${out_prefix}_hisat2.bam
+        """
+
+}
--- a/tools/hisat2/meta.yml
+++ b/tools/hisat2/meta.yml
@ -0,0 +1,37 @@
+# Metadata for the HISAT2 module: tool description, inputs, outputs and authors.
+name: HISAT2
+description: Graph-based alignment of next generation sequencing reads to a population of genomes
+keywords:
+    - Alignment
+    - Short reads
+    - graph FM Index (GFM)
+    - RNA-seq
+tools:
+    - hisat2:
+        description: |
+            HISAT2 is a fast and sensitive alignment program for mapping next-generation
+            sequencing reads (whole-genome, transcriptome, and exome sequencing data)
+            against the general human population (as well as against a single reference genome).
+            Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
+            graph FM index (GFM).
+        homepage: http://daehwankimlab.github.io/hisat2/
+        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
+input:
+    -
+        - sample_id:
+            type: string
+            description: Sample identifier
+        - reads:
+            type: file
+            description: Input FastQ file, or pair of files
+output:
+    -
+        - report:
+            type: file
+            description: mapping statistics report
+            # quoted: an unquoted value starting with '*' is parsed as a YAML alias
+            pattern: "*hisat2_stats.txt"
+        - alignment:
+            type: file
+            description: alignment file in BAM format
+            # quoted: an unquoted value starting with '*' is parsed as a YAML alias
+            pattern: "*hisat2.bam"
+authors:
+    # quoted: '@' is a reserved YAML indicator and cannot start a plain scalar
+    - "@FelixKrueger"
--- a/tools/hisat2/test/main.nf
+++ b/tools/hisat2/test/main.nf
@ -0,0 +1,34 @@
+#!/usr/bin/env nextflow
+nextflow.preview.dsl=2
+
+// Test workflow for the HISAT2 module: feeds the E. coli test reads to the
+// HISAT2 process together with the index selected via --genome.
+
+params.outdir = "."
+params.genome = ""
+params.hisat2_args = ''
+// HISAT2 arguments should be supplied in the following format to work:
+// --hisat2_args="--score-min L,0,-0.8"
+
+params.verbose = false
+
+if (params.verbose){
+    println ("[WORKFLOW] HISAT2 ARGS ARE: "       + params.hisat2_args)
+}
+// for other genomes this needs to be handled somehow to return all possible genomes
+genomeValues = ["name" : params.genome]
+// The index is located relative to this script's directory (baseDir) instead of
+// a user-specific absolute home path, so the test is not tied to one machine.
+// It points at the same repository-level test-datasets folder as the reads below.
+genomeValues["hisat2"] = "${baseDir}/../../../test-datasets/indices/hisat2/E_coli/${params.genome}"
+
+// hand the genome map to the module as its params.genome
+include '../main.nf'   params(genome: genomeValues)
+
+// size:-1 accepts any number of files per sample, i.e. single-end or paired-end
+ch_read_files = Channel
+  .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
+  // .view()  // to check whether the input channel works
+
+workflow {
+
+    main:
+        HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
+}
+
+
+
+
+
--- a/tools/hisat2/test/nextflow.config
+++ b/tools/hisat2/test/nextflow.config
@ -0,0 +1,2 @@
+// Configuration for the HISAT2 test workflow.
+// docker.enabled = true
+params {
+    outdir = './results'
+}