Added HISAT2 module and test workflow

2024-12-22 11:08:17 +00:00 · 2020-03-20 11:04:25 +00:00 · 2020-03-20 11:04:25 +00:00 · a2104ca3b9
commit a2104ca3b9
parent a526551300
4 changed files with 133 additions and 0 deletions
--- a/tools/hisat2/main.nf
+++ b/tools/hisat2/main.nf
@ -0,0 +1,60 @@
 // Enable the DSL2 preview syntax used by this module.
 nextflow.preview.dsl=2
 // Placeholder default only: the HISAT2 process looks up keys on params.genome
 // ("hisat2", "name" and optionally "hisat2_splices"), so the including workflow
 // is expected to replace this value with a map.
 params.genome = ''
 process HISAT2 {
    // Aligns single- or paired-end reads with HISAT2 and writes a filtered BAM file
    // together with the text that hisat2 prints on stderr.
    //
    // Inputs:
    //   name, reads  - sample name and its read file(s); a List is treated as a read pair
    //   outdir       - base directory; outputs are copied to <outdir>/hisat2
    //   hisat2_args  - extra command-line options handed through to hisat2
    //   verbose      - when true, the received hisat2_args are printed
    // Outputs:
    //   bam   - files matching *bam
    //   stats - files matching *stats.txt
    //
    // params.genome must be a map providing "hisat2" (the value passed to hisat2 -x)
    // and "name" (appended to the output file names); "hisat2_splices" is optional.
    //
    // depending on the genome used one might want/need to adjust the memory settings.
    // For the E. coli test data this is probably not required
    // label 'bigMem'
    // label 'multiCore'
    //
    // Directives are declared ahead of the input/output/script sections.
    publishDir "$outdir/hisat2",
        mode: "copy", overwrite: true
    input:
        tuple val(name), path(reads)
        val (outdir)
        val (hisat2_args)
        val (verbose)
    output:
        path "*bam",       emit: bam
        path "*stats.txt", emit: stats
    script:
        if (verbose){
            println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
        }
        // Every variable below is declared with `def` so it is an explicit local of
        // this script block rather than an implicit, undeclared assignment.
        def cores = 4
        // Options we add are
        def hisat_options = hisat2_args + " --no-unal --no-softclip "
        def readString
        if (reads instanceof List) {
            // Paired-end: first file is mate 1, second file is mate 2.
            readString = "-1 " + reads[0] + " -2 " + reads[1]
            hisat_options = hisat_options + " --no-mixed --no-discordant"
        }
        else {
            // Single-end: one unpaired read file.
            readString = "-U " + reads
        }
        def index = params.genome["hisat2"]
        def splices = ''
        if (params.genome.containsKey("hisat2_splices")){
            splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
        }
        else{
            println ("No key 'hisat2_splices' was supplied. Skipping...")
        }
        def hisat_name = name + "_" + params.genome["name"]
        // samtools filter: 268 = 4 + 8 + 256, i.e. the three original -F values
        // combined into a single mask. "-" makes samtools read the SAM stream
        // from stdin; hisat2's stderr goes to the stats file.
        """
        module load hisat2
        module load samtools
        hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 268 - > ${hisat_name}_hisat2.bam
        """
 }
--- a/tools/hisat2/meta.yml
+++ b/tools/hisat2/meta.yml
@ -0,0 +1,37 @@
 # Module metadata for the HISAT2 process in tools/hisat2/main.nf.
 name: HISAT2
 description: Graph-based alignment of next generation sequencing reads to a population of genomes
 keywords:
    - Alignment
    - Short reads
    - graph FM Index (GFM)
    - RNA-seq
 tools:
    # The key names the tool being described.
    - hisat2:
        description: |
            HISAT2 is a fast and sensitive alignment program for mapping next-generation
            sequencing reads (whole-genome, transcriptome, and exome sequencing data)
            against the general human population (as well as against a single reference genome).
            Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
            graph FM index (GFM).
        homepage: http://daehwankimlab.github.io/hisat2/
        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
 input:
    -
        - sample_id:
            type: string
            description: Sample identifier
        - reads:
            type: file
            description: Input FastQ file, or pair of files
 output:
    -
        # Glob patterns are quoted: an unquoted leading "*" is read as a YAML alias.
        - report:
            type: file
            description: mapping statistics report
            pattern: "*hisat2_stats.txt"
        - alignment:
            type: file
            description: alignment file in BAM format
            pattern: "*hisat2.bam"
 authors:
    # Quoted because "@" is a reserved indicator and cannot start a plain scalar.
    - "@FelixKrueger"
--- a/tools/hisat2/test/main.nf
+++ b/tools/hisat2/test/main.nf
@ -0,0 +1,34 @@
 #!/usr/bin/env nextflow
 // Test workflow for the HISAT2 module: builds the genome map the module expects,
 // collects the E. coli test reads and runs the HISAT2 process on them.
 nextflow.preview.dsl=2
 params.outdir = "."
 params.genome = ""
 params.hisat2_args = ''
 // HISAT2 arguments should be supplied in the following format to work:
 // --hisat2_args="--score-min L,0,-0.8"
 params.verbose = false
 // Directory containing the HISAT2 test indices. The default is resolved against the
 // location of this script so it does not depend on any user's home directory;
 // override it with --hisat2_index_dir if the indices live elsewhere.
 params.hisat2_index_dir = "${baseDir}/../../../test-datasets/indices/hisat2/E_coli"
 if (params.verbose){
    println ("[WORKFLOW] HISAT2 ARGS ARE: "       + params.hisat2_args)
 }
 // Without a genome name the index path below would have no basename, so stop
 // here with a clear message instead of failing later inside the alignment task.
 if (!params.genome){
    exit 1, "No genome specified. Please provide one with --genome"
 }
 // for other genomes this needs to be handled somehow to return all possible genomes
 genomeValues = ["name" : params.genome]
 genomeValues["hisat2"] = "${params.hisat2_index_dir}/${params.genome}"
 // Hand the genome map to the module as its params.genome.
 include '../main.nf'   params(genome: genomeValues)
 // size:-1 accepts any number of files per sample, so single- and paired-end both match.
 ch_read_files = Channel
  .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
  // .view()  // to check whether the input channel works
 workflow {
    main:
        HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
 }
--- a/tools/hisat2/test/nextflow.config
+++ b/tools/hisat2/test/nextflow.config
@ -0,0 +1,2 @@
 // docker.enabled = true
 // Parameter defaults for the test run, grouped in a params scope block.
 params {
    outdir = './results'
 }
		`@ -0,0 +1,2 @@`
							`// docker.enabled = true`
							`params.outdir = './results'`