From a2104ca3b9307439d694bd339b8d12d645ba7b3a Mon Sep 17 00:00:00 2001
From: FelixKrueger
Date: Fri, 20 Mar 2020 11:04:25 +0000
Subject: [PATCH 1/3] Added HISAT2 module and test workflow

Adds a DSL2 HISAT2 process that aligns single-end or paired-end reads
against the index given in params.genome["hisat2"], pipes the alignments
through samtools into a BAM file and writes the HISAT2 stderr report to
a *_hisat2_stats.txt file. Also adds the module's meta.yml and a small
test workflow with its nextflow.config.

---
Review note: in tools/hisat2/meta.yml the tool entry is keyed `hisat2`
(it was `fastqc`), and the two glob patterns and the author handle are
quoted, because a plain YAML scalar may not start with `*` or `@`.
The abbreviated blob id on the meta.yml index line predates these fixes.

 tools/hisat2/main.nf              | 60 +++++++++++++++++++++++++++++++
 tools/hisat2/meta.yml             | 37 +++++++++++++++++++
 tools/hisat2/test/main.nf         | 34 ++++++++++++++++++
 tools/hisat2/test/nextflow.config |  2 ++
 4 files changed, 133 insertions(+)
 create mode 100644 tools/hisat2/main.nf
 create mode 100644 tools/hisat2/meta.yml
 create mode 100755 tools/hisat2/test/main.nf
 create mode 100644 tools/hisat2/test/nextflow.config

diff --git a/tools/hisat2/main.nf b/tools/hisat2/main.nf
new file mode 100644
index 00000000..e9d33993
--- /dev/null
+++ b/tools/hisat2/main.nf
@@ -0,0 +1,60 @@
+nextflow.preview.dsl=2
+params.genome = ''
+
+process HISAT2 {
+    // depending on the genome used one might want/need to adjust the memory settings.
+    // For the E. coli test data this is probably not required
+    // label 'bigMem'
+    // label 'multiCore'
+
+    input:
+        tuple val(name), path(reads)
+        val (outdir)
+        val (hisat2_args)
+        val (verbose)
+
+    output:
+        path "*bam", emit: bam
+        path "*stats.txt", emit: stats
+
+    publishDir "$outdir/hisat2",
+        mode: "copy", overwrite: true
+
+    script:
+
+    if (verbose){
+        println ("[MODULE] HISAT2 ARGS: " + hisat2_args)
+    }
+
+    cores = 4
+    readString = ""
+    hisat_options = hisat2_args
+
+    // Options we add are
+    hisat_options = hisat_options + " --no-unal --no-softclip "
+
+    if (reads instanceof List) {
+        readString = "-1 "+reads[0]+" -2 "+reads[1]
+        hisat_options = hisat_options + " --no-mixed --no-discordant"
+    }
+    else {
+        readString = "-U "+reads
+    }
+    index = params.genome["hisat2"]
+
+    splices = ''
+    if (params.genome.containsKey("hisat2_splices")){
+        splices = " --known-splicesite-infile " + params.genome["hisat2_splices"]
+    }
+    else{
+        println ("No key 'hisat2_splices' was supplied. Skipping...")
+    }
+    hisat_name = name + "_" + params.genome["name"]
+
+    """
+    module load hisat2
+    module load samtools
+    hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
+    """
+
+}
\ No newline at end of file
diff --git a/tools/hisat2/meta.yml b/tools/hisat2/meta.yml
new file mode 100644
index 00000000..ee79ba99
--- /dev/null
+++ b/tools/hisat2/meta.yml
@@ -0,0 +1,37 @@
+name: HISAT2
+description: Graph-based alignment of next generation sequencing reads to a population of genomes
+keywords:
+    - Alignment
+    - Short reads
+    - graph FM Index (GFM)
+    - RNA-seq
+tools:
+    - hisat2:
+        description: |
+            HISAT2 is a fast and sensitive alignment program for mapping next-generation
+            sequencing reads (whole-genome, transcriptome, and exome sequencing data)
+            against the general human population (as well as against a single reference genome).
+            Based on GCSA (an extension of BWT for a graph) it is designed and implemented as a
+            graph FM index (GFM).
+        homepage: http://daehwankimlab.github.io/hisat2/
+        documentation: https://ccb.jhu.edu/software/hisat2/manual.shtml
+input:
+    -
+        - sample_id:
+            type: string
+            description: Sample identifier
+        - reads:
+            type: file
+            description: Input FastQ file, or pair of files
+output:
+    -
+        - report:
+            type: file
+            description: mapping statistics report
+            pattern: "*hisat2_stats.txt"
+        - alignment:
+            type: file
+            description: alignment file in BAM format
+            pattern: "*hisat2.bam"
+authors:
+    - "@FelixKrueger"
diff --git a/tools/hisat2/test/main.nf b/tools/hisat2/test/main.nf
new file mode 100755
index 00000000..f846c94c
--- /dev/null
+++ b/tools/hisat2/test/main.nf
@@ -0,0 +1,34 @@
+#!/usr/bin/env nextflow
+nextflow.preview.dsl=2
+
+params.outdir = "."
+params.genome = ""
+params.hisat2_args = ''
+// HISAT2 arguments should be supplied in the following format to work:
+// --hisat2_args="--score-min L,0,-0.8"
+
+params.verbose = false
+
+if (params.verbose){
+    println ("[WORKFLOW] HISAT2 ARGS ARE: " + params.hisat2_args)
+}
+// for other genomes this needs to be handled somehow to return all possible genomes
+genomeValues = ["name" : params.genome]
+genomeValues["hisat2"] = "/bi/home/fkrueger/VersionControl/nf-core-modules/test-datasets/indices/hisat2/E_coli/${params.genome}";
+
+include '../main.nf' params(genome: genomeValues)
+
+ch_read_files = Channel
+    .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
+    // .view() // to check whether the input channel works
+
+workflow {
+
+    main:
+        HISAT2(ch_read_files, params.outdir, params.hisat2_args, params.verbose)
+}
+
+
+
+
+
diff --git a/tools/hisat2/test/nextflow.config b/tools/hisat2/test/nextflow.config
new file mode 100644
index 00000000..63c458ca
--- /dev/null
+++ b/tools/hisat2/test/nextflow.config
@@ -0,0 +1,2 @@
+// docker.enabled = true
+params.outdir = './results'

From 23c2fd8520793b2961562992b044661fb135806d Mon Sep 17 00:00:00 2001
From: Phil Ewels
Date: Sat, 11 Jul 2020 13:27:56 +0200
Subject: [PATCH 2/3] Apply suggestions from code review

Removes the println that echoed the full bowtie2 command line and the
two `module load` lines from the BOWTIE2 script block.

---
 tools/bowtie2/main.nf | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/bowtie2/main.nf b/tools/bowtie2/main.nf
index e5ebf47f..6d1dc55e 100644
--- a/tools/bowtie2/main.nf
+++ b/tools/bowtie2/main.nf
@@ -45,10 +45,7 @@ process BOWTIE2 {
     index = params.genome["bowtie2"]
     bowtie2_name = name + "_" + params.genome["name"]
 
-    println ("bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam")
     """
-    module load bowtie2
-    module load samtools
     bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam
     """
 

From 7ef2a978829274606974750e0d53578ceacd83e4 Mon Sep 17 00:00:00 2001
From: Phil Ewels
Date: Sat, 11 Jul 2020 13:28:41 +0200
Subject: [PATCH 3/3] Update tools/hisat2/main.nf

Removes the two `module load` lines from the HISAT2 script block and
adds the missing newline at the end of the file.

---
 tools/hisat2/main.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/hisat2/main.nf b/tools/hisat2/main.nf
index e9d33993..55e6fe78 100644
--- a/tools/hisat2/main.nf
+++ b/tools/hisat2/main.nf
@@ -52,9 +52,7 @@ process HISAT2 {
     hisat_name = name + "_" + params.genome["name"]
 
     """
-    module load hisat2
-    module load samtools
     hisat2 -p ${cores} ${hisat_options} -x ${index} ${splices} ${readString} 2>${hisat_name}_hisat2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${hisat_name}_hisat2.bam
     """
 
-}
\ No newline at end of file
+}