From a5265513001fdbcea14388670fdfabe5ae2367f6 Mon Sep 17 00:00:00 2001
From: FelixKrueger
Date: Thu, 19 Mar 2020 18:13:15 +0000
Subject: [PATCH] Added Bowtie2 module and test workflow

---
 tools/bowtie2/main.nf              | 76 ++++++++++++++++++++++++++++++
 tools/bowtie2/meta.yml             | 39 +++++++++++++++
 tools/bowtie2/test/main.nf         | 45 ++++++++++++++++++
 tools/bowtie2/test/nextflow.config |  2 +
 4 files changed, 162 insertions(+)
 create mode 100644 tools/bowtie2/main.nf
 create mode 100644 tools/bowtie2/meta.yml
 create mode 100755 tools/bowtie2/test/main.nf
 create mode 100644 tools/bowtie2/test/nextflow.config

diff --git a/tools/bowtie2/main.nf b/tools/bowtie2/main.nf
new file mode 100644
index 00000000..e5ebf47f
--- /dev/null
+++ b/tools/bowtie2/main.nf
@@ -0,0 +1,76 @@
+nextflow.preview.dsl=2
+params.genome = ''
+
+// Aligns single-end or paired-end reads with Bowtie 2 and converts the alignments to BAM
+// with samtools; whatever Bowtie 2 writes to stderr is saved as a stats file.
+//
+// Inputs:
+//   name, reads  - sample name and its read file(s); a List is treated as a read pair
+//   outdir       - output files are copied to "$outdir/bowtie2"
+//   bowtie2_args - extra command line options passed on to bowtie2
+//   verbose      - if true, print the extra bowtie2 options
+// Outputs:
+//   bam   - files matching "*bam"
+//   stats - files matching "*stats.txt"
+//
+// params.genome must be a map with the keys "bowtie2" (value passed to `bowtie2 -x`)
+// and "name" (appended to the sample name to form the output file names).
+process BOWTIE2 {
+    // depending on the genome used one might want/need to adjust the memory settings.
+    // For the E. coli test data this is probably not required
+
+    // label 'bigMem'
+    // label 'multiCore'
+
+    input:
+        tuple val(name), path(reads)
+        val (outdir)
+        val (bowtie2_args)
+        val (verbose)
+
+    output:
+        path "*bam", emit: bam
+        path "*stats.txt", emit: stats
+
+    publishDir "$outdir/bowtie2",
+        mode: "copy", overwrite: true
+
+    script:
+    if (verbose){
+        println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args)
+    }
+
+    // Thread count passed to `bowtie2 -p`
+    def cores = 4
+
+    def readString = ""
+
+    // Options we add are
+    def bowtie2_options = bowtie2_args
+    bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file
+
+    // single-end / paired-end distinction. Might also be handled via params.single_end
+    if (reads instanceof List) {
+        readString = "-1 " + reads[0] + " -2 " + reads[1]
+    }
+    else {
+        readString = "-U " + reads
+    }
+
+    def index = params.genome["bowtie2"]
+    def bowtie2_name = name + "_" + params.genome["name"]
+
+    // Build the command once so that the printed and the executed command cannot drift apart.
+    // -F 268 = 4 + 8 + 256 (read unmapped, mate unmapped, secondary alignment), given as one
+    // mask so the filter does not depend on how samtools combines repeated -F options.
+    // The lone "-" tells samtools to read from stdin.
+    def command = "bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 268 - > ${bowtie2_name}_bowtie2.bam"
+
+    println (command)
+    """
+    module load bowtie2
+    module load samtools
+    ${command}
+    """
+
+}
diff --git a/tools/bowtie2/meta.yml b/tools/bowtie2/meta.yml
new file mode 100644
index 00000000..b3b31a94
--- /dev/null
+++ b/tools/bowtie2/meta.yml
@@ -0,0 +1,39 @@
+name: Bowtie 2
+description: Ultrafast alignment to reference genome
+keywords:
+    - Alignment
+    - Short reads
+    - FM Index
+tools:
+    - bowtie2:
+        description: |
+            Bowtie 2 is an ultrafast and memory-efficient tool for aligning sequencing reads
+            to long reference sequences. It is particularly good at aligning reads of about
+            50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively
+            long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index to keep
+            its memory footprint small: for the human genome, its memory footprint is typically
+            around 3.2 GB. Bowtie 2 supports gapped, local, and paired-end alignment modes.
+        homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
+        documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
+input:
+    -
+      - sample_id:
+          type: string
+          description: Sample identifier
+      - reads:
+          type: file
+          description: Input FastQ file, or pair of files
+output:
+    -
+      - report:
+          type: file
+          description: mapping statistics report
+          # quoted: an unquoted leading "*" would be read as a YAML alias
+          pattern: "*bowtie2_stats.txt"
+      - alignment:
+          type: file
+          description: alignment file in BAM format
+          pattern: "*bowtie2.bam"
+authors:
+    # quoted: "@" cannot start an unquoted YAML value
+    - "@FelixKrueger"
diff --git a/tools/bowtie2/test/main.nf b/tools/bowtie2/test/main.nf
new file mode 100755
index 00000000..5c02914b
--- /dev/null
+++ b/tools/bowtie2/test/main.nf
@@ -0,0 +1,45 @@
+#!/usr/bin/env nextflow
+nextflow.preview.dsl=2
+
+// Test workflow for the BOWTIE2 module.
+// Run it from this directory (the read glob below is a relative path), e.g.
+//   nextflow run main.nf --genome NAME
+// where NAME is the index name inside params.bowtie2_index_dir.
+
+params.outdir = "."
+params.genome = ""
+params.bowtie2_args = ''
+// Bowtie2 arguments should be supplied in the following format to work:
+// --bowtie2_args="--score-min L,0,-0.8"
+
+params.verbose = false
+
+// Directory containing the Bowtie 2 indices; defaults to the test-datasets directory
+// three levels above this script. Override with --bowtie2_index_dir.
+params.bowtie2_index_dir = "${baseDir}/../../../test-datasets/indices/bowtie2/E_coli"
+
+if (params.verbose){
+    println ("[WORKFLOW] BOWTIE2 ARGS: " + params.bowtie2_args)
+}
+
+// Stop early: without a genome the index path and the output file names are incomplete
+if (!params.genome) {
+    exit 1, "No genome specified, please supply one with --genome"
+}
+
+// for other genomes this needs to be handled somehow to return all possible genomes
+genomeValues = ["name" : params.genome]
+genomeValues["bowtie2"] = "${params.bowtie2_index_dir}/${params.genome}"
+
+include '../main.nf' params(genome: genomeValues)
+
+ch_read_files = Channel
+    .fromFilePairs('../../../test-datasets/Ecoli*{1,2}.fastq.gz',size:-1)
+    // .view() // to check whether the input channel works
+
+workflow {
+
+    main:
+        BOWTIE2(ch_read_files, params.outdir, params.bowtie2_args, params.verbose)
+
+}
diff --git a/tools/bowtie2/test/nextflow.config b/tools/bowtie2/test/nextflow.config
new file mode 100644
index 00000000..c137a138
--- /dev/null
+++ b/tools/bowtie2/test/nextflow.config
@@ -0,0 +1,2 @@
+docker.enabled = true
+params.outdir = './results'