From ab8a9025a0b4eecad45ad0d1d4339662543d111c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 5 Aug 2020 17:28:25 +0100 Subject: [PATCH] Copy across test data and docs for trimgalore --- software/trimgalore/meta.yml | 40 +++++++ .../trimgalore/test/input/test_R1.fastq.gz | 1 + .../trimgalore/test/input/test_R2.fastq.gz | 1 + software/trimgalore/test/main.nf | 27 +++++ software/trimgalore/test/nextflow.config | 2 + .../test_R1.fastq.gz_trimming_report.txt | 97 +++++++++++++++++ .../test/output/test_R1_val_1.fq.gz | 1 + .../test_R2.fastq.gz_trimming_report.txt | 100 ++++++++++++++++++ .../test/output/test_R2_val_2.fq.gz | 1 + 9 files changed, 270 insertions(+) create mode 100644 software/trimgalore/meta.yml create mode 120000 software/trimgalore/test/input/test_R1.fastq.gz create mode 120000 software/trimgalore/test/input/test_R2.fastq.gz create mode 100755 software/trimgalore/test/main.nf create mode 100644 software/trimgalore/test/nextflow.config create mode 100644 software/trimgalore/test/output/test_R1.fastq.gz_trimming_report.txt create mode 120000 software/trimgalore/test/output/test_R1_val_1.fq.gz create mode 100644 software/trimgalore/test/output/test_R2.fastq.gz_trimming_report.txt create mode 120000 software/trimgalore/test/output/test_R2_val_2.fq.gz diff --git a/software/trimgalore/meta.yml b/software/trimgalore/meta.yml new file mode 100644 index 00000000..642fb51b --- /dev/null +++ b/software/trimgalore/meta.yml @@ -0,0 +1,40 @@ +name: Trim Galore! +description: Trim FastQ files using Trim Galore! +keywords: + - trimming + - adapters + - sequencing adapters +tools: + - fastqc: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. 
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md +input: + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: Input FastQ file, or pair of files +output: + - + - sample_id: + type: string + description: Sample identifier + - trimmed_fastq: + type: file + description: Trimmed FastQ files + pattern: "*fq.gz" + - + - report: + type: file + description: Trim Galore! trimming report + pattern: "*trimming_report.txt" + +authors: + - "@ewels" + - "@FelixKrueger" diff --git a/software/trimgalore/test/input/test_R1.fastq.gz b/software/trimgalore/test/input/test_R1.fastq.gz new file mode 120000 index 00000000..e7b4b614 --- /dev/null +++ b/software/trimgalore/test/input/test_R1.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1.fastq.gz \ No newline at end of file diff --git a/software/trimgalore/test/input/test_R2.fastq.gz b/software/trimgalore/test/input/test_R2.fastq.gz new file mode 120000 index 00000000..5b7b57a3 --- /dev/null +++ b/software/trimgalore/test/input/test_R2.fastq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2.fastq.gz \ No newline at end of file diff --git a/software/trimgalore/test/main.nf b/software/trimgalore/test/main.nf new file mode 100755 index 00000000..5d1183a3 --- /dev/null +++ b/software/trimgalore/test/main.nf @@ -0,0 +1,27 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl=2 + +params.outdir = "." 
// gets set in the nextflow.config files (to './results/trim_galore') +params.verbose = false +params.trim_galore_args = '' +// trim_galore_args are best passed into the workflow in the following manner, e.g.: +// --trim_galore_args="--clip_r1 10 --clip_r2 15 -j 2" + +if (params.verbose){ + println ("[WORKFLOW] TRIM GALORE ARGS: " + params.trim_galore_args) +} + +// TODO: check the output files in some way +// include '../../../tests/functions/check_process_outputs.nf' +include '../main.nf' // params (clip_r1: 6, clip_r2: 10) // how to pass additional parameters + +ch_read_files = Channel + .fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1) + // .view() // to check whether the input channel works + +workflow { + + main: + TRIM_GALORE (ch_read_files, params.outdir, params.trim_galore_args, params.verbose) + +} diff --git a/software/trimgalore/test/nextflow.config b/software/trimgalore/test/nextflow.config new file mode 100644 index 00000000..63c458ca --- /dev/null +++ b/software/trimgalore/test/nextflow.config @@ -0,0 +1,2 @@ +// docker.enabled = true +params.outdir = './results' diff --git a/software/trimgalore/test/output/test_R1.fastq.gz_trimming_report.txt b/software/trimgalore/test/output/test_R1.fastq.gz_trimming_report.txt new file mode 100644 index 00000000..cf0d4bfa --- /dev/null +++ b/software/trimgalore/test/output/test_R1.fastq.gz_trimming_report.txt @@ -0,0 +1,97 @@ + +SUMMARISING RUN PARAMETERS +========================== +Input filename: test_R1.fastq.gz +Trimming mode: paired-end +Trim Galore version: 0.6.5 +Cutadapt version: 2.3 +Number of cores used for trimming: 1 +Quality Phred score cutoff: 20 +Quality encoding type selected: ASCII+33 +Using Nextera adapter for trimming (count: 83). 
Second best hit was smallRNA (count: 0) +Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected) +Maximum trimming error rate: 0.1 (default) +Minimum required adapter overlap (stringency): 1 bp +Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp +Output file will be GZIP compressed + + +This is cutadapt 2.3 with Python 3.7.3 +Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz +Processing reads on 1 core in single-end mode ... +Finished in 0.19 s (19 us/read; 3.12 M reads/minute). + +=== Summary === + +Total reads processed: 10,000 +Reads with adapters: 3,225 (32.2%) +Reads written (passing filters): 10,000 (100.0%) + +Total basepairs processed: 760,000 bp +Quality-trimmed: 4,492 bp (0.6%) +Total written (filtered): 748,403 bp (98.5%) + +=== Adapter 1 === + +Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times. + +No. of allowed errors: +0-9 bp: 0; 10-12 bp: 1 + +Bases preceding removed adapters: + A: 23.8% + C: 28.2% + G: 22.7% + T: 25.3% + none/other: 0.0% + +Overview of removed sequences +length count expect max.err error counts +1 2170 2500.0 0 2170 +2 622 625.0 0 622 +3 223 156.2 0 223 +4 64 39.1 0 64 +5 14 9.8 0 14 +6 9 2.4 0 9 +7 8 0.6 0 8 +8 5 0.2 0 5 +9 4 0.0 0 4 +10 8 0.0 1 7 1 +11 3 0.0 1 3 +12 4 0.0 1 4 +13 6 0.0 1 6 +14 5 0.0 1 4 1 +15 5 0.0 1 5 +16 6 0.0 1 5 1 +17 3 0.0 1 3 +18 3 0.0 1 3 +19 1 0.0 1 1 +20 3 0.0 1 3 +21 7 0.0 1 7 +22 7 0.0 1 7 +23 3 0.0 1 3 +24 6 0.0 1 6 +25 4 0.0 1 4 +26 2 0.0 1 2 +27 4 0.0 1 4 +28 1 0.0 1 1 +29 3 0.0 1 3 +30 4 0.0 1 4 +32 3 0.0 1 3 +33 2 0.0 1 1 1 +34 1 0.0 1 1 +35 1 0.0 1 1 +40 1 0.0 1 1 +42 1 0.0 1 0 1 +45 1 0.0 1 0 1 +49 1 0.0 1 0 1 +52 1 0.0 1 0 1 +56 2 0.0 1 0 2 +59 1 0.0 1 0 1 +67 1 0.0 1 0 1 +70 2 0.0 1 0 2 + +RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz +============================================= +10000 sequences processed in total + diff --git 
a/software/trimgalore/test/output/test_R1_val_1.fq.gz b/software/trimgalore/test/output/test_R1_val_1.fq.gz new file mode 120000 index 00000000..88ccdc87 --- /dev/null +++ b/software/trimgalore/test/output/test_R1_val_1.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz \ No newline at end of file diff --git a/software/trimgalore/test/output/test_R2.fastq.gz_trimming_report.txt b/software/trimgalore/test/output/test_R2.fastq.gz_trimming_report.txt new file mode 100644 index 00000000..c9640b9b --- /dev/null +++ b/software/trimgalore/test/output/test_R2.fastq.gz_trimming_report.txt @@ -0,0 +1,100 @@ + +SUMMARISING RUN PARAMETERS +========================== +Input filename: test_R2.fastq.gz +Trimming mode: paired-end +Trim Galore version: 0.6.5 +Cutadapt version: 2.3 +Number of cores used for trimming: 1 +Quality Phred score cutoff: 20 +Quality encoding type selected: ASCII+33 +Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0) +Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected) +Maximum trimming error rate: 0.1 (default) +Minimum required adapter overlap (stringency): 1 bp +Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp +Output file will be GZIP compressed + + +This is cutadapt 2.3 with Python 3.7.3 +Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz +Processing reads on 1 core in single-end mode ... +Finished in 0.22 s (22 us/read; 2.71 M reads/minute). + +=== Summary === + +Total reads processed: 10,000 +Reads with adapters: 3,295 (33.0%) +Reads written (passing filters): 10,000 (100.0%) + +Total basepairs processed: 760,000 bp +Quality-trimmed: 7,096 bp (0.9%) +Total written (filtered): 745,649 bp (98.1%) + +=== Adapter 1 === + +Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times. + +No. 
of allowed errors: +0-9 bp: 0; 10-12 bp: 1 + +Bases preceding removed adapters: + A: 22.6% + C: 28.2% + G: 23.6% + T: 25.6% + none/other: 0.0% + +Overview of removed sequences +length count expect max.err error counts +1 2213 2500.0 0 2213 +2 647 625.0 0 647 +3 239 156.2 0 239 +4 53 39.1 0 53 +5 10 9.8 0 10 +6 7 2.4 0 7 +7 8 0.6 0 8 +8 5 0.2 0 5 +9 5 0.0 0 5 +10 10 0.0 1 8 2 +11 2 0.0 1 2 +12 4 0.0 1 4 +13 7 0.0 1 7 +14 3 0.0 1 3 +15 4 0.0 1 4 +16 5 0.0 1 5 +17 3 0.0 1 3 +18 5 0.0 1 4 1 +19 2 0.0 1 1 1 +20 3 0.0 1 3 +21 7 0.0 1 7 +22 6 0.0 1 6 +23 3 0.0 1 3 +24 7 0.0 1 7 +25 4 0.0 1 4 +26 2 0.0 1 2 +27 4 0.0 1 4 +28 1 0.0 1 1 +29 3 0.0 1 3 +30 4 0.0 1 4 +32 3 0.0 1 3 +33 1 0.0 1 1 +34 1 0.0 1 1 +35 2 0.0 1 1 1 +40 1 0.0 1 0 1 +41 1 0.0 1 1 +46 1 0.0 1 0 1 +48 1 0.0 1 0 1 +49 2 0.0 1 0 2 +56 2 0.0 1 0 2 +59 1 0.0 1 0 1 +70 1 0.0 1 0 1 +73 2 0.0 1 0 2 + +RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz +============================================= +10000 sequences processed in total + +Total number of sequences analysed for the sequence pair length validation: 10000 + +Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%) diff --git a/software/trimgalore/test/output/test_R2_val_2.fq.gz b/software/trimgalore/test/output/test_R2_val_2.fq.gz new file mode 120000 index 00000000..440be644 --- /dev/null +++ b/software/trimgalore/test/output/test_R2_val_2.fq.gz @@ -0,0 +1 @@ +../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz \ No newline at end of file