test(trimgalore): Refactor se and pe to use pytest-workflow

2024-12-22 02:58:17 +00:00 · 2020-11-24 20:56:03 -06:00 · 2020-11-24 20:56:03 -06:00 · 2fc39e02e2
commit 2fc39e02e2
parent 445498d0c3
13 changed files with 61 additions and 322 deletions
--- a/software/trimgalore/test/input/test_R1.fastq.gz
+++ b/software/trimgalore/test/input/test_R1.fastq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R1.fastq.gz
--- a/software/trimgalore/test/input/test_R2.fastq.gz
+++ b/software/trimgalore/test/input/test_R2.fastq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R2.fastq.gz
--- a/software/trimgalore/test/input/test_single_end.fastq.gz
+++ b/software/trimgalore/test/input/test_single_end.fastq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_single_end.fastq.gz
--- a/software/trimgalore/test/main.nf
+++ b/software/trimgalore/test/main.nf
@ -1,36 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { TRIMGALORE as TRIMGALORE_SE } from '../main.nf'  addParams( options: [ publish_dir:'test_single_end' ] )
-include { TRIMGALORE as TRIMGALORE_PE } from '../main.nf'  addParams( options: [ publish_dir:'test_paired_end' ] )
-
-/*
- * Test with single-end data
- */
-workflow test_single_end {
-
-    def input = []
-    input = [ [ id:'test', single_end:true ], // meta map
-              [ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ] ]
-
-    TRIMGALORE_SE ( input )
-}
-
-/*
- * Test with paired-end data
- */
-workflow test_paired_end {
-
-    def input = []
-    input = [ [ id:'test', single_end:false ], // meta map
-              [ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
-                file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ]
-
-    TRIMGALORE_PE ( input )
-}
-
-workflow {
-    test_single_end()
-    test_paired_end()
-}
--- a/software/trimgalore/test/nextflow.config
+++ b/software/trimgalore/test/nextflow.config
@ -1,25 +0,0 @@
-
-params {
-    outdir = "output/"
-    publish_dir_mode = "copy"
-    enable_conda = false
-
-    clip_r1 = 0
-    clip_r2 = 0
-    three_prime_clip_r1 = 0
-    three_prime_clip_r2 = 0
-}
-
-profiles {
-    conda  {
-        params.enable_conda = true
-    }
-    docker {
-        docker.enabled = true
-        docker.runOptions = '-u \$(id -u):\$(id -g)'
-    }
-    singularity {
-        singularity.enabled = true
-        singularity.autoMounts = true
-    }
-}
--- a/software/trimgalore/test/output/test_paired_end/test_1.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_paired_end/test_1.fastq.gz_trimming_report.txt
@ -1,97 +0,0 @@
-
-SUMMARISING RUN PARAMETERS
-==========================
-Input filename: test_1.fastq.gz
-Trimming mode: paired-end
-Trim Galore version: 0.6.4_dev
-Cutadapt version: 2.6
-Number of cores used for trimming: 1
-Quality Phred score cutoff: 20
-Quality encoding type selected: ASCII+33
-Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
-Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
-Maximum trimming error rate: 0.1 (default)
-Minimum required adapter overlap (stringency): 1 bp
-Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
-Output file will be GZIP compressed
-
-
-This is cutadapt 2.6 with Python 3.7.3
-Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_1.fastq.gz
-Processing reads on 1 core in single-end mode ...
-Finished in 0.64 s (64 us/read; 0.94 M reads/minute).
-
-=== Summary ===
-
-Total reads processed:                  10,000
-Reads with adapters:                     3,225 (32.2%)
-Reads written (passing filters):        10,000 (100.0%)
-
-Total basepairs processed:       760,000 bp
-Quality-trimmed:                   4,492 bp (0.6%)
-Total written (filtered):        748,403 bp (98.5%)
-
-=== Adapter 1 ===
-
-Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times.
-
-No. of allowed errors:
-0-9 bp: 0; 10-12 bp: 1
-
-Bases preceding removed adapters:
-  A: 23.8%
-  C: 28.2%
-  G: 22.7%
-  T: 25.3%
-  none/other: 0.0%
-
-Overview of removed sequences
-length	count	expect	max.err	error counts
-1	2170	2500.0	0	2170
-2	622	625.0	0	622
-3	223	156.2	0	223
-4	64	39.1	0	64
-5	14	9.8	0	14
-6	9	2.4	0	9
-7	8	0.6	0	8
-8	5	0.2	0	5
-9	4	0.0	0	4
-10	8	0.0	1	7 1
-11	3	0.0	1	3
-12	4	0.0	1	4
-13	6	0.0	1	6
-14	5	0.0	1	4 1
-15	5	0.0	1	5
-16	6	0.0	1	5 1
-17	3	0.0	1	3
-18	3	0.0	1	3
-19	1	0.0	1	1
-20	3	0.0	1	3
-21	7	0.0	1	7
-22	7	0.0	1	7
-23	3	0.0	1	3
-24	6	0.0	1	6
-25	4	0.0	1	4
-26	2	0.0	1	2
-27	4	0.0	1	4
-28	1	0.0	1	1
-29	3	0.0	1	3
-30	4	0.0	1	4
-32	3	0.0	1	3
-33	2	0.0	1	1 1
-34	1	0.0	1	1
-35	1	0.0	1	1
-40	1	0.0	1	1
-42	1	0.0	1	0 1
-45	1	0.0	1	0 1
-49	1	0.0	1	0 1
-52	1	0.0	1	0 1
-56	2	0.0	1	0 2
-59	1	0.0	1	0 1
-67	1	0.0	1	0 1
-70	2	0.0	1	0 2
-
-RUN STATISTICS FOR INPUT FILE: test_1.fastq.gz
-=============================================
-10000 sequences processed in total
-
--- a/software/trimgalore/test/output/test_paired_end/test_1_val_1.fq.gz
+++ b/software/trimgalore/test/output/test_paired_end/test_1_val_1.fq.gz
--- a/software/trimgalore/test/output/test_paired_end/test_2.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_paired_end/test_2.fastq.gz_trimming_report.txt
@ -1,100 +0,0 @@
-
-SUMMARISING RUN PARAMETERS
-==========================
-Input filename: test_2.fastq.gz
-Trimming mode: paired-end
-Trim Galore version: 0.6.4_dev
-Cutadapt version: 2.6
-Number of cores used for trimming: 1
-Quality Phred score cutoff: 20
-Quality encoding type selected: ASCII+33
-Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
-Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
-Maximum trimming error rate: 0.1 (default)
-Minimum required adapter overlap (stringency): 1 bp
-Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
-Output file will be GZIP compressed
-
-
-This is cutadapt 2.6 with Python 3.7.3
-Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_2.fastq.gz
-Processing reads on 1 core in single-end mode ...
-Finished in 0.70 s (70 us/read; 0.86 M reads/minute).
-
-=== Summary ===
-
-Total reads processed:                  10,000
-Reads with adapters:                     3,295 (33.0%)
-Reads written (passing filters):        10,000 (100.0%)
-
-Total basepairs processed:       760,000 bp
-Quality-trimmed:                   7,096 bp (0.9%)
-Total written (filtered):        745,649 bp (98.1%)
-
-=== Adapter 1 ===
-
-Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times.
-
-No. of allowed errors:
-0-9 bp: 0; 10-12 bp: 1
-
-Bases preceding removed adapters:
-  A: 22.6%
-  C: 28.2%
-  G: 23.6%
-  T: 25.6%
-  none/other: 0.0%
-
-Overview of removed sequences
-length	count	expect	max.err	error counts
-1	2213	2500.0	0	2213
-2	647	625.0	0	647
-3	239	156.2	0	239
-4	53	39.1	0	53
-5	10	9.8	0	10
-6	7	2.4	0	7
-7	8	0.6	0	8
-8	5	0.2	0	5
-9	5	0.0	0	5
-10	10	0.0	1	8 2
-11	2	0.0	1	2
-12	4	0.0	1	4
-13	7	0.0	1	7
-14	3	0.0	1	3
-15	4	0.0	1	4
-16	5	0.0	1	5
-17	3	0.0	1	3
-18	5	0.0	1	4 1
-19	2	0.0	1	1 1
-20	3	0.0	1	3
-21	7	0.0	1	7
-22	6	0.0	1	6
-23	3	0.0	1	3
-24	7	0.0	1	7
-25	4	0.0	1	4
-26	2	0.0	1	2
-27	4	0.0	1	4
-28	1	0.0	1	1
-29	3	0.0	1	3
-30	4	0.0	1	4
-32	3	0.0	1	3
-33	1	0.0	1	1
-34	1	0.0	1	1
-35	2	0.0	1	1 1
-40	1	0.0	1	0 1
-41	1	0.0	1	1
-46	1	0.0	1	0 1
-48	1	0.0	1	0 1
-49	2	0.0	1	0 2
-56	2	0.0	1	0 2
-59	1	0.0	1	0 1
-70	1	0.0	1	0 1
-73	2	0.0	1	0 2
-
-RUN STATISTICS FOR INPUT FILE: test_2.fastq.gz
-=============================================
-10000 sequences processed in total
-
-Total number of sequences analysed for the sequence pair length validation: 10000
-
-Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%)
--- a/software/trimgalore/test/output/test_paired_end/test_2_val_2.fq.gz
+++ b/software/trimgalore/test/output/test_paired_end/test_2_val_2.fq.gz
--- a/software/trimgalore/test/output/test_single_end/test.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_single_end/test.fastq.gz_trimming_report.txt
@ -1,61 +0,0 @@
-
-SUMMARISING RUN PARAMETERS
-==========================
-Input filename: test.fastq.gz
-Trimming mode: single-end
-Trim Galore version: 0.6.4_dev
-Cutadapt version: 2.6
-Number of cores used for trimming: 1
-Quality Phred score cutoff: 20
-Quality encoding type selected: ASCII+33
-Unable to auto-detect most prominent adapter from the first specified file (count Illumina: 0, count smallRNA: 0, count Nextera: 0)
-Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).
-Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
-Maximum trimming error rate: 0.1 (default)
-Minimum required adapter overlap (stringency): 1 bp
-Minimum required sequence length before a sequence gets removed: 20 bp
-Output file will be GZIP compressed
-
-
-This is cutadapt 2.6 with Python 3.7.3
-Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC test.fastq.gz
-Processing reads on 1 core in single-end mode ...
-Finished in 0.06 s (28 us/read; 2.13 M reads/minute).
-
-=== Summary ===
-
-Total reads processed:                   2,052
-Reads with adapters:                       223 (10.9%)
-Reads written (passing filters):         2,052 (100.0%)
-
-Total basepairs processed:       103,432 bp
-Quality-trimmed:                      11 bp (0.0%)
-Total written (filtered):        103,117 bp (99.7%)
-
-=== Adapter 1 ===
-
-Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 223 times.
-
-No. of allowed errors:
-0-9 bp: 0; 10-13 bp: 1
-
-Bases preceding removed adapters:
-  A: 31.8%
-  C: 37.7%
-  G: 16.1%
-  T: 14.3%
-  none/other: 0.0%
-
-Overview of removed sequences
-length	count	expect	max.err	error counts
-1	190	513.0	0	190
-2	3	128.2	0	3
-3	16	32.1	0	16
-4	10	8.0	0	10
-5	4	2.0	0	4
-
-RUN STATISTICS FOR INPUT FILE: test.fastq.gz
-=============================================
-2052 sequences processed in total
-Sequences removed because they became shorter than the length cutoff of 20 bp:	0 (0.0%)
-
--- a/software/trimgalore/test/output/test_single_end/test_trimmed.fq.gz
+++ b/software/trimgalore/test/output/test_single_end/test_trimmed.fq.gz
--- a/tests/software/trimgalore/main.nf
+++ b/tests/software/trimgalore/main.nf
@ -0,0 +1,40 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { TRIMGALORE as TRIMGALORE_SE } from '../../../software/trimgalore/main.nf'  addParams( options: [ publish_dir:'test_single_end' ] )
+include { TRIMGALORE as TRIMGALORE_PE } from '../../../software/trimgalore/main.nf'  addParams( options: [ publish_dir:'test_paired_end' ] )
+
+/*
+ * Test with single-end data
+ */
+workflow test_trimgalore_single_end {
+
+    def input = []
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file("${launchDir}/tests/data/fastq/rna/test_single_end.fastq.gz", checkIfExists: true) ] ]
+
+    TRIMGALORE_SE ( input )
+}
+
+// workflow test_trimgalore_single_end {
+
+//     def input = []
+//     input = [ [ id:'test', single_end:false ], // meta map
+//               [ file("${launchDir}/tests/data/fastq/rna/test_single_end.fastq.gz", checkIfExists: true) ] ]
+
+//     TRIMGALORE_SE ( input )
+// }
+
+/*
+ * Test with paired-end data
+ */
+workflow test_trimgalore_paired_end {
+
+    def input = []
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file("${launchDir}/tests/data/fastq/rna/test_R1.fastq.gz", checkIfExists: true),
+                file("${launchDir}/tests/data/fastq/rna/test_R2.fastq.gz", checkIfExists: true) ] ]
+
+    TRIMGALORE_PE ( input )
+}
--- a/tests/software/trimgalore/test.yml
+++ b/tests/software/trimgalore/test.yml
@ -0,0 +1,21 @@
+- name: Run trimgalore single-end test workflow
+  command: nextflow run ./tests/software/trimgalore/ -profile docker -entry test_trimgalore_single_end -c tests/config/nextflow.config
+  tags:
+    - trimgalore
+  files:
+    # md5sums are omitted: outputs are not byte-stable (the report logs run timings)
+    # TODO: check stable report lines via pytest-workflow `contains` instead
+    - path: output/test_single_end/test.fastq.gz_trimming_report.txt
+    - path: output/test_single_end/test_trimmed.fq.gz
+
+- name: Run trimgalore paired-end test workflow
+  command: nextflow run ./tests/software/trimgalore/ -profile docker -entry test_trimgalore_paired_end -c tests/config/nextflow.config
+  tags:
+    - trimgalore
+  files:
+    # md5sums are omitted: outputs are not byte-stable (the reports log run timings)
+    # TODO: check stable report lines via pytest-workflow `contains` instead
+    - path: output/test_paired_end/test_1.fastq.gz_trimming_report.txt
+    - path: output/test_paired_end/test_1_val_1.fq.gz
+    - path: output/test_paired_end/test_2.fastq.gz_trimming_report.txt
+    - path: output/test_paired_end/test_2_val_2.fq.gz
				`@ -1 +0,0 @@`
				`../../../../tests/data/fastq/rna/test_R1.fastq.gz`
				`@ -1 +0,0 @@`
				`../../../../tests/data/fastq/rna/test_single_end.fastq.gz`