test(trimgalore): Refactor se and pe to use pytest-workflow

This commit is contained in:
Edmund Miller 2020-11-24 20:56:03 -06:00
parent 445498d0c3
commit 2fc39e02e2
No known key found for this signature in database
GPG key ID: BD387FF7BC10AA9D
13 changed files with 61 additions and 322 deletions

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/rna/test_R1.fastq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/rna/test_R2.fastq.gz

View file

@ -1 +0,0 @@
../../../../tests/data/fastq/rna/test_single_end.fastq.gz

View file

@ -1,36 +0,0 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { TRIMGALORE as TRIMGALORE_SE } from '../main.nf' addParams( options: [ publish_dir:'test_single_end' ] )
include { TRIMGALORE as TRIMGALORE_PE } from '../main.nf' addParams( options: [ publish_dir:'test_paired_end' ] )
/*
 * Single-end test: feed one FASTQ file from the local input/ directory
 * to the TRIMGALORE_SE alias of the module.
 */
workflow test_single_end {
    // Meta map: sample id plus the flag marking the sample as single-end
    def meta  = [ id:'test', single_end:true ]
    // checkIfExists makes the run fail early if the test file is missing
    def reads = [ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ]

    // The process receives a single [ meta, reads ] tuple
    TRIMGALORE_SE ( [ meta, reads ] )
}
/*
 * Paired-end test: feed the R1/R2 FASTQ pair from the local input/ directory
 * to the TRIMGALORE_PE alias of the module.
 */
workflow test_paired_end {
    // Meta map: sample id plus the flag marking the sample as paired-end
    def meta  = [ id:'test', single_end:false ]
    // Both mates, R1 first; checkIfExists makes the run fail early if either is missing
    def reads = [
        file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
        file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true)
    ]

    // The process receives a single [ meta, reads ] tuple
    TRIMGALORE_PE ( [ meta, reads ] )
}
/*
 * Default (unnamed) entry workflow: invokes both test workflows,
 * single-end first, then paired-end.
 */
workflow {
    test_single_end()
    test_paired_end()
}

View file

@ -1,25 +0,0 @@
// Default parameters for the trimgalore module test run.
params {
    // Where results are written and how they are published
    outdir              = "output/"
    publish_dir_mode    = "copy"

    // Switched on by the `conda` profile
    enable_conda        = false

    // Clipping settings, all zero for the test.
    // NOTE(review): presumably read by the TRIMGALORE module to build the
    // corresponding Trim Galore clipping options -- confirm in ../main.nf.
    clip_r1             = 0
    clip_r2             = 0
    three_prime_clip_r1 = 0
    three_prime_clip_r2 = 0
}
// Execution profiles, selected on the command line with `-profile <name>`.
profiles {
    // Conda: only flips the enable_conda parameter
    conda {
        params {
            enable_conda = true
        }
    }
    // Docker: enable the engine and pass the invoking user's uid:gid to `docker run`
    docker {
        docker {
            enabled    = true
            // `\$` keeps the $(...) substitutions literal in the option string
            runOptions = '-u \$(id -u):\$(id -g)'
        }
    }
    // Singularity: enable the engine with automatic mounts
    singularity {
        singularity {
            enabled    = true
            autoMounts = true
        }
    }
}

View file

@ -1,97 +0,0 @@
SUMMARISING RUN PARAMETERS
==========================
Input filename: test_1.fastq.gz
Trimming mode: paired-end
Trim Galore version: 0.6.4_dev
Cutadapt version: 2.6
Number of cores used for trimming: 1
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
Maximum trimming error rate: 0.1 (default)
Minimum required adapter overlap (stringency): 1 bp
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
Output file will be GZIP compressed
This is cutadapt 2.6 with Python 3.7.3
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_1.fastq.gz
Processing reads on 1 core in single-end mode ...
Finished in 0.64 s (64 us/read; 0.94 M reads/minute).
=== Summary ===
Total reads processed: 10,000
Reads with adapters: 3,225 (32.2%)
Reads written (passing filters): 10,000 (100.0%)
Total basepairs processed: 760,000 bp
Quality-trimmed: 4,492 bp (0.6%)
Total written (filtered): 748,403 bp (98.5%)
=== Adapter 1 ===
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times.
No. of allowed errors:
0-9 bp: 0; 10-12 bp: 1
Bases preceding removed adapters:
A: 23.8%
C: 28.2%
G: 22.7%
T: 25.3%
none/other: 0.0%
Overview of removed sequences
length count expect max.err error counts
1 2170 2500.0 0 2170
2 622 625.0 0 622
3 223 156.2 0 223
4 64 39.1 0 64
5 14 9.8 0 14
6 9 2.4 0 9
7 8 0.6 0 8
8 5 0.2 0 5
9 4 0.0 0 4
10 8 0.0 1 7 1
11 3 0.0 1 3
12 4 0.0 1 4
13 6 0.0 1 6
14 5 0.0 1 4 1
15 5 0.0 1 5
16 6 0.0 1 5 1
17 3 0.0 1 3
18 3 0.0 1 3
19 1 0.0 1 1
20 3 0.0 1 3
21 7 0.0 1 7
22 7 0.0 1 7
23 3 0.0 1 3
24 6 0.0 1 6
25 4 0.0 1 4
26 2 0.0 1 2
27 4 0.0 1 4
28 1 0.0 1 1
29 3 0.0 1 3
30 4 0.0 1 4
32 3 0.0 1 3
33 2 0.0 1 1 1
34 1 0.0 1 1
35 1 0.0 1 1
40 1 0.0 1 1
42 1 0.0 1 0 1
45 1 0.0 1 0 1
49 1 0.0 1 0 1
52 1 0.0 1 0 1
56 2 0.0 1 0 2
59 1 0.0 1 0 1
67 1 0.0 1 0 1
70 2 0.0 1 0 2
RUN STATISTICS FOR INPUT FILE: test_1.fastq.gz
=============================================
10000 sequences processed in total

View file

@ -1,100 +0,0 @@
SUMMARISING RUN PARAMETERS
==========================
Input filename: test_2.fastq.gz
Trimming mode: paired-end
Trim Galore version: 0.6.4_dev
Cutadapt version: 2.6
Number of cores used for trimming: 1
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
Maximum trimming error rate: 0.1 (default)
Minimum required adapter overlap (stringency): 1 bp
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
Output file will be GZIP compressed
This is cutadapt 2.6 with Python 3.7.3
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_2.fastq.gz
Processing reads on 1 core in single-end mode ...
Finished in 0.70 s (70 us/read; 0.86 M reads/minute).
=== Summary ===
Total reads processed: 10,000
Reads with adapters: 3,295 (33.0%)
Reads written (passing filters): 10,000 (100.0%)
Total basepairs processed: 760,000 bp
Quality-trimmed: 7,096 bp (0.9%)
Total written (filtered): 745,649 bp (98.1%)
=== Adapter 1 ===
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times.
No. of allowed errors:
0-9 bp: 0; 10-12 bp: 1
Bases preceding removed adapters:
A: 22.6%
C: 28.2%
G: 23.6%
T: 25.6%
none/other: 0.0%
Overview of removed sequences
length count expect max.err error counts
1 2213 2500.0 0 2213
2 647 625.0 0 647
3 239 156.2 0 239
4 53 39.1 0 53
5 10 9.8 0 10
6 7 2.4 0 7
7 8 0.6 0 8
8 5 0.2 0 5
9 5 0.0 0 5
10 10 0.0 1 8 2
11 2 0.0 1 2
12 4 0.0 1 4
13 7 0.0 1 7
14 3 0.0 1 3
15 4 0.0 1 4
16 5 0.0 1 5
17 3 0.0 1 3
18 5 0.0 1 4 1
19 2 0.0 1 1 1
20 3 0.0 1 3
21 7 0.0 1 7
22 6 0.0 1 6
23 3 0.0 1 3
24 7 0.0 1 7
25 4 0.0 1 4
26 2 0.0 1 2
27 4 0.0 1 4
28 1 0.0 1 1
29 3 0.0 1 3
30 4 0.0 1 4
32 3 0.0 1 3
33 1 0.0 1 1
34 1 0.0 1 1
35 2 0.0 1 1 1
40 1 0.0 1 0 1
41 1 0.0 1 1
46 1 0.0 1 0 1
48 1 0.0 1 0 1
49 2 0.0 1 0 2
56 2 0.0 1 0 2
59 1 0.0 1 0 1
70 1 0.0 1 0 1
73 2 0.0 1 0 2
RUN STATISTICS FOR INPUT FILE: test_2.fastq.gz
=============================================
10000 sequences processed in total
Total number of sequences analysed for the sequence pair length validation: 10000
Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%)

View file

@ -1,61 +0,0 @@
SUMMARISING RUN PARAMETERS
==========================
Input filename: test.fastq.gz
Trimming mode: single-end
Trim Galore version: 0.6.4_dev
Cutadapt version: 2.6
Number of cores used for trimming: 1
Quality Phred score cutoff: 20
Quality encoding type selected: ASCII+33
Unable to auto-detect most prominent adapter from the first specified file (count Illumina: 0, count smallRNA: 0, count Nextera: 0)
Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).
Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
Maximum trimming error rate: 0.1 (default)
Minimum required adapter overlap (stringency): 1 bp
Minimum required sequence length before a sequence gets removed: 20 bp
Output file will be GZIP compressed
This is cutadapt 2.6 with Python 3.7.3
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC test.fastq.gz
Processing reads on 1 core in single-end mode ...
Finished in 0.06 s (28 us/read; 2.13 M reads/minute).
=== Summary ===
Total reads processed: 2,052
Reads with adapters: 223 (10.9%)
Reads written (passing filters): 2,052 (100.0%)
Total basepairs processed: 103,432 bp
Quality-trimmed: 11 bp (0.0%)
Total written (filtered): 103,117 bp (99.7%)
=== Adapter 1 ===
Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 223 times.
No. of allowed errors:
0-9 bp: 0; 10-13 bp: 1
Bases preceding removed adapters:
A: 31.8%
C: 37.7%
G: 16.1%
T: 14.3%
none/other: 0.0%
Overview of removed sequences
length count expect max.err error counts
1 190 513.0 0 190
2 3 128.2 0 3
3 16 32.1 0 16
4 10 8.0 0 10
5 4 2.0 0 4
RUN STATISTICS FOR INPUT FILE: test.fastq.gz
=============================================
2052 sequences processed in total
Sequences removed because they became shorter than the length cutoff of 20 bp: 0 (0.0%)

View file

@ -0,0 +1,40 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { TRIMGALORE as TRIMGALORE_SE } from '../../../software/trimgalore/main.nf' addParams( options: [ publish_dir:'test_single_end' ] )
include { TRIMGALORE as TRIMGALORE_PE } from '../../../software/trimgalore/main.nf' addParams( options: [ publish_dir:'test_paired_end' ] )
/*
 * Single-end test: feed one FASTQ file from tests/data (resolved against the
 * launch directory) to the TRIMGALORE_SE alias of the module.
 */
workflow test_trimgalore_single_end {
    // Meta map: sample id plus the flag marking the sample as single-end
    def meta  = [ id:'test', single_end:true ]
    // checkIfExists makes the run fail early if the test file is missing
    def reads = [ file("${launchDir}/tests/data/fastq/rna/test_single_end.fastq.gz", checkIfExists: true) ]

    // The process receives a single [ meta, reads ] tuple
    TRIMGALORE_SE ( [ meta, reads ] )
}
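// NOTE(review): disabled variant. It reuses the name test_trimgalore_single_end but sets
// single_end:false on the single-end FASTQ, so enabling it as-is would presumably clash
// with the workflow of the same name above. Rename or remove it before re-enabling.
// workflow test_trimgalore_single_end {
// def input = []
// input = [ [ id:'test', single_end:false ], // meta map
// [ file("${launchDir}/tests/data/fastq/rna/test_single_end.fastq.gz", checkIfExists: true) ] ]
// TRIMGALORE_SE ( input )
// }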
/*
 * Paired-end test: feed the R1/R2 FASTQ pair from tests/data (resolved against
 * the launch directory) to the TRIMGALORE_PE alias of the module.
 */
workflow test_trimgalore_paired_end {
    // Meta map: sample id plus the flag marking the sample as paired-end
    def meta  = [ id:'test', single_end:false ]
    // Both mates, R1 first; checkIfExists makes the run fail early if either is missing
    def reads = [
        file("${launchDir}/tests/data/fastq/rna/test_R1.fastq.gz", checkIfExists: true),
        file("${launchDir}/tests/data/fastq/rna/test_R2.fastq.gz", checkIfExists: true)
    ]

    // The process receives a single [ meta, reads ] tuple
    TRIMGALORE_PE ( [ meta, reads ] )
}

View file

@ -0,0 +1,21 @@
# pytest-workflow cases for the trimgalore module: each entry runs one named
# workflow from tests/software/trimgalore/ and lists the files it must produce.
- name: Run trimgalore single-end test workflow
  command: nextflow run ./tests/software/trimgalore/ -profile docker -entry test_trimgalore_single_end -c tests/config/nextflow.config
  tags: [trimgalore]
  files:
    # Existence checks only: these outputs can't be md5-checked reliably.
    # TODO: assert on expected report content (pytest-workflow `contains`).
    - path: output/test_single_end/test.fastq.gz_trimming_report.txt
    - path: output/test_single_end/test_trimmed.fq.gz

- name: Run trimgalore paired-end test workflow
  command: nextflow run ./tests/software/trimgalore/ -profile docker -entry test_trimgalore_paired_end -c tests/config/nextflow.config
  tags: [trimgalore]
  files:
    # Existence checks only: these outputs can't be md5-checked reliably.
    # TODO: assert on expected report content (pytest-workflow `contains`).
    - path: output/test_paired_end/test_1.fastq.gz_trimming_report.txt
    - path: output/test_paired_end/test_1_val_1.fq.gz
    - path: output/test_paired_end/test_2.fastq.gz_trimming_report.txt
    - path: output/test_paired_end/test_2_val_2.fq.gz