mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-02 20:52:07 -05:00
Copy across test data and docs for trimgalore
This commit is contained in:
parent
50fce8b3cf
commit
ab8a9025a0
9 changed files with 270 additions and 0 deletions
40
software/trimgalore/meta.yml
Normal file
40
software/trimgalore/meta.yml
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
name: Trim Galore!
|
||||||
|
description: Trim FastQ files using Trim Galore!
|
||||||
|
keywords:
|
||||||
|
- trimming
|
||||||
|
- adapters
|
||||||
|
- sequencing adapters
|
||||||
|
tools:
|
||||||
|
- trimgalore:
|
||||||
|
description: |
|
||||||
|
A wrapper tool around Cutadapt and FastQC to consistently apply quality
|
||||||
|
and adapter trimming to FastQ files, with some extra functionality for
|
||||||
|
MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
|
||||||
|
homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
|
||||||
|
documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
|
||||||
|
input:
|
||||||
|
-
|
||||||
|
- sample_id:
|
||||||
|
type: string
|
||||||
|
description: Sample identifier
|
||||||
|
- reads:
|
||||||
|
type: file
|
||||||
|
description: Input FastQ file, or pair of files
|
||||||
|
output:
|
||||||
|
-
|
||||||
|
- sample_id:
|
||||||
|
type: string
|
||||||
|
description: Sample identifier
|
||||||
|
- trimmed_fastq:
|
||||||
|
type: file
|
||||||
|
description: Trimmed FastQ files
|
||||||
|
pattern: "*fq.gz"
|
||||||
|
-
|
||||||
|
- report:
|
||||||
|
type: file
|
||||||
|
description: Trim Galore! trimming report
|
||||||
|
pattern: "*trimming_report.txt"
|
||||||
|
|
||||||
|
authors:
|
||||||
|
- "@ewels"
|
||||||
|
- "@FelixKrueger"
|
1
software/trimgalore/test/input/test_R1.fastq.gz
Symbolic link
1
software/trimgalore/test/input/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/trimgalore/test/input/test_R2.fastq.gz
Symbolic link
1
software/trimgalore/test/input/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
27
software/trimgalore/test/main.nf
Executable file
27
software/trimgalore/test/main.nf
Executable file
|
@ -0,0 +1,27 @@
|
||||||
|
#!/usr/bin/env nextflow
|
||||||
|
nextflow.preview.dsl=2
|
||||||
|
|
||||||
|
params.outdir = "." // overridden by the accompanying nextflow.config (which sets it to './results')
|
||||||
|
params.verbose = false
|
||||||
|
params.trim_galore_args = ''
|
||||||
|
// trim_galore_args are best passed into the workflow in the following manner, e.g.:
|
||||||
|
// --trim_galore_args="--clip_r1 10 --clip_r2 15 -j 2"
|
||||||
|
|
||||||
|
if (params.verbose){
|
||||||
|
println ("[WORKFLOW] TRIM GALORE ARGS: " + params.trim_galore_args)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: check the output files in some way
|
||||||
|
// include '../../../tests/functions/check_process_outputs.nf'
|
||||||
|
include '../main.nf' // params (clip_r1: 6, clip_r2: 10) // how to pass additional parameters
|
||||||
|
|
||||||
|
ch_read_files = Channel
|
||||||
|
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
|
||||||
|
// .view() // to check whether the input channel works
|
||||||
|
|
||||||
|
workflow {
|
||||||
|
|
||||||
|
main:
|
||||||
|
TRIM_GALORE (ch_read_files, params.outdir, params.trim_galore_args, params.verbose)
|
||||||
|
|
||||||
|
}
|
2
software/trimgalore/test/nextflow.config
Normal file
2
software/trimgalore/test/nextflow.config
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
// docker.enabled = true
|
||||||
|
params.outdir = './results'
|
|
@ -0,0 +1,97 @@
|
||||||
|
|
||||||
|
SUMMARISING RUN PARAMETERS
|
||||||
|
==========================
|
||||||
|
Input filename: test_R1.fastq.gz
|
||||||
|
Trimming mode: paired-end
|
||||||
|
Trim Galore version: 0.6.5
|
||||||
|
Cutadapt version: 2.3
|
||||||
|
Number of cores used for trimming: 1
|
||||||
|
Quality Phred score cutoff: 20
|
||||||
|
Quality encoding type selected: ASCII+33
|
||||||
|
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||||
|
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||||
|
Maximum trimming error rate: 0.1 (default)
|
||||||
|
Minimum required adapter overlap (stringency): 1 bp
|
||||||
|
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||||
|
Output file will be GZIP compressed
|
||||||
|
|
||||||
|
|
||||||
|
This is cutadapt 2.3 with Python 3.7.3
|
||||||
|
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz
|
||||||
|
Processing reads on 1 core in single-end mode ...
|
||||||
|
Finished in 0.19 s (19 us/read; 3.12 M reads/minute).
|
||||||
|
|
||||||
|
=== Summary ===
|
||||||
|
|
||||||
|
Total reads processed: 10,000
|
||||||
|
Reads with adapters: 3,225 (32.2%)
|
||||||
|
Reads written (passing filters): 10,000 (100.0%)
|
||||||
|
|
||||||
|
Total basepairs processed: 760,000 bp
|
||||||
|
Quality-trimmed: 4,492 bp (0.6%)
|
||||||
|
Total written (filtered): 748,403 bp (98.5%)
|
||||||
|
|
||||||
|
=== Adapter 1 ===
|
||||||
|
|
||||||
|
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3225 times.
|
||||||
|
|
||||||
|
No. of allowed errors:
|
||||||
|
0-9 bp: 0; 10-12 bp: 1
|
||||||
|
|
||||||
|
Bases preceding removed adapters:
|
||||||
|
A: 23.8%
|
||||||
|
C: 28.2%
|
||||||
|
G: 22.7%
|
||||||
|
T: 25.3%
|
||||||
|
none/other: 0.0%
|
||||||
|
|
||||||
|
Overview of removed sequences
|
||||||
|
length count expect max.err error counts
|
||||||
|
1 2170 2500.0 0 2170
|
||||||
|
2 622 625.0 0 622
|
||||||
|
3 223 156.2 0 223
|
||||||
|
4 64 39.1 0 64
|
||||||
|
5 14 9.8 0 14
|
||||||
|
6 9 2.4 0 9
|
||||||
|
7 8 0.6 0 8
|
||||||
|
8 5 0.2 0 5
|
||||||
|
9 4 0.0 0 4
|
||||||
|
10 8 0.0 1 7 1
|
||||||
|
11 3 0.0 1 3
|
||||||
|
12 4 0.0 1 4
|
||||||
|
13 6 0.0 1 6
|
||||||
|
14 5 0.0 1 4 1
|
||||||
|
15 5 0.0 1 5
|
||||||
|
16 6 0.0 1 5 1
|
||||||
|
17 3 0.0 1 3
|
||||||
|
18 3 0.0 1 3
|
||||||
|
19 1 0.0 1 1
|
||||||
|
20 3 0.0 1 3
|
||||||
|
21 7 0.0 1 7
|
||||||
|
22 7 0.0 1 7
|
||||||
|
23 3 0.0 1 3
|
||||||
|
24 6 0.0 1 6
|
||||||
|
25 4 0.0 1 4
|
||||||
|
26 2 0.0 1 2
|
||||||
|
27 4 0.0 1 4
|
||||||
|
28 1 0.0 1 1
|
||||||
|
29 3 0.0 1 3
|
||||||
|
30 4 0.0 1 4
|
||||||
|
32 3 0.0 1 3
|
||||||
|
33 2 0.0 1 1 1
|
||||||
|
34 1 0.0 1 1
|
||||||
|
35 1 0.0 1 1
|
||||||
|
40 1 0.0 1 1
|
||||||
|
42 1 0.0 1 0 1
|
||||||
|
45 1 0.0 1 0 1
|
||||||
|
49 1 0.0 1 0 1
|
||||||
|
52 1 0.0 1 0 1
|
||||||
|
56 2 0.0 1 0 2
|
||||||
|
59 1 0.0 1 0 1
|
||||||
|
67 1 0.0 1 0 1
|
||||||
|
70 2 0.0 1 0 2
|
||||||
|
|
||||||
|
RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz
|
||||||
|
=============================================
|
||||||
|
10000 sequences processed in total
|
||||||
|
|
1
software/trimgalore/test/output/test_R1_val_1.fq.gz
Symbolic link
1
software/trimgalore/test/output/test_R1_val_1.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -0,0 +1,100 @@
|
||||||
|
|
||||||
|
SUMMARISING RUN PARAMETERS
|
||||||
|
==========================
|
||||||
|
Input filename: test_R2.fastq.gz
|
||||||
|
Trimming mode: paired-end
|
||||||
|
Trim Galore version: 0.6.5
|
||||||
|
Cutadapt version: 2.3
|
||||||
|
Number of cores used for trimming: 1
|
||||||
|
Quality Phred score cutoff: 20
|
||||||
|
Quality encoding type selected: ASCII+33
|
||||||
|
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||||
|
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||||
|
Maximum trimming error rate: 0.1 (default)
|
||||||
|
Minimum required adapter overlap (stringency): 1 bp
|
||||||
|
Minimum required sequence length for both reads before a sequence pair gets removed: 20 bp
|
||||||
|
Output file will be GZIP compressed
|
||||||
|
|
||||||
|
|
||||||
|
This is cutadapt 2.3 with Python 3.7.3
|
||||||
|
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz
|
||||||
|
Processing reads on 1 core in single-end mode ...
|
||||||
|
Finished in 0.22 s (22 us/read; 2.71 M reads/minute).
|
||||||
|
|
||||||
|
=== Summary ===
|
||||||
|
|
||||||
|
Total reads processed: 10,000
|
||||||
|
Reads with adapters: 3,295 (33.0%)
|
||||||
|
Reads written (passing filters): 10,000 (100.0%)
|
||||||
|
|
||||||
|
Total basepairs processed: 760,000 bp
|
||||||
|
Quality-trimmed: 7,096 bp (0.9%)
|
||||||
|
Total written (filtered): 745,649 bp (98.1%)
|
||||||
|
|
||||||
|
=== Adapter 1 ===
|
||||||
|
|
||||||
|
Sequence: CTGTCTCTTATA; Type: regular 3'; Length: 12; Trimmed: 3295 times.
|
||||||
|
|
||||||
|
No. of allowed errors:
|
||||||
|
0-9 bp: 0; 10-12 bp: 1
|
||||||
|
|
||||||
|
Bases preceding removed adapters:
|
||||||
|
A: 22.6%
|
||||||
|
C: 28.2%
|
||||||
|
G: 23.6%
|
||||||
|
T: 25.6%
|
||||||
|
none/other: 0.0%
|
||||||
|
|
||||||
|
Overview of removed sequences
|
||||||
|
length count expect max.err error counts
|
||||||
|
1 2213 2500.0 0 2213
|
||||||
|
2 647 625.0 0 647
|
||||||
|
3 239 156.2 0 239
|
||||||
|
4 53 39.1 0 53
|
||||||
|
5 10 9.8 0 10
|
||||||
|
6 7 2.4 0 7
|
||||||
|
7 8 0.6 0 8
|
||||||
|
8 5 0.2 0 5
|
||||||
|
9 5 0.0 0 5
|
||||||
|
10 10 0.0 1 8 2
|
||||||
|
11 2 0.0 1 2
|
||||||
|
12 4 0.0 1 4
|
||||||
|
13 7 0.0 1 7
|
||||||
|
14 3 0.0 1 3
|
||||||
|
15 4 0.0 1 4
|
||||||
|
16 5 0.0 1 5
|
||||||
|
17 3 0.0 1 3
|
||||||
|
18 5 0.0 1 4 1
|
||||||
|
19 2 0.0 1 1 1
|
||||||
|
20 3 0.0 1 3
|
||||||
|
21 7 0.0 1 7
|
||||||
|
22 6 0.0 1 6
|
||||||
|
23 3 0.0 1 3
|
||||||
|
24 7 0.0 1 7
|
||||||
|
25 4 0.0 1 4
|
||||||
|
26 2 0.0 1 2
|
||||||
|
27 4 0.0 1 4
|
||||||
|
28 1 0.0 1 1
|
||||||
|
29 3 0.0 1 3
|
||||||
|
30 4 0.0 1 4
|
||||||
|
32 3 0.0 1 3
|
||||||
|
33 1 0.0 1 1
|
||||||
|
34 1 0.0 1 1
|
||||||
|
35 2 0.0 1 1 1
|
||||||
|
40 1 0.0 1 0 1
|
||||||
|
41 1 0.0 1 1
|
||||||
|
46 1 0.0 1 0 1
|
||||||
|
48 1 0.0 1 0 1
|
||||||
|
49 2 0.0 1 0 2
|
||||||
|
56 2 0.0 1 0 2
|
||||||
|
59 1 0.0 1 0 1
|
||||||
|
70 1 0.0 1 0 1
|
||||||
|
73 2 0.0 1 0 2
|
||||||
|
|
||||||
|
RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz
|
||||||
|
=============================================
|
||||||
|
10000 sequences processed in total
|
||||||
|
|
||||||
|
Total number of sequences analysed for the sequence pair length validation: 10000
|
||||||
|
|
||||||
|
Number of sequence pairs removed because at least one read was shorter than the length cutoff (20 bp): 21 (0.21%)
|
1
software/trimgalore/test/output/test_R2_val_2.fq.gz
Symbolic link
1
software/trimgalore/test/output/test_R2_val_2.fq.gz
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
Loading…
Reference in a new issue