mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-12-22 11:08:17 +00:00
Merge pull request #60 from drpatelh/master
Add docs and tests for TrimGalore!
This commit is contained in:
commit
ad4151703f
15 changed files with 227 additions and 79 deletions
|
@ -1,18 +1,18 @@
|
|||
name: Trim Galore!
|
||||
name: trimgalore
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- software/trim_galore/**
|
||||
- .github/workflows/trim_galore.yml
|
||||
- software/trimgalore/**
|
||||
- .github/workflows/trimgalore.yml
|
||||
- tests
|
||||
pull_request:
|
||||
paths:
|
||||
- software/trim_galore/**
|
||||
- .github/workflows/trim_galore.yml
|
||||
- software/trimgalore/**
|
||||
- .github/workflows/trimgalore.yml
|
||||
- tests
|
||||
|
||||
jobs:
|
||||
run_ci_test:
|
||||
ci_test:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
NXF_ANSI_LOG: false
|
||||
|
@ -22,8 +22,9 @@ jobs:
|
|||
|
||||
- name: Install Nextflow
|
||||
run: |
|
||||
export NXF_VER="20.07.1"
|
||||
wget -qO- get.nextflow.io | bash
|
||||
sudo mv nextflow /usr/local/bin/
|
||||
|
||||
# Test the module
|
||||
- run: nextflow run ./software/trim_galore/test/
|
||||
- run: nextflow run ./software/trimgalore/test/ -profile docker
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
|
@ -1,40 +1,98 @@
|
|||
name: Trim Galore!
|
||||
name: trimgalore
|
||||
description: Trim FastQ files using Trim Galore!
|
||||
keywords:
|
||||
- trimming
|
||||
- adapters
|
||||
- sequencing adapters
|
||||
- trimming
|
||||
- adapters
|
||||
- sequencing adapters
|
||||
tools:
|
||||
- fastqc:
|
||||
description: |
|
||||
A wrapper tool around Cutadapt and FastQC to consistently apply quality
|
||||
and adapter trimming to FastQ files, with some extra functionality for
|
||||
MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
|
||||
homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
|
||||
documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
|
||||
- trimgalore:
|
||||
description: |
|
||||
A wrapper tool around Cutadapt and FastQC to consistently apply quality
|
||||
and adapter trimming to FastQ files, with some extra functionality for
|
||||
MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
|
||||
homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
|
||||
documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
|
||||
params:
|
||||
- outdir:
|
||||
type: string
|
||||
description: |
|
||||
The pipeline's output directory. By default, the module will
|
||||
output files into `$params.outdir/<SOFTWARE>`
|
||||
- publish_dir_mode:
|
||||
type: string
|
||||
description: |
|
||||
Value for the Nextflow `publishDir` mode parameter.
|
||||
Available: symlink, rellink, link, copy, copyNoFollow, move.
|
||||
- conda:
|
||||
type: boolean
|
||||
description: |
|
||||
Run the module with Conda using the software specified
|
||||
via the `conda` directive
|
||||
- clip_r1:
|
||||
type: integer
|
||||
description: |
|
||||
Instructs Trim Galore to remove bp from the 5' end of read 1
|
||||
(or single-end reads)
|
||||
- clip_r2:
|
||||
type: integer
|
||||
description: |
|
||||
Instructs Trim Galore to remove bp from the 5' end of read 2
|
||||
(paired-end reads only)
|
||||
- three_prime_clip_r1:
|
||||
type: integer
|
||||
description: |
|
||||
Instructs Trim Galore to remove bp from the 3' end of read 1
|
||||
AFTER adapter/quality trimming has been performed
|
||||
- three_prime_clip_r2:
|
||||
type: integer
|
||||
description: |
|
||||
Instructs Trim Galore to remove bp from the 3' end of read 2
|
||||
AFTER adapter/quality trimming has been performed
|
||||
input:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: Input FastQ file, or pair of files
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
- options:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing module options for passing command-line arguments and
|
||||
output file paths.
|
||||
output:
|
||||
-
|
||||
- sample_id:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- trimmed_fastq:
|
||||
type: file
|
||||
description: Trimmed FastQ files
|
||||
pattern: "*fq.gz"
|
||||
-
|
||||
- report:
|
||||
type: file
|
||||
description: Trim Galore! trimming report
|
||||
pattern: "*trimming_report.txt"
|
||||
|
||||
- meta:
|
||||
type: map
|
||||
description: |
|
||||
Groovy Map containing sample information
|
||||
e.g. [ id:'test', single_end:false ]
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of adapter-trimmed FastQ files of size 1 and 2 for
|
||||
single-end and paired-end data, respectively.
|
||||
pattern: "*.fq.gz"
|
||||
- html:
|
||||
type: file
|
||||
description: FastQC report (optional)
|
||||
pattern: "*_fastqc.html"
|
||||
- zip:
|
||||
type: file
|
||||
description: FastQC report archive (optional)
|
||||
pattern: "*_fastqc.zip"
|
||||
- log:
|
||||
type: file
|
||||
description: Trim Galore! trimming report
|
||||
pattern: "*report.txt"
|
||||
- version:
|
||||
type: file
|
||||
description: File containing software version
|
||||
pattern: "*.version.txt"
|
||||
authors:
|
||||
- "@drpatelh"
|
||||
- "@ewels"
|
||||
- "@FelixKrueger"
|
||||
|
|
1
software/trimgalore/test/input/test_single_end.fastq.gz
Symbolic link
1
software/trimgalore/test/input/test_single_end.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_single_end.fastq.gz
|
|
@ -1,27 +1,35 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl=2
|
||||
|
||||
params.outdir = "." // gets set in the nextflow.config files (to './results/trim_galore')
|
||||
params.verbose = false
|
||||
params.trim_galore_args = ''
|
||||
// trim_galore_args are best passed into the workflow in the following manner, e.g.:
|
||||
// --trim_galore_args="--clip_r1 10 --clip_r2 15 -j 2"
|
||||
nextflow.enable.dsl = 2
|
||||
|
||||
if (params.verbose){
|
||||
println ("[WORKFLOW] TRIM GALORE ARGS: " + params.trim_galore_args)
|
||||
include { TRIMGALORE } from '../main.nf'
|
||||
|
||||
/*
|
||||
* Test with single-end data
|
||||
*/
|
||||
workflow test_single_end {
|
||||
|
||||
def input = []
|
||||
input = [ [ id:'test', single_end:true ], // meta map
|
||||
[ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ] ]
|
||||
|
||||
TRIMGALORE ( input, [ publish_dir:'test_single_end' ] )
|
||||
}
|
||||
|
||||
// TODO: check the output files in some way
|
||||
// include '../../../tests/functions/check_process_outputs.nf'
|
||||
include '../main.nf' // params (clip_r1: 6, clip_r2: 10) // how to pass additional parameters
|
||||
/*
|
||||
* Test with paired-end data
|
||||
*/
|
||||
workflow test_paired_end {
|
||||
|
||||
ch_read_files = Channel
|
||||
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
|
||||
// .view() // to check whether the input channel works
|
||||
def input = []
|
||||
input = [ [ id:'test', single_end:false ], // meta map
|
||||
[ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
|
||||
file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ]
|
||||
|
||||
TRIMGALORE ( input, [ publish_dir:'test_paired_end' ] )
|
||||
}
|
||||
|
||||
workflow {
|
||||
|
||||
main:
|
||||
TRIM_GALORE (ch_read_files, params.outdir, params.trim_galore_args, params.verbose)
|
||||
|
||||
test_single_end()
|
||||
test_paired_end()
|
||||
}
|
||||
|
|
|
@ -1,2 +1,25 @@
|
|||
// docker.enabled = true
|
||||
params.outdir = './results'
|
||||
|
||||
params {
|
||||
outdir = "output/"
|
||||
publish_dir_mode = "copy"
|
||||
conda = false
|
||||
|
||||
clip_r1 = 0
|
||||
clip_r2 = 0
|
||||
three_prime_clip_r1 = 0
|
||||
three_prime_clip_r2 = 0
|
||||
}
|
||||
|
||||
profiles {
|
||||
conda {
|
||||
params.conda = true
|
||||
}
|
||||
docker {
|
||||
docker.enabled = true
|
||||
docker.runOptions = '-u \$(id -u):\$(id -g)'
|
||||
}
|
||||
singularity {
|
||||
singularity.enabled = true
|
||||
singularity.autoMounts = true
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
|
|
@ -1 +0,0 @@
|
|||
../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
|
|
@ -1,14 +1,14 @@
|
|||
|
||||
SUMMARISING RUN PARAMETERS
|
||||
==========================
|
||||
Input filename: test_R1.fastq.gz
|
||||
Input filename: test_1.fastq.gz
|
||||
Trimming mode: paired-end
|
||||
Trim Galore version: 0.6.5
|
||||
Cutadapt version: 2.3
|
||||
Trim Galore version: 0.6.4_dev
|
||||
Cutadapt version: 2.6
|
||||
Number of cores used for trimming: 1
|
||||
Quality Phred score cutoff: 20
|
||||
Quality encoding type selected: ASCII+33
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
|
||||
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||
Maximum trimming error rate: 0.1 (default)
|
||||
Minimum required adapter overlap (stringency): 1 bp
|
||||
|
@ -16,10 +16,10 @@ Minimum required sequence length for both reads before a sequence pair gets remo
|
|||
Output file will be GZIP compressed
|
||||
|
||||
|
||||
This is cutadapt 2.3 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz
|
||||
This is cutadapt 2.6 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_1.fastq.gz
|
||||
Processing reads on 1 core in single-end mode ...
|
||||
Finished in 0.19 s (19 us/read; 3.12 M reads/minute).
|
||||
Finished in 0.64 s (64 us/read; 0.94 M reads/minute).
|
||||
|
||||
=== Summary ===
|
||||
|
||||
|
@ -91,7 +91,7 @@ length count expect max.err error counts
|
|||
67 1 0.0 1 0 1
|
||||
70 2 0.0 1 0 2
|
||||
|
||||
RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz
|
||||
RUN STATISTICS FOR INPUT FILE: test_1.fastq.gz
|
||||
=============================================
|
||||
10000 sequences processed in total
|
||||
|
Binary file not shown.
|
@ -1,14 +1,14 @@
|
|||
|
||||
SUMMARISING RUN PARAMETERS
|
||||
==========================
|
||||
Input filename: test_R2.fastq.gz
|
||||
Input filename: test_2.fastq.gz
|
||||
Trimming mode: paired-end
|
||||
Trim Galore version: 0.6.5
|
||||
Cutadapt version: 2.3
|
||||
Trim Galore version: 0.6.4_dev
|
||||
Cutadapt version: 2.6
|
||||
Number of cores used for trimming: 1
|
||||
Quality Phred score cutoff: 20
|
||||
Quality encoding type selected: ASCII+33
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
|
||||
Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
|
||||
Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
|
||||
Maximum trimming error rate: 0.1 (default)
|
||||
Minimum required adapter overlap (stringency): 1 bp
|
||||
|
@ -16,10 +16,10 @@ Minimum required sequence length for both reads before a sequence pair gets remo
|
|||
Output file will be GZIP compressed
|
||||
|
||||
|
||||
This is cutadapt 2.3 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz
|
||||
This is cutadapt 2.6 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_2.fastq.gz
|
||||
Processing reads on 1 core in single-end mode ...
|
||||
Finished in 0.22 s (22 us/read; 2.71 M reads/minute).
|
||||
Finished in 0.70 s (70 us/read; 0.86 M reads/minute).
|
||||
|
||||
=== Summary ===
|
||||
|
||||
|
@ -91,7 +91,7 @@ length count expect max.err error counts
|
|||
70 1 0.0 1 0 1
|
||||
73 2 0.0 1 0 2
|
||||
|
||||
RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz
|
||||
RUN STATISTICS FOR INPUT FILE: test_2.fastq.gz
|
||||
=============================================
|
||||
10000 sequences processed in total
|
||||
|
Binary file not shown.
|
@ -0,0 +1,61 @@
|
|||
|
||||
SUMMARISING RUN PARAMETERS
|
||||
==========================
|
||||
Input filename: test.fastq.gz
|
||||
Trimming mode: single-end
|
||||
Trim Galore version: 0.6.4_dev
|
||||
Cutadapt version: 2.6
|
||||
Number of cores used for trimming: 1
|
||||
Quality Phred score cutoff: 20
|
||||
Quality encoding type selected: ASCII+33
|
||||
Unable to auto-detect most prominent adapter from the first specified file (count Illumina: 0, count smallRNA: 0, count Nextera: 0)
|
||||
Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).
|
||||
Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
|
||||
Maximum trimming error rate: 0.1 (default)
|
||||
Minimum required adapter overlap (stringency): 1 bp
|
||||
Minimum required sequence length before a sequence gets removed: 20 bp
|
||||
Output file will be GZIP compressed
|
||||
|
||||
|
||||
This is cutadapt 2.6 with Python 3.7.3
|
||||
Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC test.fastq.gz
|
||||
Processing reads on 1 core in single-end mode ...
|
||||
Finished in 0.06 s (28 us/read; 2.13 M reads/minute).
|
||||
|
||||
=== Summary ===
|
||||
|
||||
Total reads processed: 2,052
|
||||
Reads with adapters: 223 (10.9%)
|
||||
Reads written (passing filters): 2,052 (100.0%)
|
||||
|
||||
Total basepairs processed: 103,432 bp
|
||||
Quality-trimmed: 11 bp (0.0%)
|
||||
Total written (filtered): 103,117 bp (99.7%)
|
||||
|
||||
=== Adapter 1 ===
|
||||
|
||||
Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 223 times.
|
||||
|
||||
No. of allowed errors:
|
||||
0-9 bp: 0; 10-13 bp: 1
|
||||
|
||||
Bases preceding removed adapters:
|
||||
A: 31.8%
|
||||
C: 37.7%
|
||||
G: 16.1%
|
||||
T: 14.3%
|
||||
none/other: 0.0%
|
||||
|
||||
Overview of removed sequences
|
||||
length count expect max.err error counts
|
||||
1 190 513.0 0 190
|
||||
2 3 128.2 0 3
|
||||
3 16 32.1 0 16
|
||||
4 10 8.0 0 10
|
||||
5 4 2.0 0 4
|
||||
|
||||
RUN STATISTICS FOR INPUT FILE: test.fastq.gz
|
||||
=============================================
|
||||
2052 sequences processed in total
|
||||
Sequences removed because they became shorter than the length cutoff of 20 bp: 0 (0.0%)
|
||||
|
Binary file not shown.
Loading…
Reference in a new issue