Merge pull request #60 from drpatelh/master

Add docs and tests for TrimGalore!
2024-12-22 19:18:17 +00:00 · 2020-08-07 12:27:43 +01:00 · 2020-08-07 12:27:43 +01:00 · ad4151703f
commit ad4151703f
parent 7c900e2d17 9f4192d433
15 changed files with 227 additions and 79 deletions
--- a/.github/workflows/deprecated/trim_galore.yml
+++ b/.github/workflows/deprecated/trim_galore.yml
@ -1,18 +1,18 @@
-name: Trim Galore!
+name: trimgalore
 on:
  push:
    paths:
-      - software/trim_galore/**
-      - .github/workflows/trim_galore.yml
+      - software/trimgalore/**
+      - .github/workflows/trimgalore.yml
      - tests
  pull_request:
    paths:
-      - software/trim_galore/**
-      - .github/workflows/trim_galore.yml
+      - software/trimgalore/**
+      - .github/workflows/trimgalore.yml
      - tests

 jobs:
-  run_ci_test:
+  ci_test:
    runs-on: ubuntu-latest
    env:
      NXF_ANSI_LOG: false
@ -22,8 +22,9 @@ jobs:

    - name: Install Nextflow
      run: |
+        export NXF_VER="20.07.1"
        wget -qO- get.nextflow.io | bash
        sudo mv nextflow /usr/local/bin/

    # Test the module
-    - run: nextflow run ./software/trim_galore/test/
+    - run: nextflow run ./software/trimgalore/test/ -profile docker
--- a/software/fastqc/test/input/test_R1_val_1.fq.gz
+++ b/software/fastqc/test/input/test_R1_val_1.fq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
--- a/software/fastqc/test/input/test_R2_val_2.fq.gz
+++ b/software/fastqc/test/input/test_R2_val_2.fq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
--- a/software/trimgalore/meta.yml
+++ b/software/trimgalore/meta.yml
@ -1,40 +1,98 @@
-name: Trim Galore!
+name: trimgalore
 description: Trim FastQ files using Trim Galore!
 keywords:
-    - trimming
-    - adapters
-    - sequencing adapters
+  - trimming
+  - adapters
+  - sequencing adapters
 tools:
-    - fastqc:
-        description: |
-            A wrapper tool around Cutadapt and FastQC to consistently apply quality
-            and adapter trimming to FastQ files, with some extra functionality for
-            MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries.
-        homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
-        documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
+  - trimgalore:
+      description: |
+        A wrapper tool around Cutadapt and FastQC to consistently apply quality
+        and adapter trimming to FastQ files, with some extra functionality for
+        MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
+      homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+      documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
+params:
+  - outdir:
+      type: string
+      description: |
+        The pipeline's output directory. By default, the module will
+        output files into `$params.outdir/<SOFTWARE>`
+  - publish_dir_mode:
+      type: string
+      description: |
+        Value for the Nextflow `publishDir` mode parameter.
+        Available: symlink, rellink, link, copy, copyNoFollow, move.
+  - conda:
+      type: boolean
+      description: |
+        Run the module with Conda using the software specified
+        via the `conda` directive
+  - clip_r1:
+      type: integer
+      description: |
+        Number of bp that Trim Galore removes from the 5' end of read 1
+        (or single-end reads)
+  - clip_r2:
+      type: integer
+      description: |
+        Number of bp that Trim Galore removes from the 5' end of read 2
+        (paired-end reads only)
+  - three_prime_clip_r1:
+      type: integer
+      description: |
+        Number of bp that Trim Galore removes from the 3' end of read 1
+        AFTER adapter/quality trimming has been performed
+  - three_prime_clip_r2:
+      type: integer
+      description: |
+        Number of bp that Trim Galore removes from the 3' end of read 2
+        AFTER adapter/quality trimming has been performed
 input:
-    -
-        - sample_id:
-            type: string
-            description: Sample identifier
-        - reads:
-            type: file
-            description: Input FastQ file, or pair of files
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - options:
+      type: map
+      description: |
+        Groovy Map containing module options for passing command-line arguments and
+        output file paths.
 output:
-    -
-        - sample_id:
-            type: string
-            description: Sample identifier
-        - trimmed_fastq:
-            type: file
-            description: Trimmed FastQ files
-            pattern: "*fq.gz"
-    -
-        - report:
-            type: file
-            description: Trim Galore! trimming report
-            pattern: "*trimming_report.txt"
-
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of adapter-trimmed FastQ files of size 1 and 2 for
+        single-end and paired-end data, respectively.
+      pattern: "*.fq.gz"
+  - html:
+      type: file
+      description: FastQC report (optional)
+      pattern: "*_fastqc.html"
+  - zip:
+      type: file
+      description: FastQC report archive (optional)
+      pattern: "*_fastqc.zip"
+  - log:
+      type: file
+      description: Trim Galore! trimming report
+      pattern: "*report.txt"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.version.txt"
 authors:
+    - "@drpatelh"
    - "@ewels"
    - "@FelixKrueger"
--- a/software/trimgalore/test/input/test_single_end.fastq.gz
+++ b/software/trimgalore/test/input/test_single_end.fastq.gz
@ -0,0 +1 @@
+../../../../tests/data/fastq/rna/test_single_end.fastq.gz
--- a/software/trimgalore/test/main.nf
+++ b/software/trimgalore/test/main.nf
@ -1,27 +1,35 @@
 #!/usr/bin/env nextflow
-nextflow.preview.dsl=2

-params.outdir = "."             // gets set in the nextflow.config files (to './results/trim_galore')
-params.verbose = false
-params.trim_galore_args = ''
-// trim_galore_args are best passed into the workflow in the following manner, e.g.:
-// --trim_galore_args="--clip_r1 10 --clip_r2 15 -j 2"
+nextflow.enable.dsl = 2

-if (params.verbose){
-    println ("[WORKFLOW] TRIM GALORE ARGS: "      + params.trim_galore_args)
+include { TRIMGALORE } from '../main.nf'
+
+/*
+ * Test with single-end data
+ */
+workflow test_single_end {
+
+    def input = []
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ] ]
+
+    TRIMGALORE ( input, [ publish_dir:'test_single_end' ] )
 }

-// TODO: check the output files in some way
-// include '../../../tests/functions/check_process_outputs.nf'
-include '../main.nf'  // params (clip_r1: 6, clip_r2: 10) // how to pass additional parameters
+/*
+ * Test with paired-end data
+ */
+workflow test_paired_end {

-ch_read_files = Channel
-  .fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
-  // .view()  // to check whether the input channel works
+    def input = []
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
+                file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ]
+
+    TRIMGALORE ( input, [ publish_dir:'test_paired_end' ] )
+}

 workflow {
-
-    main:
-        TRIM_GALORE (ch_read_files, params.outdir, params.trim_galore_args, params.verbose)
-
+    test_single_end()
+    test_paired_end()
 }
--- a/software/trimgalore/test/nextflow.config
+++ b/software/trimgalore/test/nextflow.config
@ -1,2 +1,25 @@
-// docker.enabled = true
-params.outdir = './results'
+
+params {
+  outdir = "output/"
+  publish_dir_mode = "copy"
+  conda = false
+
+  clip_r1 = 0
+  clip_r2 = 0
+  three_prime_clip_r1 = 0
+  three_prime_clip_r2 = 0
+}
+
+profiles {
+  conda  {
+    params.conda = true
+  }
+  docker {
+    docker.enabled = true
+    docker.runOptions = '-u \$(id -u):\$(id -g)'
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.autoMounts = true
+  }
+}
--- a/software/trimgalore/test/output/test_R1_val_1.fq.gz
+++ b/software/trimgalore/test/output/test_R1_val_1.fq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz
--- a/software/trimgalore/test/output/test_R2_val_2.fq.gz
+++ b/software/trimgalore/test/output/test_R2_val_2.fq.gz
@ -1 +0,0 @@
-../../../../tests/data/fastq/rna/test_R2_val_2.fq.gz
--- a/software/trimgalore/test/output/test_paired_end/test_1.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_paired_end/test_1.fastq.gz_trimming_report.txt
@ -1,14 +1,14 @@

 SUMMARISING RUN PARAMETERS
 ==========================
-Input filename: test_R1.fastq.gz
+Input filename: test_1.fastq.gz
 Trimming mode: paired-end
-Trim Galore version: 0.6.5
-Cutadapt version: 2.3
+Trim Galore version: 0.6.4_dev
+Cutadapt version: 2.6
 Number of cores used for trimming: 1
 Quality Phred score cutoff: 20
 Quality encoding type selected: ASCII+33
-Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
+Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
 Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
 Maximum trimming error rate: 0.1 (default)
 Minimum required adapter overlap (stringency): 1 bp
@ -16,10 +16,10 @@ Minimum required sequence length for both reads before a sequence pair gets remo
 Output file will be GZIP compressed


-This is cutadapt 2.3 with Python 3.7.3
-Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R1.fastq.gz
+This is cutadapt 2.6 with Python 3.7.3
+Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_1.fastq.gz
 Processing reads on 1 core in single-end mode ...
-Finished in 0.19 s (19 us/read; 3.12 M reads/minute).
+Finished in 0.64 s (64 us/read; 0.94 M reads/minute).

 === Summary ===

@ -91,7 +91,7 @@ length	count	expect	max.err	error counts
 67	1	0.0	1	0 1
 70	2	0.0	1	0 2

-RUN STATISTICS FOR INPUT FILE: test_R1.fastq.gz
+RUN STATISTICS FOR INPUT FILE: test_1.fastq.gz
 =============================================
 10000 sequences processed in total

--- a/software/trimgalore/test/output/test_paired_end/test_1_val_1.fq.gz
+++ b/software/trimgalore/test/output/test_paired_end/test_1_val_1.fq.gz
--- a/software/trimgalore/test/output/test_paired_end/test_2.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_paired_end/test_2.fastq.gz_trimming_report.txt
@ -1,14 +1,14 @@

 SUMMARISING RUN PARAMETERS
 ==========================
-Input filename: test_R2.fastq.gz
+Input filename: test_2.fastq.gz
 Trimming mode: paired-end
-Trim Galore version: 0.6.5
-Cutadapt version: 2.3
+Trim Galore version: 0.6.4_dev
+Cutadapt version: 2.6
 Number of cores used for trimming: 1
 Quality Phred score cutoff: 20
 Quality encoding type selected: ASCII+33
-Using Nextera adapter for trimming (count: 83). Second best hit was smallRNA (count: 0)
+Using Nextera adapter for trimming (count: 83). Second best hit was Illumina (count: 0)
 Adapter sequence: 'CTGTCTCTTATA' (Nextera Transposase sequence; auto-detected)
 Maximum trimming error rate: 0.1 (default)
 Minimum required adapter overlap (stringency): 1 bp
@ -16,10 +16,10 @@ Minimum required sequence length for both reads before a sequence pair gets remo
 Output file will be GZIP compressed


-This is cutadapt 2.3 with Python 3.7.3
-Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_R2.fastq.gz
+This is cutadapt 2.6 with Python 3.7.3
+Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a CTGTCTCTTATA test_2.fastq.gz
 Processing reads on 1 core in single-end mode ...
-Finished in 0.22 s (22 us/read; 2.71 M reads/minute).
+Finished in 0.70 s (70 us/read; 0.86 M reads/minute).

 === Summary ===

@ -91,7 +91,7 @@ length	count	expect	max.err	error counts
 70	1	0.0	1	0 1
 73	2	0.0	1	0 2

-RUN STATISTICS FOR INPUT FILE: test_R2.fastq.gz
+RUN STATISTICS FOR INPUT FILE: test_2.fastq.gz
 =============================================
 10000 sequences processed in total

--- a/software/trimgalore/test/output/test_paired_end/test_2_val_2.fq.gz
+++ b/software/trimgalore/test/output/test_paired_end/test_2_val_2.fq.gz
--- a/software/trimgalore/test/output/test_single_end/test.fastq.gz_trimming_report.txt
+++ b/software/trimgalore/test/output/test_single_end/test.fastq.gz_trimming_report.txt
@ -0,0 +1,61 @@
+
+SUMMARISING RUN PARAMETERS
+==========================
+Input filename: test.fastq.gz
+Trimming mode: single-end
+Trim Galore version: 0.6.4_dev
+Cutadapt version: 2.6
+Number of cores used for trimming: 1
+Quality Phred score cutoff: 20
+Quality encoding type selected: ASCII+33
+Unable to auto-detect most prominent adapter from the first specified file (count Illumina: 0, count smallRNA: 0, count Nextera: 0)
+Defaulting to Illumina universal adapter ( AGATCGGAAGAGC ). Specify -a SEQUENCE to avoid this behavior).
+Adapter sequence: 'AGATCGGAAGAGC' (Illumina TruSeq, Sanger iPCR; default (inconclusive auto-detection))
+Maximum trimming error rate: 0.1 (default)
+Minimum required adapter overlap (stringency): 1 bp
+Minimum required sequence length before a sequence gets removed: 20 bp
+Output file will be GZIP compressed
+
+
+This is cutadapt 2.6 with Python 3.7.3
+Command line parameters: -j 1 -e 0.1 -q 20 -O 1 -a AGATCGGAAGAGC test.fastq.gz
+Processing reads on 1 core in single-end mode ...
+Finished in 0.06 s (28 us/read; 2.13 M reads/minute).
+
+=== Summary ===
+
+Total reads processed:                   2,052
+Reads with adapters:                       223 (10.9%)
+Reads written (passing filters):         2,052 (100.0%)
+
+Total basepairs processed:       103,432 bp
+Quality-trimmed:                      11 bp (0.0%)
+Total written (filtered):        103,117 bp (99.7%)
+
+=== Adapter 1 ===
+
+Sequence: AGATCGGAAGAGC; Type: regular 3'; Length: 13; Trimmed: 223 times.
+
+No. of allowed errors:
+0-9 bp: 0; 10-13 bp: 1
+
+Bases preceding removed adapters:
+  A: 31.8%
+  C: 37.7%
+  G: 16.1%
+  T: 14.3%
+  none/other: 0.0%
+
+Overview of removed sequences
+length	count	expect	max.err	error counts
+1	190	513.0	0	190
+2	3	128.2	0	3
+3	16	32.1	0	16
+4	10	8.0	0	10
+5	4	2.0	0	4
+
+RUN STATISTICS FOR INPUT FILE: test.fastq.gz
+=============================================
+2052 sequences processed in total
+Sequences removed because they became shorter than the length cutoff of 20 bp:	0 (0.0%)
+
--- a/software/trimgalore/test/output/test_single_end/test_trimmed.fq.gz
+++ b/software/trimgalore/test/output/test_single_end/test_trimmed.fq.gz
				`@ -1 +0,0 @@`
				`../../../../tests/data/fastq/rna/test_R1_val_1.fq.gz`
				`@ -0,0 +1 @@`
				`../../../../tests/data/fastq/rna/test_single_end.fastq.gz`