Merge pull request #58 from drpatelh/master

Get tests working for FastQC
2024-12-31 03:32:10 -05:00 · 2020-08-07 10:11:53 +01:00 · 2020-08-07 10:11:53 +01:00 · 5af5fc395e
commit 5af5fc395e
parent 526ad8a457 6255d9b00c
22 changed files with 697 additions and 837 deletions
--- a/.github/workflows/deprecated/fastqc.yml
+++ b/.github/workflows/deprecated/fastqc.yml
@ -1,4 +1,4 @@
-name: FastQC
+name: fastqc
 on:
  push:
    paths:
@ -12,7 +12,7 @@ on:
      - tests

 jobs:
-  run_ci_test:
+  ci_test:
    runs-on: ubuntu-latest
    env:
      NXF_ANSI_LOG: false
@ -22,9 +22,9 @@ jobs:

    - name: Install Nextflow
      run: |
-        export NXF_VER="20.06.0-edge"
+        export NXF_VER="20.07.1"
        wget -qO- get.nextflow.io | bash
        sudo mv nextflow /usr/local/bin/

    # Test the module
-    - run: nextflow run ./software/fastqc/test/
+    - run: nextflow run ./software/fastqc/test/ -profile docker
--- a/README.md
+++ b/README.md
@ -200,12 +200,14 @@ We also use a standardised parameter called `params.publish_dir_mode` that can b

 - Test files MUST be kept as tiny as possible.

- It is RECOMMENDED to re-use generic files from [`tests/data/`](tests/data/) by symlinking them into the [`test/`](software/fastqc/test) directory of the module.
-
- If the appropriate test data doesn't exist for your module then it MUST be added to [`tests/data`](tests/data/).
-
 - Every module MUST be tested by adding a test workflow with a toy dataset in the [`test/`](software/fastqc/test) directory of the module.

+- Generic files from [`tests/data/`](tests/data/) SHOULD be reused by symlinking them into the [`test/input/`](software/fastqc/test/input/) directory of the module.
+
+- Any outputs produced by the test workflow SHOULD be placed in a folder called [`test/output/`](software/fastqc/test/output/) so that they can be used for unit testing.
+
+- If the appropriate test data doesn't exist for your module then it MUST be added to [`tests/data/`](tests/data/).
+
 ### Documentation

 - A module MUST be documented in the [`meta.yml`](software/fastqc/meta.yml) file. It MUST document `params`, `input` and `output`. `input` and `output` MUST be a nested list.
--- a/software/fastqc/meta.yml
+++ b/software/fastqc/meta.yml
@ -1,4 +1,4 @@
-name: FastQC
+name: fastqc
 description: Run FastQC on sequenced reads
 keywords:
  - Quality Control
@ -15,37 +15,27 @@ tools:
      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
 params:
-  - fastqc_args:
-      type: string
-      description: Additional command line arguments passed to fastqc.
-  - out_dir:
+  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
-        output files into `$out_dir/MODULE_NAME`
-  - publish_dir:
-      type: string
-      description: |
-        Append to the path for the standard output directory provided by `$out_dir`.
+        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
-        Provide a value for the Nextflow `publishDir` mode parameter
-        (e.g. copy, link, ...)
-  - publish_results:
-      type: string
-      description: |
-        Whether or not to publish results into `publish_dir`. Set to `none` to not
-        publish any files at all; to `default` to publish all relevant files.
-input:
-  - name:
-      type: string
-      description: Sample identifier
-  - single_end:
+        Value for the Nextflow `publishDir` mode parameter.
+        Available: symlink, rellink, link, copy, copyNoFollow, move.
+  - conda:
      type: boolean
      description: |
-        Boolean indicating whether the corresponding sample is single-end (true)
-        or paired-end (false).
+        Run the module with Conda using the software specified
+        via the `conda` directive
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
  - reads:
      type: file
      description: |
@ -57,7 +47,7 @@ output:
      description: FastQC report
      pattern: "*_fastqc.{zip,html}"
 authors:
-  - "@grst"
  - "@drpatelh"
+  - "@grst"
  - "@ewels"
  - "@FelixKrueger"
--- a/software/fastqc/test/deprecated/lib
+++ b/software/fastqc/test/deprecated/lib
@ -0,0 +1 @@
+../../../lib/
--- a/software/fastqc/test/deprecated/main.nf
+++ b/software/fastqc/test/deprecated/main.nf
@ -0,0 +1,76 @@
+#!/usr/bin/env nextflow
+import checksum
+nextflow.preview.dsl = 2
+
+params.out_dir = "test_output"
+params.fastqc_args = ''
+params.publish_dir_mode = "copy"
+
+include { FASTQC } from '../main.nf'
+
+/**
+ * Test if FASTQC runs with single-end data: one [name, single_end, file] tuple in, output names checked
+ */
+workflow test_single_end {
+    input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
+                    .map {f -> [f.name.replace(".fastq.gz", ""), true, f]}  // name = file name minus ".fastq.gz"
+    FASTQC(input_files)
+
+    // test that the output looks as expected
+    FASTQC.out.html.map { name, is_single_end, html_file ->
+        def html_hash = checksum.getMD5(new File("${html_file}"));  // `def`: keep it closure-local, not in the shared script binding
+
+        assert name == "test_single_end"
+        assert is_single_end == true
+        assert html_file.getName() == "test_single_end_fastqc.html"
+        // Hash seems to vary between local runs and GitHub Actions
+        // TODO: Might be solved when using Docker for tests?
+        // assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_file ->
+        // NOTE: output zip files do not have a consistent hash
+        assert name == "test_single_end"
+        assert is_single_end == true
+        assert zip_file.getName() == "test_single_end_fastqc.zip"
+    }
+}
+
+/**
+ * Test if FASTQC runs with paired-end data: one [name, single_end, [R1, R2]] tuple in, output names and HTML hashes checked
+ */
+workflow test_paired_end {
+    input_files = Channel.fromFilePairs("${baseDir}/input/test_R{1,2}.fastq.gz")  // anchored to ${baseDir} like the single-end test; a relative glob depends on the launch directory
+                    .map {f -> [f[0], false, f[1]]}  // f[0] = pair name, f[1] = the matched files
+    FASTQC(input_files)
+
+    // test that the output looks as expected
+    FASTQC.out.html.map { name, is_single_end, html_files ->
+        def html_r1 = html_files[0]  // `def` keeps these closure-local instead of in the shared script binding
+        def html_r2 = html_files[1]
+
+        def html_r1_hash = checksum.getMD5(new File("${html_r1}"));
+        def html_r2_hash = checksum.getMD5(new File("${html_r2}"));
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert html_r1.getName() == "test_R_1_fastqc.html"
+        assert html_r2.getName() == "test_R_2_fastqc.html"
+        assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
+        assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_files ->
+        def zip_r1 = zip_files[0]
+        def zip_r2 = zip_files[1]
+        // NOTE: output zip files do not have a consistent hash
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert zip_r1.getName() == "test_R_1_fastqc.zip"
+        assert zip_r2.getName() == "test_R_2_fastqc.zip"
+    }
+}
+
+workflow {  // unnamed entry workflow: runs both test workflows defined in this script
+    test_single_end()
+    test_paired_end()
+}
--- a/software/fastqc/test/lib
+++ b/software/fastqc/test/lib
@ -1 +0,0 @@
-../../../lib
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@ -1,73 +1,32 @@
 #!/usr/bin/env nextflow
-import checksum
-nextflow.preview.dsl = 2

-params.out_dir = "test_output"
-params.fastqc_args = ''
-params.publish_dir_mode = "copy"
+nextflow.enable.dsl = 2

 include { FASTQC } from '../main.nf'

-/**
- * Test if FASTQC runs with single-end data
+/*
+ * Test with single-end data
 */
 workflow test_single_end {
-    input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
-                    .map {f -> [f.name.replace(".fastq.gz", ""), true, f]}
-    FASTQC(input_files)

-    // test that the output looks as expected
-    FASTQC.out.html.map { name, is_single_end, html_file ->
-        html_hash = checksum.getMD5(new File("${html_file}"));
+    def input = []
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ] ]

-        assert name == "test_single_end"
-        assert is_single_end == true
-        assert html_file.getName() == "test_single_end_fastqc.html"
-        // Hash seems to vary between local runs and GitHub Actions
-        // TODO: Might be solved when using Docker for tests?
-        // assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
-    }
-    FASTQC.out.zip.map { name, is_single_end, zip_file ->
-        // NOTE: output zip files do not have a consistent hash
-        assert name == "test_single_end"
-        assert is_single_end == true
-        assert zip_file.getName() == "test_single_end_fastqc.zip"
-    }
+    FASTQC ( input, [ publish_dir:'test_single_end' ] )
 }

-/**
- * Test if FASTQC runs with paired end data
+/*
+ * Test with paired-end data
 */
 workflow test_paired_end {
-    input_files = Channel.fromFilePairs("input/test_R{1,2}.fastq.gz")
-                    .map {f -> [f[0], false, f[1]]}
-    FASTQC(input_files)

-    // test that the output looks as expected
-    FASTQC.out.html.map { name, is_single_end, html_files ->
-        html_r1 = html_files[0]
-        html_r2 = html_files[1]
+    def input = []
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
+                file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ]

-        html_r1_hash = checksum.getMD5(new File("${html_r1}"));
-        html_r2_hash = checksum.getMD5(new File("${html_r2}"));
-
-        assert name == "test_R"
-        assert is_single_end == false
-        assert html_r1.getName() == "test_R_1_fastqc.html"
-        assert html_r2.getName() == "test_R_2_fastqc.html"
-        assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
-        assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
-    }
-    FASTQC.out.zip.map { name, is_single_end, zip_files ->
-        zip_r1 = zip_files[0]
-        zip_r2 = zip_files[1]
-        // NOTE: output zip files do not have a consistent hash
-
-        assert name == "test_R"
-        assert is_single_end == false
-        assert zip_r1.getName() == "test_R_1_fastqc.zip"
-        assert zip_r2.getName() == "test_R_2_fastqc.zip"
-    }
+    FASTQC ( input, [ publish_dir:'test_paired_end' ] )
 }

 workflow {
--- a/software/fastqc/test/nextflow.config
+++ b/software/fastqc/test/nextflow.config
@ -0,0 +1,20 @@
+
+params {  // defaults for the module test run
+  outdir = "output/"  // output directory (documented as `outdir` in meta.yml)
+  publish_dir_mode = "copy"  // value for the Nextflow `publishDir` mode (see meta.yml)
+  conda = false  // off by default; switched on by the `conda` profile below
+}
+
+profiles {  // selected on the command line with `-profile <name>`
+  conda  {
+    params.conda = true  // overrides the `conda = false` default above
+  }
+  docker {
+    docker.enabled = true
+    docker.runOptions = '-u \$(id -u):\$(id -g)'  // run the container as the invoking user's UID:GID
+  }
+  singularity {
+    singularity.enabled = true
+    singularity.autoMounts = true  // NOTE(review): presumably auto-binds host paths into the container; confirm in Nextflow docs
+  }
+}
--- a/software/fastqc/test/output/test_R1_fastqc.html
+++ b/software/fastqc/test/output/test_R1_fastqc.html
--- a/software/fastqc/test/output/test_R1_fastqc.zip
+++ b/software/fastqc/test/output/test_R1_fastqc.zip
--- a/software/fastqc/test/output/test_R1_val_1_fastqc.html
+++ b/software/fastqc/test/output/test_R1_val_1_fastqc.html
--- a/software/fastqc/test/output/test_R1_val_1_fastqc.zip
+++ b/software/fastqc/test/output/test_R1_val_1_fastqc.zip
--- a/software/fastqc/test/output/test_R2_fastqc.html
+++ b/software/fastqc/test/output/test_R2_fastqc.html
--- a/software/fastqc/test/output/test_R2_fastqc.zip
+++ b/software/fastqc/test/output/test_R2_fastqc.zip
--- a/software/fastqc/test/output/test_R2_val_2_fastqc.html
+++ b/software/fastqc/test/output/test_R2_val_2_fastqc.html
--- a/software/fastqc/test/output/test_R2_val_2_fastqc.zip
+++ b/software/fastqc/test/output/test_R2_val_2_fastqc.zip
--- a/software/fastqc/test/output/test_paired_end/test_1_fastqc.html
+++ b/software/fastqc/test/output/test_paired_end/test_1_fastqc.html
--- a/software/fastqc/test/output/test_paired_end/test_1_fastqc.zip
+++ b/software/fastqc/test/output/test_paired_end/test_1_fastqc.zip
--- a/software/fastqc/test/output/test_paired_end/test_2_fastqc.html
+++ b/software/fastqc/test/output/test_paired_end/test_2_fastqc.html
--- a/software/fastqc/test/output/test_paired_end/test_2_fastqc.zip
+++ b/software/fastqc/test/output/test_paired_end/test_2_fastqc.zip
--- a/software/fastqc/test/output/test_single_end/test_fastqc.html
+++ b/software/fastqc/test/output/test_single_end/test_fastqc.html
--- a/software/fastqc/test/output/test_single_end/test_fastqc.zip
+++ b/software/fastqc/test/output/test_single_end/test_fastqc.zip