From 45fbbb538c8a9dc10361fe297246873a8197a6ee Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:20:37 -0400
Subject: [PATCH 1/3] add FastQC output validation and md5 checksum hash checks

---
 software/fastqc/test/main.nf | 77 ++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/software/fastqc/test/main.nf b/software/fastqc/test/main.nf
index 0b0038d7..5eeedd35 100755
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@@ -1,4 +1,5 @@
 #!/usr/bin/env nextflow
+import java.security.MessageDigest
 nextflow.preview.dsl = 2
 
 params.out_dir = "test_output"
@@ -7,6 +8,40 @@ params.publish_dir_mode = "copy"
 
 include { FASTQC } from '../main.nf'
 
+
+private static String getFileChecksum(MessageDigest digest, File file) throws IOException
+{
+    // https://howtodoinjava.com/java/io/how-to-generate-sha-or-md5-file-checksum-hash-in-java/
+    // Streams `file` through `digest` and returns the hash as a lowercase hex
+    // string, two characters per byte. Pass a fresh MessageDigest: anything
+    // already fed into it would be included in the result.
+    FileInputStream fis = new FileInputStream(file);
+
+    try {
+        //Read the file in 1024-byte chunks and feed each chunk to the digest
+        byte[] byteArray = new byte[1024];
+        int bytesCount = 0;
+        while ((bytesCount = fis.read(byteArray)) != -1) {
+            digest.update(byteArray, 0, bytesCount);
+        }
+    } finally {
+        //Always release the file handle, even when a read throws
+        fis.close();
+    }
+
+    //Get the hash's bytes
+    byte[] bytes = digest.digest();
+
+    //Render every byte as exactly two hex digits: adding 0x100 forces a
+    //three-digit value, and substring(1) then drops its leading '1'
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < bytes.length; i++) {
+        sb.append(Integer.toString((bytes[i] & 0xff) + 0x100, 16).substring(1));
+    }
+
+    return sb.toString();
+}
+
 /**
  * Test if FASTQC runs with single-end data
  */
@@ -14,6 +49,22 @@ workflow test_single_end {
     input_files = Channel.fromPath("data/test_single_end.fastq.gz")
                     .map {f -> [f.baseName, true, f]}
     FASTQC(input_files)
+
+    // Check the HTML report: tuple metadata, file name, and exact content (MD5)
+    FASTQC.out.html.map { name, is_single_end, html_file ->
+        // `def` keeps the hash local to this closure instead of a script-wide variable
+        def html_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_file}"))
+        assert name == "test_single_end.fastq"
+        assert is_single_end == true
+        assert html_file.getName() == "test_single_end.fastq_fastqc.html"
+        assert html_hash == "ff04679b50beabdbd9e93db646f5667d"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_file ->
+        // NOTE: output zip files do not have a consistent hash, so only names are checked
+        assert name == "test_single_end.fastq"
+        assert is_single_end == true
+        assert zip_file.getName() == "test_single_end.fastq_fastqc.zip"
+    }
 }
 
 /**
@@ -23,6 +74,32 @@ workflow test_paired_end {
     input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
                     .map {f -> [f[0], false, f[1]]}
     FASTQC(input_files)
+
+    // Check both HTML reports: tuple metadata, file names, and exact content (MD5)
+    FASTQC.out.html.map { name, is_single_end, html_files ->
+        // `def` keeps these values local to the closure instead of script-wide variables
+        def html_r1 = html_files[0]
+        def html_r2 = html_files[1]
+        def html_r1_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_r1}"))
+        def html_r2_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_r2}"))
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert html_r1.getName() == "test_R_1_fastqc.html"
+        assert html_r2.getName() == "test_R_2_fastqc.html"
+        assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
+        assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_files ->
+        def zip_r1 = zip_files[0]
+        def zip_r2 = zip_files[1]
+        // NOTE: output zip files do not have a consistent hash, so only names are checked
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert zip_r1.getName() == "test_R_1_fastqc.zip"
+        assert zip_r2.getName() == "test_R_2_fastqc.zip"
+    }
 }
 
 workflow {

From 61d44c34d0cd61c3203cec5b9fb5686c41c87b80 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:31:19 -0400
Subject: [PATCH 2/3] run FastQC workflow on pull requests touching any file under software/fastqc

---
 .github/workflows/fastqc.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/fastqc.yml b/.github/workflows/fastqc.yml
index 6888bbc0..65d6760f 100644
--- a/.github/workflows/fastqc.yml
+++ b/.github/workflows/fastqc.yml
@@ -2,7 +2,7 @@ name: FastQC
 on:
   push: {}
   pull_request:
-    paths: software/fastqc/*
+    paths: software/fastqc/**
 
 jobs:
   run_ci_test:

From 4957eb98e5f07a62aa5402a6280a8a0395f162d2 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:49:08 -0400
Subject: [PATCH 3/3] limit FastQC workflow push trigger to changes under software/fastqc

---
 .github/workflows/fastqc.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/fastqc.yml b/.github/workflows/fastqc.yml
index 65d6760f..66a14c7c 100644
--- a/.github/workflows/fastqc.yml
+++ b/.github/workflows/fastqc.yml
@@ -1,6 +1,7 @@
 name: FastQC
 on:
-  push: {}
+  push:
+    paths: software/fastqc/**
   pull_request:
     paths: software/fastqc/**