From 45fbbb538c8a9dc10361fe297246873a8197a6ee Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:20:37 -0400
Subject: [PATCH 1/3] add FastQC output validation and md5 checksum hash checks

---
 software/fastqc/test/main.nf | 77 ++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/software/fastqc/test/main.nf b/software/fastqc/test/main.nf
index 0b0038d7..5eeedd35 100755
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@@ -1,4 +1,5 @@
 #!/usr/bin/env nextflow
+import java.security.MessageDigest
 nextflow.preview.dsl = 2
 
 params.out_dir = "test_output"
@@ -7,6 +8,40 @@ params.publish_dir_mode = "copy"
 
 include { FASTQC } from '../main.nf'
 
+
+/**
+ * Compute the checksum of a file's contents as a lower-case hexadecimal string.
+ *
+ * @param digest a fresh MessageDigest, e.g. MessageDigest.getInstance("MD5"); it is consumed by this call
+ * @param file   the file whose bytes are hashed
+ * @return the digest rendered as two hex digits per byte
+ * @throws IOException if the file cannot be opened or read
+ */
+private static String getFileChecksum(MessageDigest digest, File file) throws IOException
+{
+    // Adapted from https://howtodoinjava.com/java/io/how-to-generate-sha-or-md5-file-checksum-hash-in-java/
+    FileInputStream fis = new FileInputStream(file);
+    try {
+        // Feed the file to the digest in fixed-size chunks
+        byte[] byteArray = new byte[1024];
+        int bytesCount = 0;
+        while ((bytesCount = fis.read(byteArray)) != -1) {
+            digest.update(byteArray, 0, bytesCount);
+        }
+    } finally {
+        // Always release the file handle, even when reading fails part-way
+        fis.close();
+    }
+
+    // Adding 0x100 before formatting keeps the leading zero of bytes below 0x10
+    byte[] bytes = digest.digest();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < bytes.length; i++) {
+        sb.append(Integer.toString((bytes[i] & 0xff) + 0x100, 16).substring(1));
+    }
+    return sb.toString();
+}
+
 /**
  * Test if FASTQC runs with single-end data
  */
@@ -14,6 +49,22 @@ workflow test_single_end {
     input_files = Channel.fromPath("data/test_single_end.fastq.gz")
                     .map {f -> [f.baseName, true, f]}
     FASTQC(input_files)
+
+    // test that the output looks as expected; locals use `def` so they stay closure-scoped
+    FASTQC.out.html.map { name, is_single_end, html_file ->
+        def html_hash = getFileChecksum(MessageDigest.getInstance("MD5"), html_file.toFile())
+
+        assert name == "test_single_end.fastq"
+        assert is_single_end == true
+        assert html_file.getName() == "test_single_end.fastq_fastqc.html"
+        assert html_hash == "ff04679b50beabdbd9e93db646f5667d"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_file ->
+        // NOTE: output zip files do not have a consistent hash
+        assert name == "test_single_end.fastq"
+        assert is_single_end == true
+        assert zip_file.getName() == "test_single_end.fastq_fastqc.zip"
+    }
 }
 
 /**
@@ -23,6 +74,32 @@ workflow test_paired_end {
     input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
                     .map {f -> [f[0], false, f[1]]}
     FASTQC(input_files)
+
+    // test that the output looks as expected; locals use `def` so they stay closure-scoped
+    FASTQC.out.html.map { name, is_single_end, html_files ->
+        def html_r1 = html_files[0]
+        def html_r2 = html_files[1]
+
+        def html_r1_hash = getFileChecksum(MessageDigest.getInstance("MD5"), html_r1.toFile())
+        def html_r2_hash = getFileChecksum(MessageDigest.getInstance("MD5"), html_r2.toFile())
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert html_r1.getName() == "test_R_1_fastqc.html"
+        assert html_r2.getName() == "test_R_2_fastqc.html"
+        assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
+        assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
+    }
+    FASTQC.out.zip.map { name, is_single_end, zip_files ->
+        def zip_r1 = zip_files[0]
+        def zip_r2 = zip_files[1]
+        // NOTE: output zip files do not have a consistent hash
+
+        assert name == "test_R"
+        assert is_single_end == false
+        assert zip_r1.getName() == "test_R_1_fastqc.zip"
+        assert zip_r2.getName() == "test_R_2_fastqc.zip"
+    }
 }
 
 workflow {
From 61d44c34d0cd61c3203cec5b9fb5686c41c87b80 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:31:19 -0400
Subject: [PATCH 2/3] update GitHub actions for Fastqc

---
 .github/workflows/fastqc.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/fastqc.yml b/.github/workflows/fastqc.yml
index 6888bbc0..65d6760f 100644
--- a/.github/workflows/fastqc.yml
+++ b/.github/workflows/fastqc.yml
@@ -2,7 +2,7 @@ name: FastQC
 on:
   push: {}
   pull_request:
-    paths: software/fastqc/*
+    paths: software/fastqc/**
 
 jobs:
   run_ci_test:
From 4957eb98e5f07a62aa5402a6280a8a0395f162d2 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Thu, 16 Jul 2020 08:49:08 -0400
Subject: [PATCH 3/3] update GitHub actions for Fastqc

---
 .github/workflows/fastqc.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/fastqc.yml b/.github/workflows/fastqc.yml
index 65d6760f..66a14c7c 100644
--- a/.github/workflows/fastqc.yml
+++ b/.github/workflows/fastqc.yml
@@ -1,6 +1,7 @@
 name: FastQC
 on:
-  push: {}
+  push:
+    paths: software/fastqc/**
   pull_request:
     paths: software/fastqc/**
 