mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2025-01-18 02:46:13 -05:00
add FastQC output validation and md5 checksum hash checks
This commit is contained in:
parent
6028bb080b
commit
45fbbb538c
1 changed files with 77 additions and 0 deletions
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env nextflow
|
#!/usr/bin/env nextflow
|
||||||
|
import java.security.MessageDigest
|
||||||
nextflow.preview.dsl = 2
|
nextflow.preview.dsl = 2
|
||||||
|
|
||||||
params.out_dir = "test_output"
|
params.out_dir = "test_output"
|
||||||
|
@ -7,6 +8,40 @@ params.publish_dir_mode = "copy"
|
||||||
|
|
||||||
include { FASTQC } from '../main.nf'
|
include { FASTQC } from '../main.nf'
|
||||||
|
|
||||||
|
|
||||||
|
private static String getFileChecksum(MessageDigest digest, File file) throws IOException
|
||||||
|
{
|
||||||
|
// https://howtodoinjava.com/java/io/how-to-generate-sha-or-md5-file-checksum-hash-in-java/
|
||||||
|
//Get file input stream for reading the file content
|
||||||
|
FileInputStream fis = new FileInputStream(file);
|
||||||
|
|
||||||
|
//Create byte array to read data in chunks
|
||||||
|
byte[] byteArray = new byte[1024];
|
||||||
|
int bytesCount = 0;
|
||||||
|
|
||||||
|
//Read file data and update in message digest
|
||||||
|
while ((bytesCount = fis.read(byteArray)) != -1) {
|
||||||
|
digest.update(byteArray, 0, bytesCount);
|
||||||
|
};
|
||||||
|
|
||||||
|
//close the stream; We don't need it now.
|
||||||
|
fis.close();
|
||||||
|
|
||||||
|
//Get the hash's bytes
|
||||||
|
byte[] bytes = digest.digest();
|
||||||
|
|
||||||
|
//This bytes[] has bytes in decimal format;
|
||||||
|
//Convert it to hexadecimal format
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for(int i=0; i< bytes.length ;i++)
|
||||||
|
{
|
||||||
|
sb.append(Integer.toString((bytes[i] & 0xff) + 0x100, 16).substring(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
//return complete hash
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test if FASTQC runs with single-end data
|
* Test if FASTQC runs with single-end data
|
||||||
*/
|
*/
|
||||||
|
@ -14,6 +49,22 @@ workflow test_single_end {
|
||||||
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
|
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
|
||||||
.map {f -> [f.baseName, true, f]}
|
.map {f -> [f.baseName, true, f]}
|
||||||
FASTQC(input_files)
|
FASTQC(input_files)
|
||||||
|
|
||||||
|
// test that the output looks as expected
|
||||||
|
FASTQC.out.html.map { name, is_single_end, html_file ->
|
||||||
|
html_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_file}"));
|
||||||
|
|
||||||
|
assert name == "test_single_end.fastq"
|
||||||
|
assert is_single_end == true
|
||||||
|
assert html_file.getName() == "test_single_end.fastq_fastqc.html"
|
||||||
|
assert html_hash == "ff04679b50beabdbd9e93db646f5667d"
|
||||||
|
}
|
||||||
|
FASTQC.out.zip.map { name, is_single_end, zip_file ->
|
||||||
|
// NOTE: output zip files do not have a consistent hash
|
||||||
|
assert name == "test_single_end.fastq"
|
||||||
|
assert is_single_end == true
|
||||||
|
assert zip_file.getName() == "test_single_end.fastq_fastqc.zip"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,6 +74,32 @@ workflow test_paired_end {
|
||||||
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
|
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
|
||||||
.map {f -> [f[0], false, f[1]]}
|
.map {f -> [f[0], false, f[1]]}
|
||||||
FASTQC(input_files)
|
FASTQC(input_files)
|
||||||
|
|
||||||
|
// test that the output looks as expected
|
||||||
|
FASTQC.out.html.map { name, is_single_end, html_files ->
|
||||||
|
html_r1 = html_files[0]
|
||||||
|
html_r2 = html_files[1]
|
||||||
|
|
||||||
|
html_r1_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_r1}"));
|
||||||
|
html_r2_hash = getFileChecksum(MessageDigest.getInstance("MD5"), new File("${html_r2}"));
|
||||||
|
|
||||||
|
assert name == "test_R"
|
||||||
|
assert is_single_end == false
|
||||||
|
assert html_r1.getName() == "test_R_1_fastqc.html"
|
||||||
|
assert html_r2.getName() == "test_R_2_fastqc.html"
|
||||||
|
assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
|
||||||
|
assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
|
||||||
|
}
|
||||||
|
FASTQC.out.zip.map { name, is_single_end, zip_files ->
|
||||||
|
zip_r1 = zip_files[0]
|
||||||
|
zip_r2 = zip_files[1]
|
||||||
|
// NOTE: output zip files do not have a consistent hash
|
||||||
|
|
||||||
|
assert name == "test_R"
|
||||||
|
assert is_single_end == false
|
||||||
|
assert zip_r1.getName() == "test_R_1_fastqc.zip"
|
||||||
|
assert zip_r2.getName() == "test_R_2_fastqc.zip"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow {
|
workflow {
|
||||||
|
|
Loading…
Add table
Reference in a new issue