Merge pull request #58 from drpatelh/master

Get tests working for FastQC
This commit is contained in:
Harshil Patel 2020-08-07 10:11:53 +01:00 committed by GitHub
commit 5af5fc395e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 697 additions and 837 deletions

View file

@ -1,4 +1,4 @@
name: FastQC
name: fastqc
on:
push:
paths:
@ -12,7 +12,7 @@ on:
- tests
jobs:
run_ci_test:
ci_test:
runs-on: ubuntu-latest
env:
NXF_ANSI_LOG: false
@ -22,9 +22,9 @@ jobs:
- name: Install Nextflow
run: |
export NXF_VER="20.06.0-edge"
export NXF_VER="20.07.1"
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
# Test the module
- run: nextflow run ./software/fastqc/test/
- run: nextflow run ./software/fastqc/test/ -profile docker

View file

@ -200,12 +200,14 @@ We also use a standardised parameter called `params.publish_dir_mode` that can b
- Test files MUST be kept as tiny as possible.
- It is RECOMMENDED to re-use generic files from [`tests/data/`](tests/data/) by symlinking them into the [`test/`](software/fastqc/test) directory of the module.
- If the appropriate test data doesn't exist for your module then it MUST be added to [`tests/data`](tests/data/).
- Every module MUST be tested by adding a test workflow with a toy dataset in the [`test/`](software/fastqc/test) directory of the module.
- Generic files from [`tests/data/`](tests/data/) SHOULD be reused by symlinking them into the [`test/input/`](software/fastqc/test/input/) directory of the module.
- Any outputs produced by the test workflow SHOULD be placed in a folder called [`test/output/`](software/fastqc/test/output/) so that they can be used for unit testing.
- If the appropriate test data doesn't exist for your module then it MUST be added to [`tests/data/`](tests/data/).
### Documentation
- A module MUST be documented in the [`meta.yml`](software/fastqc/meta.yml) file. It MUST document `params`, `input` and `output`. `input` and `output` MUST each be a nested list.

View file

@ -1,4 +1,4 @@
name: FastQC
name: fastqc
description: Run FastQC on sequenced reads
keywords:
- Quality Control
@ -15,37 +15,27 @@ tools:
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
params:
- fastqc_args:
type: string
description: Additional command line arguments passed to fastqc.
- out_dir:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$out_dir/MODULE_NAME`
- publish_dir:
type: string
description: |
Append to the path for the standard output directory provided by `$out_dir`.
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Provide a value for the Nextflow `publishDir` mode parameter
(e.g. copy, link, ...)
- publish_results:
type: string
description: |
Whether or not to publish results into `publish_dir`. Set to `none` to not
publish any files at all; to `default` to publish all relevant files.
input:
- name:
type: string
description: Sample identifier
- single_end:
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- conda:
type: boolean
description: |
Boolean indicating whether the corresponding sample is single-end (true)
or paired-end (false).
Run the module with Conda using the software specified
via the `conda` directive
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- reads:
type: file
description: |
@ -57,7 +47,7 @@ output:
description: FastQC report
pattern: "*_fastqc.{zip,html}"
authors:
- "@grst"
- "@drpatelh"
- "@grst"
- "@ewels"
- "@FelixKrueger"

View file

@ -0,0 +1 @@
../../../lib/

View file

@ -0,0 +1,76 @@
#!/usr/bin/env nextflow
import checksum
// Opt in to DSL2 through the preview flag; the `workflow` blocks and the
// `include` statement below rely on DSL2 syntax.
nextflow.preview.dsl = 2
// Default parameter values for this test script.
// NOTE(review): presumably these are read by the FASTQC module in ../main.nf,
// which is not visible here -- confirm the parameter names still match.
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
// Import the process under test from the module's main script.
include { FASTQC } from '../main.nf'
/**
 * Test if FASTQC runs with single-end data.
 *
 * Feeds one [name, single_end, file] tuple built from
 * `${baseDir}/input/test_single_end.fastq.gz` into FASTQC and asserts that the
 * sample name, the single-end flag and the output file names of the `html` and
 * `zip` output channels are as expected.
 */
workflow test_single_end {
    input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
        .map { f -> [f.name.replace(".fastq.gz", ""), true, f] }

    FASTQC(input_files)

    // test that the output looks as expected
    FASTQC.out.html.map { name, is_single_end, html_file ->
        // Declared with `def` so the value stays local to this closure instead
        // of becoming a script-global variable shared between closures.
        def html_hash = checksum.getMD5(new File("${html_file}"))

        assert name == "test_single_end"
        assert is_single_end == true
        assert html_file.getName() == "test_single_end_fastqc.html"
        // Hash seems to vary between local runs and GitHub Actions
        // TODO: Might be solved when using Docker for tests?
        // assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
    }

    FASTQC.out.zip.map { name, is_single_end, zip_file ->
        // NOTE: output zip files do not have a consistent hash
        assert name == "test_single_end"
        assert is_single_end == true
        assert zip_file.getName() == "test_single_end_fastqc.zip"
    }
}
/**
 * Test if FASTQC runs with paired-end data.
 *
 * Collects the R1/R2 test files as one file pair, feeds a
 * [name, single_end, files] tuple into FASTQC and asserts the sample name,
 * the single-end flag, the output file names and the MD5 hashes of the HTML
 * reports.
 */
workflow test_paired_end {
    // Anchor the glob to ${baseDir}, as test_single_end does, so the input is
    // found regardless of the directory Nextflow is launched from.
    input_files = Channel.fromFilePairs("${baseDir}/input/test_R{1,2}.fastq.gz")
        .map { f -> [f[0], false, f[1]] }

    FASTQC(input_files)

    // test that the output looks as expected
    FASTQC.out.html.map { name, is_single_end, html_files ->
        // All helper variables are declared with `def` so they stay local to
        // this closure instead of becoming script-global variables.
        def html_r1 = html_files[0]
        def html_r2 = html_files[1]

        def html_r1_hash = checksum.getMD5(new File("${html_r1}"))
        def html_r2_hash = checksum.getMD5(new File("${html_r2}"))

        assert name == "test_R"
        assert is_single_end == false
        assert html_r1.getName() == "test_R_1_fastqc.html"
        assert html_r2.getName() == "test_R_2_fastqc.html"
        assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
        assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
    }

    FASTQC.out.zip.map { name, is_single_end, zip_files ->
        def zip_r1 = zip_files[0]
        def zip_r2 = zip_files[1]
        // NOTE: output zip files do not have a consistent hash
        assert name == "test_R"
        assert is_single_end == false
        assert zip_r1.getName() == "test_R_1_fastqc.zip"
        assert zip_r2.getName() == "test_R_2_fastqc.zip"
    }
}
/*
 * Unnamed entry workflow: invokes the single-end test workflow, then the
 * paired-end test workflow.
 */
workflow {
    test_single_end()
    test_paired_end()
}

View file

@ -1 +0,0 @@
../../../lib

View file

@ -1,73 +1,32 @@
#!/usr/bin/env nextflow
import checksum
nextflow.preview.dsl = 2
params.out_dir = "test_output"
params.fastqc_args = ''
params.publish_dir_mode = "copy"
nextflow.enable.dsl = 2
include { FASTQC } from '../main.nf'
/**
* Test if FASTQC runs with single-end data
/*
* Test with single-end data
*/
workflow test_single_end {
input_files = Channel.fromPath("${baseDir}/input/test_single_end.fastq.gz")
.map {f -> [f.name.replace(".fastq.gz", ""), true, f]}
FASTQC(input_files)
// test that the output looks as expected
FASTQC.out.html.map { name, is_single_end, html_file ->
html_hash = checksum.getMD5(new File("${html_file}"));
def input = []
input = [ [ id:'test', single_end:true ], // meta map
[ file("${baseDir}/input/test_single_end.fastq.gz", checkIfExists: true) ] ]
assert name == "test_single_end"
assert is_single_end == true
assert html_file.getName() == "test_single_end_fastqc.html"
// Hash seems to vary between local runs and GitHub Actions
// TODO: Might be solved when using Docker for tests?
// assert html_hash == "8ed68442ebb5b9706bf79b4f66701e15"
}
FASTQC.out.zip.map { name, is_single_end, zip_file ->
// NOTE: output zip files do not have a consistent hash
assert name == "test_single_end"
assert is_single_end == true
assert zip_file.getName() == "test_single_end_fastqc.zip"
}
FASTQC ( input, [ publish_dir:'test_single_end' ] )
}
/**
* Test if FASTQC runs with paired end data
/*
* Test with paired-end data
*/
workflow test_paired_end {
input_files = Channel.fromFilePairs("input/test_R{1,2}.fastq.gz")
.map {f -> [f[0], false, f[1]]}
FASTQC(input_files)
// test that the output looks as expected
FASTQC.out.html.map { name, is_single_end, html_files ->
html_r1 = html_files[0]
html_r2 = html_files[1]
def input = []
input = [ [ id:'test', single_end:false ], // meta map
[ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true),
file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ]
html_r1_hash = checksum.getMD5(new File("${html_r1}"));
html_r2_hash = checksum.getMD5(new File("${html_r2}"));
assert name == "test_R"
assert is_single_end == false
assert html_r1.getName() == "test_R_1_fastqc.html"
assert html_r2.getName() == "test_R_2_fastqc.html"
assert html_r1_hash == "082c13ce7163ea0f52a66b83cb57b0f0"
assert html_r2_hash == "4ff04ec8da77e3af512f03b8c09a9e04"
}
FASTQC.out.zip.map { name, is_single_end, zip_files ->
zip_r1 = zip_files[0]
zip_r2 = zip_files[1]
// NOTE: output zip files do not have a consistent hash
assert name == "test_R"
assert is_single_end == false
assert zip_r1.getName() == "test_R_1_fastqc.zip"
assert zip_r2.getName() == "test_R_2_fastqc.zip"
}
FASTQC ( input, [ publish_dir:'test_paired_end' ] )
}
workflow {

View file

@ -0,0 +1,20 @@
// Default parameter values for the test run.
params {
// Output directory for the test run.
outdir = "output/"
// NOTE(review): presumably forwarded to the `publishDir` mode of the module -- confirm in main.nf.
publish_dir_mode = "copy"
// Off by default; switched on by the `conda` profile below.
conda = false
}
// Execution profiles, selected on the command line with `-profile <name>`.
profiles {
// Only flips the `conda` parameter; the module itself is expected to act on it.
conda {
params.conda = true
}
// Enables Docker and passes a `-u <uid>:<gid>` option to the container runtime.
// NOTE(review): the literal `$(id -u)` / `$(id -g)` are presumably expanded by
// the shell when docker is invoked -- confirm.
docker {
docker.enabled = true
docker.runOptions = '-u \$(id -u):\$(id -g)'
}
// Enables Singularity and turns on its `autoMounts` setting.
singularity {
singularity.enabled = true
singularity.autoMounts = true
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long