Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
This commit is contained in:
Harshil Patel 2020-07-15 12:59:28 +01:00 committed by GitHub
commit 6028bb080b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 113 additions and 69 deletions

2
.gitignore vendored
View file

@ -1,6 +1,6 @@
.nextflow*
work/
results/
./data
test_output/
.DS_Store
*.code-workspace

View file

@ -6,4 +6,4 @@ channels:
- bioconda
- defaults
dependencies:
- fastqc=0.11.8
- fastqc=0.11.9

View file

@ -1,37 +1,40 @@
// Opt in to the DSL2 preview syntax.
nextflow.preview.dsl = 2
// Module name; reused for the default publish directory and in the container path of FASTQC.
def MODULE = "fastqc"
// Sub-directory appended to params.out_dir when publishing results.
params.publish_dir = MODULE
// Publishing switch read by the saveAs closure of FASTQC: the value none disables
// publishing, any other value publishes every file under its own name.
params.publish_results = "default"
// FASTQC process: runs fastqc on the reads of one sample and emits its reports.
// NOTE(review): this listing was taken from a rendered diff (hunk -1,37 +1,40), so
// removed and added lines appear interleaved without +/- markers. The comments below
// describe each group of lines as written; confirm against the actual main.nf.
process FASTQC {
// Publish into out_dir/publish_dir using the configured mode. The saveAs closure
// returns null when publish_results is none, otherwise the unchanged file name.
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
// tag "FastQC - $sample_id"
// Container path is built from the MODULE name defined at the top of the file.
container "docker.pkg.github.com/nf-core/$MODULE"
// Conda environment file is resolved relative to moduleDir.
conda "${moduleDir}/environment.yml"
input:
// Variant 1: (name, reads) tuple followed by separate outputdir, fastqc_args and
// verbose values. Presumably the pre-change signature -- TODO confirm.
tuple val(name), path(reads)
val (outputdir)
// fastqc_args are best passed into the workflow in the following manner:
// --fastqc_args="--nogroup -a custom_adapter_file.txt"
val (fastqc_args)
val (verbose)
// Variant 2: a single (name, single_end, reads) tuple; single_end selects the
// script branch further down.
tuple val(name), val(single_end), path(reads)
output:
// Variant 1 outputs: every file matching *fastqc* per sample, plus the zip files.
tuple val(name), path ("*fastqc*"), emit: all
path "*.zip", emit: report // e.g. for MultiQC later
// container 'quay.io/biocontainers/fastqc:0.11.8--2'
publishDir "$outputdir",
mode: "copy", overwrite: true
// Variant 2 outputs: html and zip reports keyed by (name, single_end), and the
// text file that the script writes the fastqc version into.
tuple val(name), val(single_end), path("*.html"), emit: html
tuple val(name), val(single_end), path("*.zip"), emit: zip
path "*.version.txt", emit: version
script:
// Optional logging of the extra fastqc arguments when verbose is set.
if (verbose){
println ("[MODULE] FASTQC ARGS: " + fastqc_args)
// Add soft-links to original FastQs for consistent naming in pipeline
// Single-end branch: link the one reads file to NAME.fastq.gz (only if that name
// does not exist yet), run fastqc on it with task.cpus threads, then write the
// version, cut down by sed to the text from the last v to the end of the line.
if (single_end) {
"""
[ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
"""
} else {
// Paired-end branch: reads[0] and reads[1] are linked as NAME_1 and NAME_2 and
// both are passed to a single fastqc call; version handling is the same.
"""
[ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
[ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
"""
}
// Script variant that loads fastqc through the module command, runs it on the reads
// with the flags -q -t 2, and redirects stdout and stderr of the version call into
// fastqc.version.txt. Presumably the pre-change script -- TODO confirm.
"""
module load fastqc
fastqc $fastqc_args -q -t 2 $reads
fastqc --version &> fastqc.version.txt
"""
}

View file

@ -14,20 +14,50 @@ tools:
overrepresented sequences.
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
# Pipeline-level parameters that the module reads through the params scope.
params:
- fastqc_args:
type: string
description: Additional command line arguments passed to fastqc.
- out_dir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$out_dir/MODULE_NAME`
- publish_dir:
type: string
description: |
Append to the path for the standard output directory provided by `$out_dir`.
- publish_dir_mode:
type: string
description: |
Provide a value for the Nextflow `publishDir` mode parameter
(e.g. copy, link, ...)
- publish_results:
type: string
description: |
Whether or not to publish results into `publish_dir`. Set to `none` to not
publish any files at all; to `default` to publish all relevant files.
# Elements of the input tuple, in order: name, single_end, reads.
input:
-
- name:
type: string
description: Sample identifier
- single_end:
type: boolean
description: |
Boolean indicating whether the corresponding sample is single-end (true)
or paired-end (false).
- reads:
type: file
# NOTE(review): two description entries follow for reads, presumably the removed
# and the added text of the diff; only one should remain in the real file.
description: Input FastQ file, or pair of files
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
# NOTE(review): the process in main.nf declares html, zip and version outputs;
# this single report entry may be out of date -- confirm.
output:
-
- report:
type: file
description: FastQC report
pattern: "*_fastqc.{zip,html}"
# GitHub handles of the module authors.
authors:
- "@grst"
- "@drpatelh"
- "@ewels"
- "@FelixKrueger"

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R1.fastq.gz

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R2.fastq.gz

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_single_end.fastq.gz

View file

@ -1,21 +1,31 @@
#!/usr/bin/env nextflow
// Test script for the FASTQC module.
// NOTE(review): this listing was taken from a rendered diff (hunk -1,21 +1,31), so
// removed and added lines appear interleaved; confirm against the actual file.
nextflow.preview.dsl = 2
params.outdir = "." // gets set in nextflow.config file (as './results/fastqc')
// Parameters consumed by the module: output root, extra fastqc arguments and
// the publishDir mode.
params.out_dir = "test_output"
params.fastqc_args = ''
params.verbose = false
params.publish_dir_mode = "copy"
// TODO: check the output files in some way
// include '../../../tests/functions/check_process_outputs.nf'
include '../main.nf'
// Selective include: brings only the FASTQC process into scope.
include { FASTQC } from '../main.nf'
// Define input channels
ch_read_files = Channel
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
// .view() // to check whether the input channel works
// Run the workflow
workflow {
FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose)
// .check_output()
/**
* Test if FASTQC runs with single-end data
*/
workflow test_single_end {
// Builds the [name, single_end, reads] tuple with single_end set to true.
// NOTE(review): baseName presumably strips only the final extension, which would
// leave test_single_end.fastq as the sample name -- confirm whether simpleName
// was intended.
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
.map {f -> [f.baseName, true, f]}
FASTQC(input_files)
}
/**
* Test if FASTQC runs with paired end data
*/
workflow test_paired_end {
// Remaps each emitted pair so that element 0 becomes the name, single_end is
// false and element 1 is passed on as reads.
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
.map {f -> [f[0], false, f[1]]}
FASTQC(input_files)
}
// Unnamed entry workflow: runs both test workflows.
workflow {
test_single_end()
test_paired_end()
}

View file

@ -1,2 +0,0 @@
// docker.enabled = true
params.outdir = './results/fastqc'

Binary file not shown.

Binary file not shown.

Binary file not shown.