Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
This commit is contained in:
Harshil Patel 2020-07-15 12:59:28 +01:00 committed by GitHub
commit 6028bb080b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 113 additions and 69 deletions

2
.gitignore vendored
View file

@@ -1,6 +1,6 @@
.nextflow* .nextflow*
work/ work/
results/ results/
./data test_output/
.DS_Store .DS_Store
*.code-workspace *.code-workspace

View file

@@ -6,4 +6,4 @@ channels:
- bioconda - bioconda
- defaults - defaults
dependencies: dependencies:
- fastqc=0.11.8 - fastqc=0.11.9

View file

@@ -1,37 +1,40 @@
nextflow.preview.dsl = 2 def MODULE = "fastqc"
params.publish_dir = MODULE
params.publish_results = "default"
process FASTQC { process FASTQC {
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
// tag "FastQC - $sample_id" container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input: input:
tuple val(name), path(reads) tuple val(name), val(single_end), path(reads)
val (outputdir)
// fastqc_args are best passed into the workflow in the following manner:
// --fastqc_args="--nogroup -a custom_adapter_file.txt"
val (fastqc_args)
val (verbose)
output: output:
tuple val(name), path ("*fastqc*"), emit: all tuple val(name), val(single_end), path("*.html"), emit: html
path "*.zip", emit: report // e.g. for MultiQC later tuple val(name), val(single_end), path("*.zip"), emit: zip
path "*.version.txt", emit: version
// container 'quay.io/biocontainers/fastqc:0.11.8--2'
publishDir "$outputdir",
mode: "copy", overwrite: true
script: script:
// Add soft-links to original FastQs for consistent naming in pipeline
if (verbose){ if (single_end) {
println ("[MODULE] FASTQC ARGS: " + fastqc_args)
}
""" """
module load fastqc [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
fastqc $fastqc_args -q -t 2 $reads fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
fastqc --version &> fastqc.version.txt
""" """
} else {
"""
[ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
[ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
"""
}
} }

View file

@@ -1,33 +1,63 @@
name: FastQC name: FastQC
description: Run FastQC on sequenced reads description: Run FastQC on sequenced reads
keywords: keywords:
- Quality Control - Quality Control
- QC - QC
- Adapters - Adapters
tools: tools:
- fastqc: - fastqc:
description: | description: |
FastQC gives general quality metrics about your reads. FastQC gives general quality metrics about your reads.
It provides information about the quality score distribution It provides information about the quality score distribution
across your reads, the per base sequence content (%A/C/G/T). across your reads, the per base sequence content (%A/C/G/T).
You get information about adapter contamination and other You get information about adapter contamination and other
overrepresented sequences. overrepresented sequences.
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
params:
- fastqc_args:
type: string
description: Additional command line arguments passed to fastqc.
- out_dir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$out_dir/MODULE_NAME`
- publish_dir:
type: string
description: |
Append to the path for the standard output directory provided by `$out_dir`.
- publish_dir_mode:
type: string
description: |
Provide a value for the Nextflow `publishDir` mode parameter
(e.g. copy, link, ...)
- publish_results:
type: string
description: |
Whether or not to publish results into `publish_dir`. Set to `none` to not
publish any files at all; to `default` to publish all relevant files.
input: input:
- - name:
- name: type: string
type: string description: Sample identifier
description: Sample identifier - single_end:
- reads: type: boolean
type: file description: |
description: Input FastQ file, or pair of files Boolean indicating whether the corresponding sample is single-end (true)
or paired-end (false).
- reads:
type: file
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output: output:
- - report:
- report: type: file
type: file description: FastQC report
description: FastQC report pattern: "*_fastqc.{zip,html}"
pattern: "*_fastqc.{zip,html}"
authors: authors:
- "@ewels" - "@grst"
- "@FelixKrueger" - "@drpatelh"
- "@ewels"
- "@FelixKrueger"

View file

@@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R1.fastq.gz

View file

@@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R2.fastq.gz

View file

@@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_single_end.fastq.gz

View file

@@ -1,21 +1,31 @@
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
nextflow.preview.dsl = 2 nextflow.preview.dsl = 2
params.outdir = "." // gets set in nextflow.config file (as './results/fastqc') params.out_dir = "test_output"
params.fastqc_args = '' params.fastqc_args = ''
params.verbose = false params.publish_dir_mode = "copy"
// TODO: check the output files in some way include { FASTQC } from '../main.nf'
// include '../../../tests/functions/check_process_outputs.nf'
include '../main.nf'
// Define input channels /**
ch_read_files = Channel * Test if FASTQC runs with single-end data
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1) */
// .view() // to check whether the input channel works workflow test_single_end {
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
// Run the workflow .map {f -> [f.baseName, true, f]}
workflow { FASTQC(input_files)
FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose) }
// .check_output()
/**
* Test if FASTQC runs with paired end data
*/
workflow test_paired_end {
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
.map {f -> [f[0], false, f[1]]}
FASTQC(input_files)
}
workflow {
test_single_end()
test_paired_end()
} }

View file

@@ -1,2 +0,0 @@
// docker.enabled = true
params.outdir = './results/fastqc'

Binary file not shown.

Binary file not shown.

Binary file not shown.