Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
This commit is contained in:
Harshil Patel 2020-07-15 12:59:28 +01:00 committed by GitHub
commit 6028bb080b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 113 additions and 69 deletions

2
.gitignore vendored
View file

@ -1,6 +1,6 @@
.nextflow* .nextflow*
work/ work/
results/ results/
./data test_output/
.DS_Store .DS_Store
*.code-workspace *.code-workspace

View file

@ -6,4 +6,4 @@ channels:
- bioconda - bioconda
- defaults - defaults
dependencies: dependencies:
- fastqc=0.11.8 - fastqc=0.11.9

View file

@ -1,37 +1,40 @@
nextflow.preview.dsl = 2 def MODULE = "fastqc"
params.publish_dir = MODULE
params.publish_results = "default"
process FASTQC { process FASTQC {
publishDir "${params.out_dir}/${params.publish_dir}",
mode: params.publish_dir_mode,
saveAs: { filename ->
if (params.publish_results == "none") null
else filename }
// tag "FastQC - $sample_id" container "docker.pkg.github.com/nf-core/$MODULE"
conda "${moduleDir}/environment.yml"
input: input:
tuple val(name), path(reads) tuple val(name), val(single_end), path(reads)
val (outputdir)
// fastqc_args are best passed into the workflow in the following manner:
// --fastqc_args="--nogroup -a custom_adapter_file.txt"
val (fastqc_args)
val (verbose)
output: output:
tuple val(name), path ("*fastqc*"), emit: all tuple val(name), val(single_end), path("*.html"), emit: html
path "*.zip", emit: report // e.g. for MultiQC later tuple val(name), val(single_end), path("*.zip"), emit: zip
path "*.version.txt", emit: version
// container 'quay.io/biocontainers/fastqc:0.11.8--2'
publishDir "$outputdir",
mode: "copy", overwrite: true
script: script:
// Add soft-links to original FastQs for consistent naming in pipeline
if (verbose){ if (single_end) {
println ("[MODULE] FASTQC ARGS: " + fastqc_args) """
[ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
"""
} else {
"""
[ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
[ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
"""
} }
"""
module load fastqc
fastqc $fastqc_args -q -t 2 $reads
fastqc --version &> fastqc.version.txt
"""
} }

View file

@ -14,20 +14,50 @@ tools:
overrepresented sequences. overrepresented sequences.
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
params:
- fastqc_args:
type: string
description: Additional command line arguments passed to fastqc.
- out_dir:
type: string
description: |
The pipeline's output directory. By default, the module publishes its
files into `$out_dir/MODULE_NAME` (for this module, `$out_dir/fastqc`).
- publish_dir:
type: string
description: |
Subdirectory appended to the standard output directory provided by `$out_dir`; results are published to `$out_dir/$publish_dir`.
- publish_dir_mode:
type: string
description: |
Value passed to the `mode` option of the Nextflow `publishDir` directive
(e.g. copy, link, ...)
- publish_results:
type: string
description: |
Whether or not to publish results into `publish_dir`. Set to `none` to publish
no files at all, or to `default` to publish all relevant files.
input: input:
-
- name: - name:
type: string type: string
description: Sample identifier description: Sample identifier
- single_end:
type: boolean
description: |
Boolean indicating whether the corresponding sample is single-end (true)
or paired-end (false).
- reads: - reads:
type: file type: file
description: Input FastQ file, or pair of files description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
output: output:
-
- report: - report:
type: file type: file
description: FastQC report description: FastQC report
pattern: "*_fastqc.{zip,html}" pattern: "*_fastqc.{zip,html}"
authors: authors:
- "@grst"
- "@drpatelh"
- "@ewels" - "@ewels"
- "@FelixKrueger" - "@FelixKrueger"

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R1.fastq.gz

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_R2.fastq.gz

View file

@ -0,0 +1 @@
../../../../tests/data/fastq/rna/test_single_end.fastq.gz

View file

@ -1,21 +1,31 @@
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
nextflow.preview.dsl = 2 nextflow.preview.dsl = 2
params.outdir = "." // gets set in nextflow.config file (as './results/fastqc') params.out_dir = "test_output"
params.fastqc_args = '' params.fastqc_args = ''
params.verbose = false params.publish_dir_mode = "copy"
// TODO: check the output files in some way include { FASTQC } from '../main.nf'
// include '../../../tests/functions/check_process_outputs.nf'
include '../main.nf'
// Define input channels /**
ch_read_files = Channel * Test if FASTQC runs with single-end data
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1) */
// .view() // to check whether the input channel works workflow test_single_end {
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
// Run the workflow .map {f -> [f.baseName, true, f]}
workflow { FASTQC(input_files)
FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose) }
// .check_output()
/**
* Test if FASTQC runs with paired end data
*/
workflow test_paired_end {
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
.map {f -> [f[0], false, f[1]]}
FASTQC(input_files)
}
workflow {
test_single_end()
test_paired_end()
} }

View file

@ -1,2 +0,0 @@
// docker.enabled = true
params.outdir = './results/fastqc'

Binary file not shown.

Binary file not shown.

Binary file not shown.