mirror of
https://github.com/MillironX/nf-core_modules.git
synced 2024-11-13 05:13:09 +00:00
Merge pull request #40 from grst/update-fastqc
Update fastqc to adhere to new module guidelines
This commit is contained in:
commit
6028bb080b
12 changed files with 113 additions and 69 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,6 +1,6 @@
|
|||
.nextflow*
|
||||
work/
|
||||
results/
|
||||
./data
|
||||
test_output/
|
||||
.DS_Store
|
||||
*.code-workspace
|
||||
|
|
|
@ -6,4 +6,4 @@ channels:
|
|||
- bioconda
|
||||
- defaults
|
||||
dependencies:
|
||||
- fastqc=0.11.8
|
||||
- fastqc=0.11.9
|
||||
|
|
|
@ -1,37 +1,40 @@
|
|||
nextflow.preview.dsl = 2
|
||||
def MODULE = "fastqc"
|
||||
params.publish_dir = MODULE
|
||||
params.publish_results = "default"
|
||||
|
||||
process FASTQC {
|
||||
publishDir "${params.out_dir}/${params.publish_dir}",
|
||||
mode: params.publish_dir_mode,
|
||||
saveAs: { filename ->
|
||||
if (params.publish_results == "none") null
|
||||
else filename }
|
||||
|
||||
// tag "FastQC - $sample_id"
|
||||
container "docker.pkg.github.com/nf-core/$MODULE"
|
||||
|
||||
conda "${moduleDir}/environment.yml"
|
||||
|
||||
input:
|
||||
tuple val(name), path(reads)
|
||||
val (outputdir)
|
||||
// fastqc_args are best passed into the workflow in the following manner:
|
||||
// --fastqc_args="--nogroup -a custom_adapter_file.txt"
|
||||
val (fastqc_args)
|
||||
val (verbose)
|
||||
tuple val(name), val(single_end), path(reads)
|
||||
|
||||
output:
|
||||
tuple val(name), path ("*fastqc*"), emit: all
|
||||
path "*.zip", emit: report // e.g. for MultiQC later
|
||||
|
||||
// container 'quay.io/biocontainers/fastqc:0.11.8--2'
|
||||
|
||||
publishDir "$outputdir",
|
||||
mode: "copy", overwrite: true
|
||||
tuple val(name), val(single_end), path("*.html"), emit: html
|
||||
tuple val(name), val(single_end), path("*.zip"), emit: zip
|
||||
path "*.version.txt", emit: version
|
||||
|
||||
script:
|
||||
|
||||
if (verbose){
|
||||
println ("[MODULE] FASTQC ARGS: " + fastqc_args)
|
||||
}
|
||||
|
||||
// Add soft-links to original FastQs for consistent naming in pipeline
|
||||
if (single_end) {
|
||||
"""
|
||||
module load fastqc
|
||||
fastqc $fastqc_args -q -t 2 $reads
|
||||
|
||||
fastqc --version &> fastqc.version.txt
|
||||
[ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
|
||||
fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
|
||||
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
|
||||
"""
|
||||
|
||||
} else {
|
||||
"""
|
||||
[ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
|
||||
[ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
|
||||
fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
|
||||
fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
|
||||
"""
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,33 +1,63 @@
|
|||
name: FastQC
|
||||
description: Run FastQC on sequenced reads
|
||||
keywords:
|
||||
- Quality Control
|
||||
- QC
|
||||
- Adapters
|
||||
- Quality Control
|
||||
- QC
|
||||
- Adapters
|
||||
tools:
|
||||
- fastqc:
|
||||
description: |
|
||||
FastQC gives general quality metrics about your reads.
|
||||
It provides information about the quality score distribution
|
||||
across your reads, the per base sequence content (%A/C/G/T).
|
||||
You get information about adapter contamination and other
|
||||
overrepresented sequences.
|
||||
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
|
||||
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
|
||||
- fastqc:
|
||||
description: |
|
||||
FastQC gives general quality metrics about your reads.
|
||||
It provides information about the quality score distribution
|
||||
across your reads, the per base sequence content (%A/C/G/T).
|
||||
You get information about adapter contamination and other
|
||||
overrepresented sequences.
|
||||
homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
|
||||
documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
|
||||
params:
|
||||
- fastqc_args:
|
||||
type: string
|
||||
description: Additional command line arguments passed to fastqc.
|
||||
- out_dir:
|
||||
type: string
|
||||
description: |
|
||||
The pipeline's output directory. By default, the module will
|
||||
output files into `$out_dir/MODULE_NAME`
|
||||
- publish_dir:
|
||||
type: string
|
||||
description: |
|
||||
Append to the path for the standard output directory provided by `$out_dir`.
|
||||
- publish_dir_mode:
|
||||
type: string
|
||||
description: |
|
||||
Provide a value for the Nextflow `publishDir` mode parameter
|
||||
(e.g. copy, link, ...)
|
||||
- publish_results:
|
||||
type: string
|
||||
description: |
|
||||
Whether or not to publish results into `publish_dir`. Set to `none` to not
|
||||
publish any files at all; to `default` to publish all relevant files.
|
||||
input:
|
||||
-
|
||||
- name:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- reads:
|
||||
type: file
|
||||
description: Input FastQ file, or pair of files
|
||||
- name:
|
||||
type: string
|
||||
description: Sample identifier
|
||||
- single_end:
|
||||
type: boolean
|
||||
description: |
|
||||
Boolean indicating whether the corresponding sample is single-end (true)
|
||||
or paired-end (false).
|
||||
- reads:
|
||||
type: file
|
||||
description: |
|
||||
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
|
||||
respectively.
|
||||
output:
|
||||
-
|
||||
- report:
|
||||
type: file
|
||||
description: FastQC report
|
||||
pattern: "*_fastqc.{zip,html}"
|
||||
- report:
|
||||
type: file
|
||||
description: FastQC report
|
||||
pattern: "*_fastqc.{zip,html}"
|
||||
authors:
|
||||
- "@ewels"
|
||||
- "@FelixKrueger"
|
||||
- "@grst"
|
||||
- "@drpatelh"
|
||||
- "@ewels"
|
||||
- "@FelixKrueger"
|
||||
|
|
1
software/fastqc/test/data/test_R1.fastq.gz
Symbolic link
1
software/fastqc/test/data/test_R1.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R1.fastq.gz
|
1
software/fastqc/test/data/test_R2.fastq.gz
Symbolic link
1
software/fastqc/test/data/test_R2.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_R2.fastq.gz
|
1
software/fastqc/test/data/test_single_end.fastq.gz
Symbolic link
1
software/fastqc/test/data/test_single_end.fastq.gz
Symbolic link
|
@ -0,0 +1 @@
|
|||
../../../../tests/data/fastq/rna/test_single_end.fastq.gz
|
|
@ -1,21 +1,31 @@
|
|||
#!/usr/bin/env nextflow
|
||||
nextflow.preview.dsl = 2
|
||||
|
||||
params.outdir = "." // gets set in nextflow.config file (as './results/fastqc')
|
||||
params.out_dir = "test_output"
|
||||
params.fastqc_args = ''
|
||||
params.verbose = false
|
||||
params.publish_dir_mode = "copy"
|
||||
|
||||
// TODO: check the output files in some way
|
||||
// include '../../../tests/functions/check_process_outputs.nf'
|
||||
include '../main.nf'
|
||||
include { FASTQC } from '../main.nf'
|
||||
|
||||
// Define input channels
|
||||
ch_read_files = Channel
|
||||
.fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
|
||||
// .view() // to check whether the input channel works
|
||||
|
||||
// Run the workflow
|
||||
workflow {
|
||||
FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose)
|
||||
// .check_output()
|
||||
/**
|
||||
* Test if FASTQC runs with single-end data
|
||||
*/
|
||||
workflow test_single_end {
|
||||
input_files = Channel.fromPath("data/test_single_end.fastq.gz")
|
||||
.map {f -> [f.baseName, true, f]}
|
||||
FASTQC(input_files)
|
||||
}
|
||||
|
||||
/**
|
||||
* Test if FASTQC runs with paired end data
|
||||
*/
|
||||
workflow test_paired_end {
|
||||
input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
|
||||
.map {f -> [f[0], false, f[1]]}
|
||||
FASTQC(input_files)
|
||||
}
|
||||
|
||||
workflow {
|
||||
test_single_end()
|
||||
test_paired_end()
|
||||
}
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
// docker.enabled = true
|
||||
params.outdir = './results/fastqc'
|
BIN
tests/data/fastq/rna/test_R1.fastq.gz
Normal file
BIN
tests/data/fastq/rna/test_R1.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/test_R2.fastq.gz
Normal file
BIN
tests/data/fastq/rna/test_R2.fastq.gz
Normal file
Binary file not shown.
BIN
tests/data/fastq/rna/test_single_end.fastq.gz
Normal file
BIN
tests/data/fastq/rna/test_single_end.fastq.gz
Normal file
Binary file not shown.
Loading…
Reference in a new issue