Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
2024-12-22 11:08:17 +00:00 · 2020-07-15 12:59:28 +01:00 · 2020-07-15 12:59:28 +01:00 · 6028bb080b
commit 6028bb080b
parent 741b0786f9 a2bcb5c36a
12 changed files with 113 additions and 69 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 .nextflow*
 work/
 results/
-./data
+test_output/
 .DS_Store
 *.code-workspace
--- a/software/fastqc/environment.yml
+++ b/software/fastqc/environment.yml
@@ -6,4 +6,4 @@ channels:
  - bioconda
  - defaults
 dependencies:
-  - fastqc=0.11.8
+  - fastqc=0.11.9
--- a/software/fastqc/main.nf
+++ b/software/fastqc/main.nf
@@ -1,37 +1,40 @@
-nextflow.preview.dsl = 2
+def MODULE = "fastqc"
+params.publish_dir = MODULE
+params.publish_results = "default"

 process FASTQC {
+    publishDir "${params.out_dir}/${params.publish_dir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename ->
+                    if (params.publish_results == "none") null
+                    else filename }

-    // tag "FastQC - $sample_id"
+    container "docker.pkg.github.com/nf-core/$MODULE"
+
+    conda "${moduleDir}/environment.yml"

    input:
-        tuple val(name), path(reads)
-        val (outputdir)
-        // fastqc_args are best passed into the workflow in the following manner:
-        // --fastqc_args="--nogroup -a custom_adapter_file.txt"
-        val (fastqc_args)
-        val (verbose)
+    tuple val(name), val(single_end), path(reads)

    output:
-        tuple val(name), path ("*fastqc*"), emit: all
-        path "*.zip",                       emit: report // e.g. for MultiQC later
-
-    // container 'quay.io/biocontainers/fastqc:0.11.8--2'
-
-    publishDir "$outputdir",
-        mode: "copy", overwrite: true
+    tuple val(name), val(single_end), path("*.html"), emit: html
+    tuple val(name), val(single_end), path("*.zip"), emit: zip
+    path "*.version.txt", emit: version

    script:
-
-        if (verbose){
-            println ("[MODULE] FASTQC ARGS: " + fastqc_args)
-        }
-
+    // Add soft-links to original FastQs for consistent naming in pipeline
+    if (single_end) {
        """
-        module load fastqc
-        fastqc $fastqc_args -q -t 2 $reads
-
-        fastqc --version &> fastqc.version.txt
+        [ ! -f  ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
+        fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
+        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
        """
-
+    } else {
+        """
+        [ ! -f  ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
+        [ ! -f  ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
+        fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
+        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
+        """
+    }
 }
--- a/software/fastqc/meta.yml
+++ b/software/fastqc/meta.yml
@@ -1,33 +1,63 @@
 name: FastQC
 description: Run FastQC on sequenced reads
 keywords:
-    - Quality Control
-    - QC
-    - Adapters
+  - Quality Control
+  - QC
+  - Adapters
 tools:
-    - fastqc:
-        description: |
-            FastQC gives general quality metrics about your reads.
-            It provides information about the quality score distribution
-            across your reads, the per base sequence content (%A/C/G/T).
-            You get information about adapter contamination and other
-            overrepresented sequences.
-        homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
-        documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+  - fastqc:
+      description: |
+        FastQC gives general quality metrics about your reads.
+        It provides information about the quality score distribution
+        across your reads, the per base sequence content (%A/C/G/T).
+        You get information about adapter contamination and other
+        overrepresented sequences.
+      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+params:
+  - fastqc_args:
+      type: string
+      description: Additional command line arguments passed to fastqc.
+  - out_dir:
+      type: string
+      description: |
+        The pipeline's output directory. By default, the module will
+        output files into `$out_dir/MODULE_NAME`
+  - publish_dir:
+      type: string
+      description: |
+        Append to the path for the standard output directory provided by `$out_dir`.
+  - publish_dir_mode:
+      type: string
+      description: |
+        Provide a value for the Nextflow `publishDir` mode parameter
+        (e.g. copy, link, ...)
+  - publish_results:
+      type: string
+      description: |
+        Whether or not to publish results into `publish_dir`. Set to `none` to not
+        publish any files at all; to `default` to publish all relevant files.
 input:
-    -
-        - name:
-            type: string
-            description: Sample identifier
-        - reads:
-            type: file
-            description: Input FastQ file, or pair of files
+  - name:
+      type: string
+      description: Sample identifier
+  - single_end:
+      type: boolean
+      description: |
+        Boolean indicating whether the corresponding sample is single-end (true)
+        or paired-end (false).
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
 output:
-    -
-        - report:
-            type: file
-            description: FastQC report
-            pattern: "*_fastqc.{zip,html}"
+  - report:
+      type: file
+      description: FastQC report
+      pattern: "*_fastqc.{zip,html}"
 authors:
-    - "@ewels"
-    - "@FelixKrueger"
+  - "@grst"
+  - "@drpatelh"
+  - "@ewels"
+  - "@FelixKrueger"
--- a/software/fastqc/test/data/test_R1.fastq.gz
+++ b/software/fastqc/test/data/test_R1.fastq.gz
@@ -0,0 +1 @@
+../../../../tests/data/fastq/rna/test_R1.fastq.gz
--- a/software/fastqc/test/data/test_R2.fastq.gz
+++ b/software/fastqc/test/data/test_R2.fastq.gz
@@ -0,0 +1 @@
+../../../../tests/data/fastq/rna/test_R2.fastq.gz
--- a/software/fastqc/test/data/test_single_end.fastq.gz
+++ b/software/fastqc/test/data/test_single_end.fastq.gz
@@ -0,0 +1 @@
+../../../../tests/data/fastq/rna/test_single_end.fastq.gz
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@@ -1,21 +1,31 @@
 #!/usr/bin/env nextflow
 nextflow.preview.dsl = 2

-params.outdir = "."             // gets set in nextflow.config file (as './results/fastqc')
+params.out_dir = "test_output"
 params.fastqc_args = ''
-params.verbose = false
+params.publish_dir_mode = "copy"

-// TODO: check the output files in some way
-// include '../../../tests/functions/check_process_outputs.nf'
-include '../main.nf'
+include { FASTQC } from '../main.nf'

-// Define input channels
-ch_read_files = Channel 
-    .fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
-    // .view()  // to check whether the input channel works
-
-// Run the workflow
-workflow {
-    FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose)
-    // .check_output()
+/**
+ * Test if FASTQC runs with single-end data
+ */
+workflow test_single_end {
+    input_files = Channel.fromPath("data/test_single_end.fastq.gz")
+                    .map {f -> [f.baseName, true, f]}
+    FASTQC(input_files)
+}
+
+/**
+ * Test if FASTQC runs with paired end data
+ */
+workflow test_paired_end {
+    input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
+                    .map {f -> [f[0], false, f[1]]}
+    FASTQC(input_files)
+}
+
+workflow {
+    test_single_end()
+    test_paired_end()
 }
--- a/software/fastqc/test/nextflow.config
+++ b/software/fastqc/test/nextflow.config
@@ -1,2 +0,0 @@
-// docker.enabled = true
-params.outdir = './results/fastqc'
--- a/tests/data/fastq/rna/test_R1.fastq.gz
+++ b/tests/data/fastq/rna/test_R1.fastq.gz
--- a/tests/data/fastq/rna/test_R2.fastq.gz
+++ b/tests/data/fastq/rna/test_R2.fastq.gz
--- a/tests/data/fastq/rna/test_single_end.fastq.gz
+++ b/tests/data/fastq/rna/test_single_end.fastq.gz
				`@ -0,0 +1 @@`
				`../../../../tests/data/fastq/rna/test_R1.fastq.gz`
				`@ -0,0 +1 @@`
				`../../../../tests/data/fastq/rna/test_single_end.fastq.gz`