Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
2025-01-03 04:52:09 -05:00 · 2020-07-15 12:59:28 +01:00 · 2020-07-15 12:59:28 +01:00 · 6028bb080b
commit 6028bb080b
parent 741b0786f9 a2bcb5c36a
12 changed files with 113 additions and 69 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,6 @@
 .nextflow*
 work/
 results/
-./data
+test_output/
 .DS_Store
 *.code-workspace
--- a/software/fastqc/environment.yml
+++ b/software/fastqc/environment.yml
@ -6,4 +6,4 @@ channels:
  - bioconda
  - defaults
 dependencies:
-  - fastqc=0.11.8
+  - fastqc=0.11.9
--- a/software/fastqc/main.nf
+++ b/software/fastqc/main.nf
@ -1,37 +1,40 @@
-nextflow.preview.dsl = 2
+def MODULE = "fastqc"
 params.publish_dir = MODULE
 params.publish_results = "default"
 process FASTQC {
    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        saveAs: { filename ->
                    if (params.publish_results == "none") null
                    else filename }
-    // tag "FastQC - $sample_id"
+    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"
    input:
-        tuple val(name), path(reads)
+    tuple val(name), val(single_end), path(reads)
        val (outputdir)
        // fastqc_args are best passed into the workflow in the following manner:
        // --fastqc_args="--nogroup -a custom_adapter_file.txt"
        val (fastqc_args)
        val (verbose)
    output:
-        tuple val(name), path ("*fastqc*"), emit: all
+    tuple val(name), val(single_end), path("*.html"), emit: html
-        path "*.zip",                       emit: report // e.g. for MultiQC later
+    tuple val(name), val(single_end), path("*.zip"), emit: zip
-
+    path "*.version.txt", emit: version
    // container 'quay.io/biocontainers/fastqc:0.11.8--2'
    publishDir "$outputdir",
        mode: "copy", overwrite: true
    script:
-
+    // Add soft-links to original FastQs for consistent naming in pipeline
-        if (verbose){
+    if (single_end) {
            println ("[MODULE] FASTQC ARGS: " + fastqc_args)
        }
        """
-        module load fastqc
+        [ ! -f  ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
-        fastqc $fastqc_args -q -t 2 $reads
+        fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
-
+        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
        fastqc --version &> fastqc.version.txt
        """
-
+    } else {
        """
        [ ! -f  ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
        [ ! -f  ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
        fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
        """
    }
 }
--- a/software/fastqc/meta.yml
+++ b/software/fastqc/meta.yml
@ -1,33 +1,63 @@
 name: FastQC
 description: Run FastQC on sequenced reads
 keywords:
-    - Quality Control
+  - Quality Control
-    - QC
+  - QC
-    - Adapters
+  - Adapters
 tools:
-    - fastqc:
+  - fastqc:
-        description: |
+      description: |
-            FastQC gives general quality metrics about your reads.
+        FastQC gives general quality metrics about your reads.
-            It provides information about the quality score distribution
+        It provides information about the quality score distribution
-            across your reads, the per base sequence content (%A/C/G/T).
+        across your reads, the per base sequence content (%A/C/G/T).
-            You get information about adapter contamination and other
+        You get information about adapter contamination and other
-            overrepresented sequences.
+        overrepresented sequences.
-        homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
-        documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
 params:
  - fastqc_args:
      type: string
      description: Additional command line arguments passed to fastqc.
  - out_dir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$out_dir/MODULE_NAME`
  - publish_dir:
      type: string
      description: |
        Path appended to the standard output directory provided by `$out_dir`.
  - publish_dir_mode:
      type: string
      description: |
        Provide a value for the Nextflow `publishDir` mode parameter
        (e.g. copy, link, ...)
  - publish_results:
      type: string
      description: |
        Whether to publish results into `publish_dir`. Set to `none` to publish
        no files at all, or to `default` to publish all relevant files.
 input:
-    -
+  - name:
-        - name:
+      type: string
-            type: string
+      description: Sample identifier
-            description: Sample identifier
+  - single_end:
-        - reads:
+      type: boolean
-            type: file
+      description: |
-            description: Input FastQ file, or pair of files
+        Boolean indicating whether the corresponding sample is single-end (true)
        or paired-end (false).
  - reads:
      type: file
      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
 output:
-    -
+  - report:
-        - report:
+      type: file
-            type: file
+      description: FastQC report
-            description: FastQC report
+      pattern: "*_fastqc.{zip,html}"
            pattern: "*_fastqc.{zip,html}"
 authors:
-    - "@ewels"
+  - "@grst"
-    - "@FelixKrueger"
+  - "@drpatelh"
  - "@ewels"
  - "@FelixKrueger"
--- a/software/fastqc/test/data/test_R1.fastq.gz
+++ b/software/fastqc/test/data/test_R1.fastq.gz
@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_R1.fastq.gz
--- a/software/fastqc/test/data/test_R2.fastq.gz
+++ b/software/fastqc/test/data/test_R2.fastq.gz
@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_R2.fastq.gz
--- a/software/fastqc/test/data/test_single_end.fastq.gz
+++ b/software/fastqc/test/data/test_single_end.fastq.gz
@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_single_end.fastq.gz
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@ -1,21 +1,31 @@
 #!/usr/bin/env nextflow
 nextflow.preview.dsl = 2
-params.outdir = "."             // gets set in nextflow.config file (as './results/fastqc')
+params.out_dir = "test_output"
 params.fastqc_args = ''
-params.verbose = false
+params.publish_dir_mode = "copy"
-// TODO: check the output files in some way
+include { FASTQC } from '../main.nf'
 // include '../../../tests/functions/check_process_outputs.nf'
 include '../main.nf'
-// Define input channels
+/**
-ch_read_files = Channel 
+ * Test if FASTQC runs with single-end data
-    .fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
+ */
-    // .view()  // to check whether the input channel works
+workflow test_single_end {
-
+    input_files = Channel.fromPath("data/test_single_end.fastq.gz")
-// Run the workflow
+                    .map {f -> [f.baseName, true, f]}
-workflow {
+    FASTQC(input_files)
-    FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose)
+}
-    // .check_output()
+
 /**
 * Test if FASTQC runs with paired-end data
 */
 workflow test_paired_end {
    input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
                    .map {f -> [f[0], false, f[1]]}
    FASTQC(input_files)
 }
 workflow {
    test_single_end()
    test_paired_end()
 }
--- a/software/fastqc/test/nextflow.config
+++ b/software/fastqc/test/nextflow.config
@ -1,2 +0,0 @@
 // docker.enabled = true
 params.outdir = './results/fastqc'
--- a/tests/data/fastq/rna/test_R1.fastq.gz
+++ b/tests/data/fastq/rna/test_R1.fastq.gz
--- a/tests/data/fastq/rna/test_R2.fastq.gz
+++ b/tests/data/fastq/rna/test_R2.fastq.gz
--- a/tests/data/fastq/rna/test_single_end.fastq.gz
+++ b/tests/data/fastq/rna/test_single_end.fastq.gz
		`@ -0,0 +1 @@`
							`../../../../tests/data/fastq/rna/test_R1.fastq.gz`
		`@ -0,0 +1 @@`
							`../../../../tests/data/fastq/rna/test_single_end.fastq.gz`
		`@ -1,2 +0,0 @@`
			`// docker.enabled = true`
			`params.outdir = './results/fastqc'`