Merge pull request #40 from grst/update-fastqc

Update fastqc to adhere to new module guidelines
2024-12-31 11:42:12 -05:00 · 2020-07-15 12:59:28 +01:00 · 2020-07-15 12:59:28 +01:00 · 6028bb080b
commit 6028bb080b
parent 741b0786f9 a2bcb5c36a
12 changed files with 113 additions and 69 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
 .nextflow*
 work/
 results/
-./data
+test_output/
 .DS_Store
 *.code-workspace
--- a/software/fastqc/environment.yml
+++ b/software/fastqc/environment.yml
@@ -6,4 +6,4 @@ channels:
  - bioconda
  - defaults
 dependencies:
-  - fastqc=0.11.8
+  - fastqc=0.11.9
--- a/software/fastqc/main.nf
+++ b/software/fastqc/main.nf
@@ -1,37 +1,40 @@
-nextflow.preview.dsl = 2
+def MODULE = "fastqc"
 params.publish_dir = MODULE
 params.publish_results = "default"
 process FASTQC {
    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        saveAs: { filename ->
                    if (params.publish_results == "none") null
                    else filename }
-    // tag "FastQC - $sample_id"
+    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"
    input:
-        tuple val(name), path(reads)
+    tuple val(name), val(single_end), path(reads)
        val (outputdir)
        // fastqc_args are best passed into the workflow in the following manner:
        // --fastqc_args="--nogroup -a custom_adapter_file.txt"
        val (fastqc_args)
        val (verbose)
    output:
-        tuple val(name), path ("*fastqc*"), emit: all
+    tuple val(name), val(single_end), path("*.html"), emit: html
-        path "*.zip",                       emit: report // e.g. for MultiQC later
+    tuple val(name), val(single_end), path("*.zip"), emit: zip
-
+    path "*.version.txt", emit: version
    // container 'quay.io/biocontainers/fastqc:0.11.8--2'
    publishDir "$outputdir",
        mode: "copy", overwrite: true
    script:
-
+    // Add soft-links to original FastQs for consistent naming in pipeline
-        if (verbose){
+    if (single_end) {
-            println ("[MODULE] FASTQC ARGS: " + fastqc_args)
+        """
        [ ! -f  ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz
        fastqc ${params.fastqc_args} --threads $task.cpus ${name}.fastq.gz
        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
        """
    } else {
        """
        [ ! -f  ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz
        [ ! -f  ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz
        fastqc ${params.fastqc_args} --threads $task.cpus ${name}_1.fastq.gz ${name}_2.fastq.gz
        fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
        """
    }
        """
        module load fastqc
        fastqc $fastqc_args -q -t 2 $reads
        fastqc --version &> fastqc.version.txt
        """
 }
--- a/software/fastqc/meta.yml
+++ b/software/fastqc/meta.yml
@@ -14,20 +14,50 @@ tools:
        overrepresented sequences.
      homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
      documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
 params:
  - fastqc_args:
      type: string
      description: Additional command line arguments passed to fastqc.
  - out_dir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$out_dir/MODULE_NAME`
  - publish_dir:
      type: string
      description: |
        Append to the path for the standard output directory provided by `$out_dir`.
  - publish_dir_mode:
      type: string
      description: |
        Provide a value for the Nextflow `publishDir` mode parameter
        (e.g. copy, link, ...)
  - publish_results:
      type: string
      description: |
        Whether or not to publish results into `publish_dir`. Set to `none` to not
        publish any files at all; to `default` to publish all relevant files.
 input:
    -
  - name:
      type: string
      description: Sample identifier
  - single_end:
      type: boolean
      description: |
        Boolean indicating whether the corresponding sample is single-end (true)
        or paired-end (false).
  - reads:
      type: file
-            description: Input FastQ file, or pair of files
+      description: |
        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
        respectively.
 output:
    -
  - report:
      type: file
      description: FastQC report
      pattern: "*_fastqc.{zip,html}"
 authors:
  - "@grst"
  - "@drpatelh"
  - "@ewels"
  - "@FelixKrueger"
--- a/software/fastqc/test/data/test_R1.fastq.gz
+++ b/software/fastqc/test/data/test_R1.fastq.gz
@@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_R1.fastq.gz
--- a/software/fastqc/test/data/test_R2.fastq.gz
+++ b/software/fastqc/test/data/test_R2.fastq.gz
@@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_R2.fastq.gz
--- a/software/fastqc/test/data/test_single_end.fastq.gz
+++ b/software/fastqc/test/data/test_single_end.fastq.gz
@@ -0,0 +1 @@
 ../../../../tests/data/fastq/rna/test_single_end.fastq.gz
--- a/software/fastqc/test/main.nf
+++ b/software/fastqc/test/main.nf
@@ -1,21 +1,31 @@
 #!/usr/bin/env nextflow
 nextflow.preview.dsl = 2
-params.outdir = "."             // gets set in nextflow.config file (as './results/fastqc')
+params.out_dir = "test_output"
 params.fastqc_args = ''
-params.verbose = false
+params.publish_dir_mode = "copy"
-// TODO: check the output files in some way
+include { FASTQC } from '../main.nf'
 // include '../../../tests/functions/check_process_outputs.nf'
 include '../main.nf'
-// Define input channels
+/**
-ch_read_files = Channel 
+ * Test if FASTQC runs with single-end data
-    .fromFilePairs('../../../test-datasets/test*{1,2}.fastq.gz',size:-1)
+ */
-    // .view()  // to check whether the input channel works
+workflow test_single_end {
-
+    input_files = Channel.fromPath("data/test_single_end.fastq.gz")
-// Run the workflow
+                    .map {f -> [f.baseName, true, f]}
-workflow {
+    FASTQC(input_files)
-    FASTQC (ch_read_files, params.outdir, params.fastqc_args, params.verbose)
+}
-    // .check_output()
+
 /**
 * Test if FASTQC runs with paired end data
 */
 workflow test_paired_end {
    input_files = Channel.fromFilePairs("data/test_R{1,2}.fastq.gz")
                    .map {f -> [f[0], false, f[1]]}
    FASTQC(input_files)
 }
 workflow {
    test_single_end()
    test_paired_end()
 }
--- a/software/fastqc/test/nextflow.config
+++ b/software/fastqc/test/nextflow.config
@@ -1,2 +0,0 @@
 // docker.enabled = true
 params.outdir = './results/fastqc'
--- a/tests/data/fastq/rna/test_R1.fastq.gz
+++ b/tests/data/fastq/rna/test_R1.fastq.gz
--- a/tests/data/fastq/rna/test_R2.fastq.gz
+++ b/tests/data/fastq/rna/test_R2.fastq.gz
--- a/tests/data/fastq/rna/test_single_end.fastq.gz
+++ b/tests/data/fastq/rna/test_single_end.fastq.gz
		`@ -0,0 +1 @@`
							`../../../../tests/data/fastq/rna/test_R1.fastq.gz`
		`@ -0,0 +1 @@`
							`../../../../tests/data/fastq/rna/test_single_end.fastq.gz`
		`@ -1,2 +0,0 @@`
			`// docker.enabled = true`
			`params.outdir = './results/fastqc'`