diff --git a/README.md b/README.md index 614979c2..22add2b0 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ A repository for hosting [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl - [Using existing modules](#using-existing-modules) - [Adding a new module file](#adding-a-new-module-file) + - [Module template](#module-template) - [Guidelines](#guidelines) - [Testing](#testing) - [Documentation](#documentation) @@ -108,6 +109,25 @@ and to everyone within the Nextflow community! See [`software/`](software) for examples. +### Module template + +We have added a directory called [`software/SOFTWARE/TOOL/`](software/SOFTWARE/TOOL/) that serves as a template for creating your own module submission. Where applicable, the files in this directory contain extensive `TODO` statements that explain where to make the appropriate changes and how to make them. If in doubt, have a look at how we have done things for other modules. + +```console +. +├── software +│   ├── SOFTWARE +│   │   └── TOOL +│   │   ├── functions.nf ## Utility functions imported in main module script +│   │   ├── main.nf ## Main module script +│   │   ├── meta.yml ## Documentation for module, input, output, params, author +│   │   └── test +│   │   ├── input ## Soft-link input test data from "tests/data/" +│   │   ├── main.nf ## Minimal workflow to test module +│   │   ├── nextflow.config ## Minimal config to test module +│   │   └── output ## Upload output files from test for unit testing +``` + ### Guidelines The key words "MUST", "MUST NOT", "SHOULD", etc. are to be interpreted as described in [RFC 2119](https://tools.ietf.org/html/rfc2119). @@ -202,12 +222,14 @@ We also use a standardised parameter called `params.publish_dir_mode` that can b - Every module MUST be tested by adding a test workflow with a toy dataset in the [`test/`](software/fastqc/test) directory of the module. 
-- Generic files from [`tests/data/`](tests/data/) SHOULD be reused by symlinking them into the [`test/input/`](software/fastqc/test/input/) directory of the module. +- Generic files from [`tests/data/`](tests/data/) MUST be reused by symlinking them into the [`test/input/`](software/fastqc/test/input/) directory of the module. -- Any outputs produced by the test workflow SHOULD be placed in a folder called [`test/output/`](software/fastqc/test/output/) so that they can be used for unit testing. +- Any outputs produced by the test workflow MUST be placed in a folder called [`test/output/`](software/fastqc/test/output/) so that they can be used for unit testing. - If the appropriate test data doesn't exist for your module then it MUST be added to [`tests/data/`](tests/data/). +- A GitHub Actions workflow file MUST be added to [`.github/workflows/`](.github/workflows/) e.g. [`.github/workflows/fastqc.yml`](.github/workflows/fastqc.yml). + ### Documentation - A module MUST be documented in the [`meta.yml`](software/fastqc/meta.yml) file. It MUST document `params`, `input` and `output`. `input` and `output` MUST be a nested list. diff --git a/software/SOFTWARE/TOOL/meta.yml b/software/SOFTWARE/TOOL/meta.yml index d8ae9a7d..9fd8c1f7 100644 --- a/software/SOFTWARE/TOOL/meta.yml +++ b/software/SOFTWARE/TOOL/meta.yml @@ -1,42 +1,77 @@ -name: bwa mem -description: Performs fastq alignment to a fasta reference using the burrows-wheeler aligner +## TODO nf-core: Please delete all of these TODO statements once the file has been curated +## TODO nf-core: Change the name of "software_tool" below +name: software_tool +## TODO nf-core: Add a description and keywords +description: Run FastQC on sequenced reads keywords: - - mem - - bwa - - alignment + - Quality Control + - QC + - Adapters tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. 
- homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 + ## TODO nf-core: Change the name of "software_tool" below + - software_tool: + ## TODO nf-core: Add a description and other details for the tool below + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ +## TODO nf-core: If you are using any additional "params" in the main.nf script of the module add them below +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive +## TODO nf-core: Add a description of all of the variables used as input input: - - - - id: - type: val - description: read/read pair id - - reads: - type: file - description: Input fastq file - pattern: "*.{fastq,fq}" - - index: - type: file - description: bwa indexes file - pattern: "*.{amb,ann,bwt,pac,sa}" - - prefix: - type: val - description: bwa index prefix, equivalent to index file names without extensions. Usually the reference genome file name unless otherwise specified. + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - options: + type: map + description: | + Groovy Map containing module options for passing command-line arguments and + output file paths. +## TODO nf-core: Add a description of all of the variables used as output output: - - - - bam: - type: file - description: Output bam file - pattern: "*.bam" - - bamindex: - type: file - description: Output bam index file - pattern: "*.bai" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_fastqc.html" + - zip: + type: file + description: FastQC report archive + pattern: "*_fastqc.zip" + - version: + type: file + description: File containing software version + pattern: "*.version.txt" +## TODO nf-core: Add your GitHub username below authors: - - "@jeremy1805" + - "@your_github_username" diff --git a/software/SOFTWARE/TOOL/test/input/soflink_test_data_here b/software/SOFTWARE/TOOL/test/input/soflink_test_data_here new file mode 100644 index 00000000..e69de29b diff --git a/software/SOFTWARE/TOOL/test/main.nf b/software/SOFTWARE/TOOL/test/main.nf index 13fef901..80d0a347 100644 --- a/software/SOFTWARE/TOOL/test/main.nf +++ b/software/SOFTWARE/TOOL/test/main.nf @@ -2,14 +2,34 @@ nextflow.enable.dsl = 2 -include '../../../../tests/functions/check_process_outputs.nf' params(params) -include '../main.nf' params(params) +include { FASTQC } from '../main.nf' -reads = '../../../../test-datasets/tools/bwa/mem/reads/*_R{1,2}_001.fastq.gz' -index = '../../../../test-datasets/tools/bwa/mem/index/H3N2.{amb,ann,bwt,pac,sa}' -prefix = 'H3N2' +/* + * Test with single-end data + */ +workflow test_single_end { + + def input = [] + input = [ [ id:'test', single_end:true ], // meta map + [ file("${baseDir}/input/test_single_end.fastq.gz", 
checkIfExists: true) ] ] + + FASTQC ( input, [ publish_dir:'test_single_end' ] ) +} + +/* + * Test with paired-end data + */ +workflow test_paired_end { + + def input = [] + input = [ [ id:'test', single_end:false ], // meta map + [ file("${baseDir}/input/test_R1.fastq.gz", checkIfExists: true), + file("${baseDir}/input/test_R2.fastq.gz", checkIfExists: true) ] ] + + FASTQC ( input, [ publish_dir:'test_paired_end' ] ) +} workflow { - read_input=Channel.fromFilePairs(reads) - bwa_mem(read_input,file(index),prefix) + test_single_end() + test_paired_end() } diff --git a/software/SOFTWARE/TOOL/test/nextflow.config b/software/SOFTWARE/TOOL/test/nextflow.config index c137a138..ddb59275 100644 --- a/software/SOFTWARE/TOOL/test/nextflow.config +++ b/software/SOFTWARE/TOOL/test/nextflow.config @@ -1,2 +1,20 @@ -docker.enabled = true -params.outdir = './results' + +params { + outdir = "output/" + publish_dir_mode = "copy" + conda = false +} + +profiles { + conda { + params.conda = true + } + docker { + docker.enabled = true + docker.runOptions = '-u \$(id -u):\$(id -g)' + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + } +} diff --git a/software/SOFTWARE/TOOL/test/output/upload_test_data_output_here b/software/SOFTWARE/TOOL/test/output/upload_test_data_output_here new file mode 100644 index 00000000..e69de29b diff --git a/software/fastqc/meta.yml b/software/fastqc/meta.yml index 61039a34..e97c4cc0 100644 --- a/software/fastqc/meta.yml +++ b/software/fastqc/meta.yml @@ -41,11 +41,29 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - options: + type: map + description: | + Groovy Map containing module options for passing command-line arguments and + output file paths. output: - - report: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - html: type: file description: FastQC report - pattern: "*_fastqc.{zip,html}" + pattern: "*_fastqc.html" + - zip: + type: file + description: FastQC report archive + pattern: "*_fastqc.zip" + - version: + type: file + description: File containing software version + pattern: "*.version.txt" authors: - "@drpatelh" - "@grst"