diff --git a/.github/workflows/cutadapt.yml b/.github/workflows/cutadapt.yml new file mode 100644 index 00000000..ba92681d --- /dev/null +++ b/.github/workflows/cutadapt.yml @@ -0,0 +1,34 @@ +name: cutadapt +on: + push: {} + pull_request: + paths: ['tools/cutadapt/**'] + +jobs: + run_ci_test: + runs-on: ubuntu-latest + + steps: + # Check out the repository + - uses: actions/checkout@v2 + - name: Checkout submodules + shell: bash + run: | + auth_header="$(git config --local --get http.https://github.com/.extraheader)" + git submodule sync --recursive + git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Test module with paired-end data + run: | + cd tools/cutadapt/test_paired/ + nextflow run . -ansi-log false + + - name: Test module with single-end data + run: | + cd tools/cutadapt/test_single/ + nextflow run . -ansi-log false diff --git a/README.md b/README.md index b97d8e1e..276f8943 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,14 @@ A repository for hosting nextflow [`DSL2`](https://www.nextflow.io/docs/edge/dsl * [Uploading to `nf-core/modules`](#uploading-to-nf-coremodules) * [Help](#help) +## Terminology + +The features offered by Nextflow DSL 2 can be used in various ways depending on the granularity with which you would like to write pipelines. Please see the listing below for the hierarchy and associated terminology we have decided to use when referring to DSL 2 components: + +* *Module*: A `process` that can be used within different pipelines and is as atomic as possible i.e. cannot be split into another module. An example of this would be a module file containing the process definition for a single tool such as `FastQC`.
This repository has been created to only host atomic module files that should be added to the `tools` sub-directory along with the required documentation, software and tests. +* *Sub-workflow*: A chain of multiple modules that offer a higher-level of functionality within the context of a pipeline. For example, a sub-workflow to run multiple QC tools with FastQ files as input. Sub-workflows should be shipped with the pipeline implementation and if required they should be shared amongst different pipelines directly from there. As it stands, this repository will not host sub-workflows. +* *Workflow*: What DSL 1 users would consider an end-to-end pipeline. For example, from one or more inputs to a series of outputs. This can either be implemented using a large monolithic script as with DSL 1, or by using a combination of DSL 2 individual modules and sub-workflows. + ## Using existing modules The Nextflow [`include`](https://www.nextflow.io/docs/edge/dsl2.html#modules-include) statement can be used within your pipelines in order to load module files that you have available locally. diff --git a/assets/nf-core-modules_social_preview.png b/assets/nf-core-modules_social_preview.png new file mode 100644 index 00000000..5f1cf9cd Binary files /dev/null and b/assets/nf-core-modules_social_preview.png differ diff --git a/assets/nf-core-modules_social_preview.svg b/assets/nf-core-modules_social_preview.svg new file mode 100644 index 00000000..25821790 --- /dev/null +++ b/assets/nf-core-modules_social_preview.svg @@ -0,0 +1,450 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + Repository to host tool-specific module files forthe Nextflow DSL2 community! 
+ modules + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test-datasets b/test-datasets index e5fef889..aae85a5c 160000 --- a/test-datasets +++ b/test-datasets @@ -1 +1 @@ -Subproject commit e5fef88994b8d34c7bf4b07116e5f7a330d2ee3b +Subproject commit aae85a5c9c72238959108212481ce83bae569709 diff --git a/tools/bwa/index/main.nf b/tools/bwa/index/main.nf new file mode 100644 index 00000000..0c943bb4 --- /dev/null +++ b/tools/bwa/index/main.nf @@ -0,0 +1,16 @@ +process bwa_index { + tag {fasta} + + container 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' + + input: + path(fasta) + + output: + path("${fasta}.*") + + script: + """ + bwa index ${fasta} + """ +} \ No newline at end of file diff --git a/tools/bwa/index/meta.yml b/tools/bwa/index/meta.yml new file mode 100644 index 00000000..d03c71a2 --- /dev/null +++ b/tools/bwa/index/meta.yml @@ -0,0 +1,25 @@ +name: bwa index +description: create indexes for BWA from a fasta file +keywords: + - index +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. 
+ homepage: http://bio-bwa.sourceforge.net/ + documentation: http://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 +input: + - + - input: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - + - index: + type: file + description: bwa indexes file + pattern: "*.{fasta,fa}.{amb,ann,bwt,pac,sa}" +authors: + - "@maxulysse" diff --git a/tools/bwa/index/test/main.nf b/tools/bwa/index/test/main.nf new file mode 100644 index 00000000..6acb4d61 --- /dev/null +++ b/tools/bwa/index/test/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../../../nf-core/module_testing/check_process_outputs.nf' params(params) +include '../main.nf' params(params) + +// Define input channels +input = '../../../../test-datasets/tools/bwa/index/input/reference.fasta' +Channel + .fromPath(input) + .set { ch_input } + +// Run the workflow +workflow { + bwa_index(ch_input) + // .check_output() +} diff --git a/tools/bwa/index/test/nextflow.config b/tools/bwa/index/test/nextflow.config new file mode 100644 index 00000000..c137a138 --- /dev/null +++ b/tools/bwa/index/test/nextflow.config @@ -0,0 +1,2 @@ +docker.enabled = true +params.outdir = './results' diff --git a/tools/cutadapt/main.nf b/tools/cutadapt/main.nf new file mode 100644 index 00000000..bdd444af --- /dev/null +++ b/tools/cutadapt/main.nf @@ -0,0 +1,45 @@ +process cutadapt { + tag "${sample_id}" + + container 'quay.io/biocontainers/cutadapt:1.16--py27_1' + + input: + tuple val(sample_id), file(reads) + + output: + tuple val(sample_id), file("trimmed_*.fastq") + + script: + forward_fq = "trimmed_1.fastq" + reverse_fq = "trimmed_2.fastq" + + + if (params.singleEnd) { + processing = """ + cutadapt \ + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --output ${forward_fq} \ + ${reads} + """ + } else { + processing = """ + cutadapt \ + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length
\ + --pair-filter=any \ + --output ${forward_fq} \ + --paired-output ${reverse_fq} ${reads} + + + """ + } + + version = """ + cutadapt --version &> v_cutadapt.txt + """ + + return processing + version +} diff --git a/tools/cutadapt/meta.yml b/tools/cutadapt/meta.yml new file mode 100644 index 00000000..8df0b244 --- /dev/null +++ b/tools/cutadapt/meta.yml @@ -0,0 +1,36 @@ +name: Cutadapt +description: cutadapt removes adapter sequences from high-throughput sequencing reads +keywords: + - Quality Control + - QC + - Adapters +tools: + - cutadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence + from your high-throughput sequencing reads. + + Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ + sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads + start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but + often you don’t want them to be in your reads.
+ homepage: https://cutadapt.readthedocs.io/en/stable/ + documentation: https://cutadapt.readthedocs.io/en/stable/ +input: + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: Input FastQ file, or pair of files +output: + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: trimmed FastQ file, or pair of files +authors: + - "@piotr-faba-ardigen" diff --git a/tools/cutadapt/test_paired/main.nf b/tools/cutadapt/test_paired/main.nf new file mode 100644 index 00000000..46d0fdc6 --- /dev/null +++ b/tools/cutadapt/test_paired/main.nf @@ -0,0 +1,14 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../main.nf' params(params) + +// Define input channels + +Channel + .fromFilePairs('../../../test-datasets/tools/cutadapt/input/*_{1,2}.fastq' ) + .set { ch_read_files } + +// Run the workflow +workflow { + cutadapt(ch_read_files) +} diff --git a/tools/cutadapt/test_paired/nextflow.config b/tools/cutadapt/test_paired/nextflow.config new file mode 100644 index 00000000..08e52203 --- /dev/null +++ b/tools/cutadapt/test_paired/nextflow.config @@ -0,0 +1,9 @@ +docker.enabled = true +params.outdir = './results' + +params{ + //preprocessing options + cutadapt_min_length = 40 + cutadapt_min_quality = 25 + singleEnd = false +} diff --git a/tools/cutadapt/test_single/main.nf b/tools/cutadapt/test_single/main.nf new file mode 100644 index 00000000..657e2428 --- /dev/null +++ b/tools/cutadapt/test_single/main.nf @@ -0,0 +1,21 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../main.nf' params(params) + +// Define input channels + +readPaths = [ + ['SRR4238351', '../../../test-datasets/tools/cutadapt/input/SRR4238351_subsamp.fastq.gz'], + ['SRR4238355', '../../../test-datasets/tools/cutadapt/input/SRR4238355_subsamp.fastq.gz'], + ['SRR4238359', '../../../test-datasets/tools/cutadapt/input/SRR4238359_subsamp.fastq.gz'], + ['SRR4238379',
'../../../test-datasets/tools/cutadapt/input/SRR4238379_subsamp.fastq.gz'] +] +Channel + .from(readPaths) + .map { row -> [ row[0], [ file(row[1]) ] ] } + .set { ch_read_files } + +// Run the workflow +workflow { + cutadapt(ch_read_files) +} diff --git a/tools/cutadapt/test_single/nextflow.config b/tools/cutadapt/test_single/nextflow.config new file mode 100644 index 00000000..4b805ff3 --- /dev/null +++ b/tools/cutadapt/test_single/nextflow.config @@ -0,0 +1,9 @@ +docker.enabled = true +params.outdir = './results' + +params{ + //preprocessing options + cutadapt_min_length = 40 + cutadapt_min_quality = 25 + singleEnd = true +} diff --git a/tools/gatk/dict/main.nf b/tools/gatk/dict/main.nf new file mode 100644 index 00000000..49edf5a1 --- /dev/null +++ b/tools/gatk/dict/main.nf @@ -0,0 +1,19 @@ +process gatk_dict { + tag {fasta} + + container 'quay.io/biocontainers/gatk4-spark:4.1.4.1--1' + + input: + path(fasta) + + output: + path("${fasta.baseName}.dict") + + script: + """ + gatk ${task.memory ? "--java-options -Xmx${task.memory.toMega()}m" : ''} \ + CreateSequenceDictionary \ + --REFERENCE ${fasta} \ + --OUTPUT ${fasta.baseName}.dict + """ +} \ No newline at end of file diff --git a/tools/gatk/dict/meta.yml b/tools/gatk/dict/meta.yml new file mode 100644 index 00000000..bc9e18b6 --- /dev/null +++ b/tools/gatk/dict/meta.yml @@ -0,0 +1,25 @@ +name: gatk dict +description: create a dictionary file from a fasta file +keywords: + - dictionary +tools: + - gatk: + description: | + The GATK toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping, developed in the Data Sciences Platform at the Broad Institute.
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 +input: + - + - input: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - + - dict: + type: file + description: gatk dictionary file + pattern: "*.dict" +authors: + - "@maxulysse" \ No newline at end of file diff --git a/tools/gatk/dict/test/main.nf b/tools/gatk/dict/test/main.nf new file mode 100644 index 00000000..41c1862c --- /dev/null +++ b/tools/gatk/dict/test/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../../../nf-core/module_testing/check_process_outputs.nf' params(params) +include '../main.nf' params(params) + +// Define input channels +input = file('../../../../test-datasets/tools/bwa/index/input/reference.fasta') + +// Run the workflow +workflow { + gatk_dict(input) + // .check_output() +} diff --git a/tools/gatk/dict/test/nextflow.config b/tools/gatk/dict/test/nextflow.config new file mode 100644 index 00000000..c137a138 --- /dev/null +++ b/tools/gatk/dict/test/nextflow.config @@ -0,0 +1,2 @@ +docker.enabled = true +params.outdir = './results' diff --git a/tools/htslib/tabix/main.nf b/tools/htslib/tabix/main.nf new file mode 100644 index 00000000..e228bb02 --- /dev/null +++ b/tools/htslib/tabix/main.nf @@ -0,0 +1,16 @@ +process htslib_tabix { + tag {vcf} + + container 'quay.io/biocontainers/tabix:0.2.6--ha92aebf_0' + + input: + path(vcf) + + output: + path("${vcf}.tbi") + + script: + """ + tabix -p vcf ${vcf} + """ +} diff --git a/tools/htslib/tabix/meta.yml b/tools/htslib/tabix/meta.yml new file mode 100644 index 00000000..027780c3 --- /dev/null +++ b/tools/htslib/tabix/meta.yml @@ -0,0 +1,26 @@ +name: htslib tabix +description: create tabix index from a bgzip vcf file +keywords: + - index + - tabix +tools: + - tabix: + description: | + Generic indexer for TAB-delimited genome position files.
+ homepage: https://www.htslib.org/ + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 +input: + - + - input: + type: file + description: Input vcf.gz file + pattern: "*.vcf.gz" +output: + - + - index: + type: file + description: tabix index file + pattern: "*.vcf.gz.tbi" +authors: + - "@maxulysse" \ No newline at end of file diff --git a/tools/htslib/tabix/test/main.nf b/tools/htslib/tabix/test/main.nf new file mode 100644 index 00000000..6fd80089 --- /dev/null +++ b/tools/htslib/tabix/test/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../../../nf-core/module_testing/check_process_outputs.nf' params(params) +include '../main.nf' params(params) + +// Define input channels +input = file('../../../../test-datasets/tools/file.vcf.gz') + +// Run the workflow +workflow { + htslib_tabix(input) + // .check_output() +} diff --git a/tools/htslib/tabix/test/nextflow.config b/tools/htslib/tabix/test/nextflow.config new file mode 100644 index 00000000..c137a138 --- /dev/null +++ b/tools/htslib/tabix/test/nextflow.config @@ -0,0 +1,2 @@ +docker.enabled = true +params.outdir = './results' diff --git a/tools/samtools/faidx/main.nf b/tools/samtools/faidx/main.nf new file mode 100644 index 00000000..458a5ab8 --- /dev/null +++ b/tools/samtools/faidx/main.nf @@ -0,0 +1,16 @@ +process samtools_faidx { + tag {fasta} + + container 'quay.io/biocontainers/samtools:1.9--h10a08f8_12' + + input: + path(fasta) + + output: + path("${fasta}.fai") + + script: + """ + samtools faidx ${fasta} + """ +} diff --git a/tools/samtools/faidx/meta.yml b/tools/samtools/faidx/meta.yml new file mode 100644 index 00000000..1e402057 --- /dev/null +++ b/tools/samtools/faidx/meta.yml @@ -0,0 +1,27 @@ +name: samtools faidx +description: index a fasta file +keywords: + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM,
BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 +input: + - + - input: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - + - faidx: + type: file + description: samtools index fasta file + pattern: "*.{fasta,fa}.fai" +authors: + - "@maxulysse" \ No newline at end of file diff --git a/tools/samtools/faidx/test/main.nf b/tools/samtools/faidx/test/main.nf new file mode 100644 index 00000000..f01ab6c9 --- /dev/null +++ b/tools/samtools/faidx/test/main.nf @@ -0,0 +1,13 @@ +#!/usr/bin/env nextflow +nextflow.preview.dsl = 2 +include '../../../nf-core/module_testing/check_process_outputs.nf' params(params) +include '../main.nf' params(params) + +// Define input channels +input = file('../../../../test-datasets/tools/bwa/index/input/reference.fasta') + +// Run the workflow +workflow { + samtools_faidx(input) + // .check_output() +} diff --git a/tools/samtools/faidx/test/nextflow.config b/tools/samtools/faidx/test/nextflow.config new file mode 100644 index 00000000..c137a138 --- /dev/null +++ b/tools/samtools/faidx/test/nextflow.config @@ -0,0 +1,2 @@ +docker.enabled = true +params.outdir = './results' diff --git a/tools/samtools/index/main.nf b/tools/samtools/index/main.nf index acf1af41..7b065fa9 100644 --- a/tools/samtools/index/main.nf +++ b/tools/samtools/index/main.nf @@ -7,15 +7,14 @@ process samtools_index { path(bam) output: - path "*.sorted.bam" + path "*.bai" script: def suff_mem = ("${(task.memory.toBytes() - 6000000000) / task.cpus}" > 2000000000) ? 'true' : 'false' def avail_mem = (task.memory && suff_mem) ?
"-m" + "${(task.memory.toBytes() - 6000000000) / task.cpus}" : '' """ - samtools sort $bam \\ - -@ ${task.cpus} ${avail_mem} \\ - -o ${bam.baseName}.sorted.bam + samtools index \\ + -@ ${task.cpus} $bam samtools --version &> v_samtools.txt """ diff --git a/tools/samtools/index/meta.yml b/tools/samtools/index/meta.yml index 3cd7a402..e665789b 100644 --- a/tools/samtools/index/meta.yml +++ b/tools/samtools/index/meta.yml @@ -1,7 +1,7 @@ -name: samtools sort -description: Sort a BAM or CRAM file +name: samtools index +description: index a BAM or CRAM file keywords: - - sort + - index tools: - samtools: description: | diff --git a/tools/shovill/main.nf b/tools/shovill/main.nf new file mode 100644 index 00000000..cf52ba10 --- /dev/null +++ b/tools/shovill/main.nf @@ -0,0 +1,20 @@ +process shovill { + + tag { sample_id } + + publishDir "${params.outdir}", pattern: '*.fasta', mode: 'copy' + + container "quay.io/biocontainers/shovill:1.0.9--0" + + input: + tuple(val(sample_id), path(forward), path(reverse)) + + output: + path("${sample_id}.fasta") + + script: + """ + shovill --R1 ${forward} --R2 ${reverse} --outdir shovill_out + mv shovill_out/contigs.fa ${sample_id}.fasta + """ +} diff --git a/tools/shovill/meta.yml b/tools/shovill/meta.yml new file mode 100644 index 00000000..7c204c24 --- /dev/null +++ b/tools/shovill/meta.yml @@ -0,0 +1,30 @@ +name: Shovill +description: Create a bacterial assembly from paired fastq using shovill +keywords: + - Genome Assembly + - Bacterial Isolates +tools: + - shovill: + description: | + Shovill assembles bacterial isolate genomes from Illumina + paired-end reads. Shovill uses the SPAdes genome assembler, + providing pre and post-processing to the SPAdes assembly. + It also supports SKESA, Velvet and Megahit.
+ homepage: https://github.com/tseemann/shovill + documentation: https://github.com/tseemann/shovill/blob/master/README.md +input: + - + - sample_id: + type: string + description: Sample identifier + - reads: + type: file + description: pair of fastq files +output: + - + - assembly: + type: file + description: fasta file + pattern: ${sample_id}.fasta +authors: + - "@annacprice" diff --git a/tools/shovill/test/main.nf b/tools/shovill/test/main.nf new file mode 100644 index 00000000..85ba6627 --- /dev/null +++ b/tools/shovill/test/main.nf @@ -0,0 +1,17 @@ +#!/usr/bin/env nextflow + +nextflow.preview.dsl = 2 + +// import shovill +include {shovill} from '../main.nf' params(params) + +// define input channel +readsPath = '../../../test-datasets/tools/shovill/input/SRR3609257_{1,2}.fastq.gz' +Channel + .fromFilePairs( "${readsPath}", flat: true ) + .set{ ch_reads } + +// main workflow +workflow { + shovill(ch_reads) +} diff --git a/tools/shovill/test/nextflow.config b/tools/shovill/test/nextflow.config new file mode 100644 index 00000000..44cfb78d --- /dev/null +++ b/tools/shovill/test/nextflow.config @@ -0,0 +1,5 @@ +// docker +docker.enabled = true + +// output directory +params.outdir = './results'