diff --git a/.gitmodules b/.gitmodules index 15bfef35..44e06649 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "test-datasets"] - path = test-datasets - url = https://github.com/nf-core/test-datasets.git + path = test-datasets + url = https://github.com/nf-core/test-datasets.git diff --git a/README.md b/README.md index 276f8943..7730d9c7 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The features offered by Nextflow DSL 2 can be used in various ways depending on * *Module*: A `process`that can be used within different pipelines and is as atomic as possible i.e. cannot be split into another module. An example of this would be a module file containing the process definition for a single tool such as `FastQC`. This repository has been created to only host atomic module files that should be added to the `tools` sub-directory along with the required documentation, software and tests. * *Sub-workflow*: A chain of multiple modules that offer a higher-level of functionality within the context of a pipeline. For example, a sub-workflow to run multiple QC tools with FastQ files as input. Sub-workflows should be shipped with the pipeline implementation and if required they should be shared amongst different pipelines directly from there. As it stands, this repository will not host sub-workflows. -* *Workflow*: What DSL 1 users would consider an end-to-end pipeline. For example, from one or more inputs to a series of outputs. This can either be implemented using a large monolithic script as with DSL 1, or by using a combination of DSL 2 individual modules and sub-workflows. +* *Workflow*: What DSL 1 users would consider an end-to-end pipeline. For example, from one or more inputs to a series of outputs. This can either be implemented using a large monolithic script as with DSL 1, or by using a combination of DSL 2 individual modules and sub-workflows. ## Using existing modules diff --git a/tools/bowtie2/main.nf b/tools/bowtie2/main.nf index 6d1dc55e..467a5761 100644 --- a/tools/bowtie2/main.nf +++ b/tools/bowtie2/main.nf @@ -2,51 +2,51 @@ nextflow.preview.dsl=2 params.genome = '' process BOWTIE2 { - // depending on the genome used one might want/need to adjust the memory settings. - // For the E. coli test data this is probably not required + // depending on the genome used one might want/need to adjust the memory settings. + // For the E. coli test data this is probably not required + + // label 'bigMem' + // label 'multiCore' - // label 'bigMem' - // label 'multiCore' - input: - tuple val(name), path(reads) - val (outdir) - val (bowtie2_args) - val (verbose) + tuple val(name), path(reads) + val (outdir) + val (bowtie2_args) + val (verbose) - output: - path "*bam", emit: bam - path "*stats.txt", emit: stats + output: + path "*bam", emit: bam + path "*stats.txt", emit: stats - publishDir "$outdir/bowtie2", - mode: "copy", overwrite: true + publishDir "$outdir/bowtie2", + mode: "copy", overwrite: true - script: - if (verbose){ - println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args) - } + script: + if (verbose){ + println ("[MODULE] BOWTIE2 ARGS: " + bowtie2_args) + } - cores = 4 + cores = 4 - readString = "" + readString = "" - // Options we add are - bowtie2_options = bowtie2_args - bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file - - // single-end / paired-end distinction. Might also be handled via params.single_end - if (reads instanceof List) { - readString = "-1 " + reads[0] + " -2 " + reads[1] - } - else { - readString = "-U " + reads - } - - index = params.genome["bowtie2"] - bowtie2_name = name + "_" + params.genome["name"] + // Options we add are + bowtie2_options = bowtie2_args + bowtie2_options += " --no-unal " // We don't need unaligned reads in the BAM file - """ - bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam - """ + // single-end / paired-end distinction. Might also be handled via params.single_end + if (reads instanceof List) { + readString = "-1 " + reads[0] + " -2 " + reads[1] + } + else { + readString = "-U " + reads + } + + index = params.genome["bowtie2"] + bowtie2_name = name + "_" + params.genome["name"] + + """ + bowtie2 -x ${index} -p ${cores} ${bowtie2_options} ${readString} 2>${bowtie2_name}_bowtie2_stats.txt | samtools view -bS -F 4 -F 8 -F 256 -> ${bowtie2_name}_bowtie2.bam + """ } diff --git a/tools/bowtie2/meta.yml b/tools/bowtie2/meta.yml index b3b31a94..f30ee823 100644 --- a/tools/bowtie2/meta.yml +++ b/tools/bowtie2/meta.yml @@ -1,4 +1,4 @@ -name: Bowtie 2 +name: Bowtie 2 description: Ultrafast alignment to reference genome keywords: - Alignment diff --git a/tools/bwa/index/main.nf b/tools/bwa/index/main.nf index 0c943bb4..051ed70e 100644 --- a/tools/bwa/index/main.nf +++ b/tools/bwa/index/main.nf @@ -13,4 +13,4 @@ process bwa_index { """ bwa index ${fasta} """ -} \ No newline at end of file +} diff --git a/tools/bwa/mem/Dockerfile b/tools/bwa/mem/Dockerfile index 0be4dc78..9a3e0caa 100644 --- a/tools/bwa/mem/Dockerfile +++ b/tools/bwa/mem/Dockerfile @@ -1,9 +1,7 @@ FROM nfcore/base LABEL authors="Jeremy Guntoro" \ - description="Docker image containing all requirements for nf-core/modules/bwa/mem module" + description="Docker image containing all requirements for nf-core/modules/bwa/mem module" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a ENV PATH /opt/conda/envs/nf-core-bwa-mem/bin:$PATH - - diff --git a/tools/cutadapt/main.nf b/tools/cutadapt/main.nf index bdd444af..734b30b1 100644 --- a/tools/cutadapt/main.nf +++ b/tools/cutadapt/main.nf @@ -13,27 +13,27 @@ process cutadapt { forward_fq = "trimmed_1.fastq" reverse_fq = "trimmed_2.fastq" - + if (params.singleEnd) { processing = """ cutadapt \ - -j ${task.cpus} \ - -q $params.cutadapt_min_quality \ - --minimum-length $params.cutadapt_min_length \ - --output ${forward_fq} \ - ${reads} + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --output ${forward_fq} \ + ${reads} """ } else { processing = """ cutadapt \ - -j ${task.cpus} \ - -q $params.cutadapt_min_quality \ - --minimum-length $params.cutadapt_min_length \ - --pair-filter=any \ - --output ${forward_fq} \ - --paired-output ${reverse_fq} ${reads} - - + -j ${task.cpus} \ + -q $params.cutadapt_min_quality \ + --minimum-length $params.cutadapt_min_length \ + --pair-filter=any \ + --output ${forward_fq} \ + --paired-output ${reverse_fq} ${reads} + + """ } diff --git a/tools/cutadapt/meta.yml b/tools/cutadapt/meta.yml index 8df0b244..bb3bb8a8 100644 --- a/tools/cutadapt/meta.yml +++ b/tools/cutadapt/meta.yml @@ -9,10 +9,10 @@ tools: description: | Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. - + Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ - sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads - start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but + sequencing adapter because the read is longer than the molecule that is sequenced. Amplicon reads + start with a primer sequence. Poly-A tails are useful for pulling out RNA from your sample, but often you don’t want them to be in your reads. homepage: https://cutadapt.readthedocs.io/en/stable/ documentation: https://cutadapt.readthedocs.io/en/stable/ diff --git a/tools/fastq_screen/main.nf b/tools/fastq_screen/main.nf index be088ff5..2eccf31f 100644 --- a/tools/fastq_screen/main.nf +++ b/tools/fastq_screen/main.nf @@ -1,38 +1,38 @@ nextflow.preview.dsl=2 process FASTQ_SCREEN { - - // depending on the number of genomes and the type of genome (e.g. plants!), memory needs to be ample! - // label 'bigMem' - // label 'multiCore' + + // depending on the number of genomes and the type of genome (e.g. plants!), memory needs to be ample! + // label 'bigMem' + // label 'multiCore' input: - tuple val(name), path(reads) - val (outputdir) - // fastq_screen_args are best passed in to the workflow in the following manner: - // --fastq_screen_args="--subset 200000 --force" - val (fastq_screen_args) - val (verbose) + tuple val(name), path(reads) + val (outputdir) + // fastq_screen_args are best passed in to the workflow in the following manner: + // --fastq_screen_args="--subset 200000 --force" + val (fastq_screen_args) + val (verbose) - output: - path "*png", emit: png - path "*html", emit: html - path "*txt", emit: report + output: + path "*png", emit: png + path "*html", emit: html + path "*txt", emit: report - publishDir "$outputdir", - mode: "link", overwrite: true + publishDir "$outputdir", + mode: "link", overwrite: true script: - println(name) - println(reads) - println(outputdir) - if (verbose){ - println ("[MODULE] FASTQ SCREEN ARGS: "+ fastq_screen_args) - } + println(name) + println(reads) + println(outputdir) + if (verbose){ + println ("[MODULE] FASTQ SCREEN ARGS: "+ fastq_screen_args) + } - """ - module load fastq_screen - fastq_screen $fastq_screen_args $reads - """ + """ + module load fastq_screen + fastq_screen $fastq_screen_args $reads + """ -} \ No newline at end of file +} diff --git a/tools/fastqc/Dockerfile b/tools/fastqc/Dockerfile index a6f4f8ce..5bfca1b9 100644 --- a/tools/fastqc/Dockerfile +++ b/tools/fastqc/Dockerfile @@ -1,6 +1,6 @@ FROM nfcore/base:1.7 LABEL authors="phil.ewels@scilifelab.se" \ - description="Docker image for nf-core modules fastqc" + description="Docker image for nf-core modules fastqc" # foobar COPY environment.yml / diff --git a/tools/fastqc/main.nf b/tools/fastqc/main.nf index 83956ae8..3ecf31ed 100644 --- a/tools/fastqc/main.nf +++ b/tools/fastqc/main.nf @@ -1,37 +1,37 @@ nextflow.preview.dsl = 2 -process FASTQC { - +process FASTQC { + // tag "FastQC - $sample_id" - input: - tuple val(name), path(reads) - val (outputdir) + input: + tuple val(name), path(reads) + val (outputdir) // fastqc_args are best passed into the workflow in the following manner: // --fastqc_args="--nogroup -a custom_adapter_file.txt" - val (fastqc_args) - val (verbose) + val (fastqc_args) + val (verbose) - output: - tuple val(name), path ("*fastqc*"), emit: all - path "*.zip", emit: report // e.g. for MultiQC later - - // container 'quay.io/biocontainers/fastqc:0.11.8--2' + output: + tuple val(name), path ("*fastqc*"), emit: all + path "*.zip", emit: report // e.g. for MultiQC later - publishDir "$outputdir", - mode: "copy", overwrite: true + // container 'quay.io/biocontainers/fastqc:0.11.8--2' - script: + publishDir "$outputdir", + mode: "copy", overwrite: true - if (verbose){ - println ("[MODULE] FASTQC ARGS: " + fastqc_args) - } + script: - """ - module load fastqc - fastqc $fastqc_args -q -t 2 $reads + if (verbose){ + println ("[MODULE] FASTQC ARGS: " + fastqc_args) + } + + """ + module load fastqc + fastqc $fastqc_args -q -t 2 $reads fastqc --version &> fastqc.version.txt - """ + """ } diff --git a/tools/fastqc/meta.yml b/tools/fastqc/meta.yml index 0d67f866..bfe0f6cf 100644 --- a/tools/fastqc/meta.yml +++ b/tools/fastqc/meta.yml @@ -29,6 +29,6 @@ output: description: FastQC report pattern: *_fastqc.{zip,html} authors: - - + - - @ewels - @FelixKrueger diff --git a/tools/gatk/dict/main.nf b/tools/gatk/dict/main.nf index 49edf5a1..9de2e2f8 100644 --- a/tools/gatk/dict/main.nf +++ b/tools/gatk/dict/main.nf @@ -16,4 +16,4 @@ process gatk_dict { --REFERENCE ${fasta} \ --OUTPUT ${fasta.baseName}.dict """ -} \ No newline at end of file +} diff --git a/tools/gatk/dict/meta.yml b/tools/gatk/dict/meta.yml index bc9e18b6..527d6a69 100644 --- a/tools/gatk/dict/meta.yml +++ b/tools/gatk/dict/meta.yml @@ -22,4 +22,4 @@ output: description: gatk dictionary file pattern: *.{fasta,fa}.{dict} authors: - - @maxulysse \ No newline at end of file + - @maxulysse diff --git a/tools/hisat2/main.nf b/tools/hisat2/main.nf index 55e6fe78..85baa120 100644 --- a/tools/hisat2/main.nf +++ b/tools/hisat2/main.nf @@ -15,17 +15,17 @@ process HISAT2 { output: path "*bam", emit: bam - path "*stats.txt", emit: stats + path "*stats.txt", emit: stats publishDir "$outdir/hisat2", mode: "copy", overwrite: true script: - + if (verbose){ println ("[MODULE] HISAT2 ARGS: " + hisat2_args) } - + cores = 4 readString = "" hisat_options = hisat2_args @@ -41,7 +41,7 @@ process HISAT2 { readString = "-U "+reads } index = params.genome["hisat2"] - + splices = '' if (params.genome.containsKey("hisat2_splices")){ splices = " --known-splicesite-infile " + params.genome["hisat2_splices"] diff --git a/tools/hisat2/meta.yml b/tools/hisat2/meta.yml index ee79ba99..d5f5e403 100644 --- a/tools/hisat2/meta.yml +++ b/tools/hisat2/meta.yml @@ -1,4 +1,4 @@ -name: HISAT2 +name: HISAT2 description: Graph-based alignment of next generation sequencing reads to a population of genomes keywords: - Alignment diff --git a/tools/htslib/tabix/meta.yml b/tools/htslib/tabix/meta.yml index 027780c3..d35ec2f2 100644 --- a/tools/htslib/tabix/meta.yml +++ b/tools/htslib/tabix/meta.yml @@ -23,4 +23,4 @@ output: description: tabix index file pattern: *.{vcf.gz.tbi} authors: - - @maxulysse \ No newline at end of file + - @maxulysse diff --git a/tools/multiqc/main.nf b/tools/multiqc/main.nf index 493497c1..ab932a2f 100644 --- a/tools/multiqc/main.nf +++ b/tools/multiqc/main.nf @@ -1,31 +1,31 @@ nextflow.preview.dsl=2 process MULTIQC { - + // tag "FastQC - $sample_id" - + input: - path (file) - val (outdir) - val (multiqc_args) + path (file) + val (outdir) + val (multiqc_args) // multiqc_args are best passed into the workflow in the following manner: // --multiqc_args="--exlude STAR --title custom_report_title" - val (verbose) + val (verbose) - output: - path "*html", emit: html + output: + path "*html", emit: html - publishDir "${outdir}/multiqc", - mode: "copy", overwrite: true + publishDir "${outdir}/multiqc", + mode: "copy", overwrite: true script: - if (verbose){ - println ("[MODULE] MULTIQC ARGS: " + multiqc_args) - } + if (verbose){ + println ("[MODULE] MULTIQC ARGS: " + multiqc_args) + } - """ - multiqc $multiqc_args -x work . - """ + """ + multiqc $multiqc_args -x work . + """ } diff --git a/tools/samtools/Dockerfile b/tools/samtools/Dockerfile index 3d5cf586..ff8c7c64 100644 --- a/tools/samtools/Dockerfile +++ b/tools/samtools/Dockerfile @@ -1,6 +1,6 @@ FROM nfcore/base:1.7 LABEL authors="phil.ewels@scilifelab.se" \ - description="Docker image for nf-core modules samtools" + description="Docker image for nf-core modules samtools" # foobar COPY environment.yml / diff --git a/tools/samtools/faidx/meta.yml b/tools/samtools/faidx/meta.yml index 1e402057..777eb051 100644 --- a/tools/samtools/faidx/meta.yml +++ b/tools/samtools/faidx/meta.yml @@ -24,4 +24,4 @@ output: description: samtools index fasta file pattern: *.fasta.fai authors: - - @maxulysse \ No newline at end of file + - @maxulysse diff --git a/tools/shovill/main.nf b/tools/shovill/main.nf index cf52ba10..5c6a3ee5 100644 --- a/tools/shovill/main.nf +++ b/tools/shovill/main.nf @@ -3,7 +3,7 @@ process shovill { tag { shovill } publishDir "${params.outdir}", pattern: '*.fasta', mode: 'copy' - + container "quay.io/biocontainers/shovill:1.0.9--0" input: @@ -11,7 +11,7 @@ process shovill { output: path("${sample_id}.fasta") - + script: """ shovill --R1 ${forward} --R2 ${reverse} --outdir shovill_out diff --git a/tools/tcoffee/main.nf b/tools/tcoffee/main.nf index 42945850..5d863a7d 100644 --- a/tools/tcoffee/main.nf +++ b/tools/tcoffee/main.nf @@ -14,4 +14,3 @@ process tcoffee { t_coffee -seq $fasta -outfile ${fasta}.aln """ } - diff --git a/tools/trim_galore/Dockerfile b/tools/trim_galore/Dockerfile index 56f75baa..ef13a1bf 100644 --- a/tools/trim_galore/Dockerfile +++ b/tools/trim_galore/Dockerfile @@ -1,6 +1,6 @@ FROM nfcore/base:1.7 LABEL authors="phil.ewels@scilifelab.se" \ - description="Docker image for nf-core modules trimgalore" + description="Docker image for nf-core modules trimgalore" # foobar COPY environment.yml / diff --git a/tools/trim_galore/main.nf b/tools/trim_galore/main.nf index 3a4e8496..eedbe896 100644 --- a/tools/trim_galore/main.nf +++ b/tools/trim_galore/main.nf @@ -13,43 +13,43 @@ params.three_prime_clip_r1 = 0 params.three_prime_clip_r2 = 0 -process TRIM_GALORE { - +process TRIM_GALORE { + // container 'quay.io/biocontainers/trim-galore:0.6.5--0' // maybe later // tag "$sample_id" - input: - tuple val (name), path (reads) - val (outdir) - val (trim_galore_args) - val (verbose) + input: + tuple val (name), path (reads) + val (outdir) + val (trim_galore_args) + val (verbose) + + output: + tuple val(name), path ("*fq.gz"), emit: reads + path "*trimming_report.txt", optional: true, emit: report - output: - tuple val(name), path ("*fq.gz"), emit: reads - path "*trimming_report.txt", optional: true, emit: report - // Trimming reports are not generated for e.g. --hardtrim5, --clock etc // saveAs: {filename -> // else if (filename.indexOf("trimming_report.txt") > 0) "logs/$filename" // else filename // } - publishDir "${outdir}/trim_galore", - mode: "copy", overwrite: true + publishDir "${outdir}/trim_galore", + mode: "copy", overwrite: true script: - if (verbose){ - println ("[MODULE] TRIM GALORE ARGS: " + trim_galore_args) - } - + if (verbose){ + println ("[MODULE] TRIM GALORE ARGS: " + trim_galore_args) + } + trim_galore_args += " --gzip " // we like small files - pairedString = 0 - if (reads instanceof List) { - pairedString = 1 + pairedString = 0 + if (reads instanceof List) { + pairedString = 1 trim_galore_args += " --paired " - } - + } + if (params.clip_r1 > 0){ trim_galore_args += " --clip_r1 ${params.clip_r1} " } @@ -62,12 +62,12 @@ process TRIM_GALORE { if (params.three_prime_clip_r2 > 0){ trim_galore_args += " --three_prime_clip_r2 ${params.three_prime_clip_r2} " } - + if (params.trim_nextseq > 0){ trim_galore_args += " --nextseq ${params.trim_nextseq} " - } - - + } + + // Pre-set parameters for certain bisulfite-seq applications if (params.singlecell){ trim_galore_args += " --clip_r1 6 " @@ -77,7 +77,7 @@ process TRIM_GALORE { } if (params.rrbs){ trim_galore_args += " --rrbs " - } + } if (params.pbat){ trim_galore_args += " --clip_r1 $params.pbat " if (pairedString == 1){ @@ -85,17 +85,16 @@ process TRIM_GALORE { } } - """ - module load trim_galore - trim_galore $trim_galore_args $reads - """ + """ + module load trim_galore + trim_galore $trim_galore_args $reads + """ } - - + diff --git a/tools/trim_galore/meta.yml b/tools/trim_galore/meta.yml index d6944fb6..b3d42f81 100644 --- a/tools/trim_galore/meta.yml +++ b/tools/trim_galore/meta.yml @@ -36,6 +36,6 @@ output: pattern: *trimming_report.txt authors: - - + - - @ewels - @FelixKrueger diff --git a/tools/umi_tools/Dockerfile b/tools/umi_tools/Dockerfile index 691f576f..19a390f3 100644 --- a/tools/umi_tools/Dockerfile +++ b/tools/umi_tools/Dockerfile @@ -1,8 +1,8 @@ FROM nfcore/base:1.7 LABEL authors="chris.cheshire@crick.ac.uk" \ - description="Docker image containing all requirements for the nf-core umi_tools module" + description="Docker image containing all requirements for the nf-core umi_tools module" # Install conda packages COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nfcore-module-umitools/bin:$PATH \ No newline at end of file +ENV PATH /opt/conda/envs/nfcore-module-umitools/bin:$PATH diff --git a/tools/umi_tools/main.nf b/tools/umi_tools/main.nf index 049ceaf8..b3c89a97 100644 --- a/tools/umi_tools/main.nf +++ b/tools/umi_tools/main.nf @@ -12,7 +12,7 @@ process umitools_dedup { input: tuple val(sample_id), path(bam) - + output: tuple val(sample_id), path("${sample_id}.dedup.bam"), emit: dedupBam tuple val(sample_id), path("${sample_id}.dedup.bam.bai"), emit: dedupBai diff --git a/tools/umi_tools/umi_tools.yml b/tools/umi_tools/umi_tools.yml index 64bcb6aa..b04f0f4e 100644 --- a/tools/umi_tools/umi_tools.yml +++ b/tools/umi_tools/umi_tools.yml @@ -1,6 +1,6 @@ name: umi_tools version: 1.0 -description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. +description: Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. keywords: - UMI - RMT @@ -8,7 +8,7 @@ keywords: tools: - umi_tools: description: | - Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. + Tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) and single cell RNA-Seq cell barcodes. homepage: https://github.com/CGATOxford/UMI-tools documentation: https://umi-tools.readthedocs.io/en/latest/ processes: @@ -18,7 +18,7 @@ processes: The program will execute with the following pattern: umi_tools dedup --log={SAMPLE_ID}.dedup.log {params.umitools_dedup_args} -I {SAMPLE_ID}.bam -S {SAMPLE_ID}.dedup.bam --output-stats={SAMPLE_ID} description: | - Groups PCR duplicates and de-duplicates reads to yield one read per group. + Groups PCR duplicates and de-duplicates reads to yield one read per group. Use this when you want to remove the PCR duplicates prior to any downstream analysis. input: - sample_id: @@ -43,4 +43,3 @@ processes: authors: - @candiceh08 - @chris-cheshire -