Reviewed copy of the patch that adds the nf-core `genrich` module.

Line breaks and indentation were missing from the copy under review and have
been restored. Whitespace inside the context lines of the two tests/config
hunks (including the blank context line assumed at the start of the second
test_data.config hunk) is a best-effort reconstruction -- TODO confirm against
the target tree, or apply with `git apply --ignore-whitespace`. The blob hashes
on the `index` lines of the edited files are stale.

Changes relative to the reviewed patch:
  * modules/genrich/main.nf: the Genrich command no longer repeats $blacklist
    and $control (each was interpolated twice); '-r' is now detected as a
    whole token of options.args instead of as a substring; header comment added.
  * modules/genrich/meta.yml: BAM inputs described as query name sorted (the
    tests use *.name.sorted.bam); `duplicates` pattern now matches the module's
    "*duplicates.txt" output; `version` entry renamed to `versions` with the
    `versions.yml` pattern emitted by the module.
  * tests/modules/genrich/main.nf: NOTE(review) comments only, no code change.

diff --git a/modules/genrich/functions.nf b/modules/genrich/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/genrich/functions.nf
@@ -0,0 +1,78 @@
+//
+// Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/genrich/main.nf b/modules/genrich/main.nf
new file mode 100644
index 00000000..c947e9cf
--- /dev/null
+++ b/modules/genrich/main.nf
@@ -0,0 +1,72 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+//
+// Call peaks with Genrich on a treatment BAM; the control BAM, the blacklist BED and the
+// extra output files are only used when the matching params.* flag is set
+//
+process GENRICH {
+    tag "$meta.id"
+    label 'process_high'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::genrich=0.6.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/genrich:0.6.1--h5bf99c6_1"
+    } else {
+        container "quay.io/biocontainers/genrich:0.6.1--h5bf99c6_1"
+    }
+
+    input:
+    tuple val(meta), path(treatment_bam)
+    path control_bam
+    path blacklist_bed
+
+    output:
+    tuple val(meta), path("*narrowPeak")                     , emit: peaks
+    tuple val(meta), path("*pvalues.bedGraph"), optional:true, emit: bedgraph_pvalues
+    tuple val(meta), path("*pileup.bedGraph") , optional:true, emit: bedgraph_pileup
+    tuple val(meta), path("*intervals.bed")   , optional:true, emit: bed_intervals
+    tuple val(meta), path("*duplicates.txt")  , optional:true, emit: duplicates
+    path "versions.yml"                                      , emit: versions
+
+    script:
+    def prefix     = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def control    = params.control_bam ? "-c $control_bam" : ''
+    def pvalues    = params.pvalues ? "-f ${prefix}.pvalues.bedGraph" : ""
+    def pileup     = params.pileup ? "-k ${prefix}.pileup.bedGraph" : ""
+    def bed        = params.bed ? "-b ${prefix}.intervals.bed" : ""
+    def blacklist  = params.blacklist_bed ? "-E $blacklist_bed" : ""
+    def duplicates = ""
+    if (params.save_duplicates) {
+        // Look for '-r' as a whole token so that other arguments merely containing "-r" are not mistaken for it
+        if (options.args.tokenize().contains('-r')) {
+            duplicates = "-R ${prefix}.duplicates.txt"
+        } else {
+            log.info '[Genrich] Duplicates can only be saved if they are filtered, defaulting to -r option (Remove PCR duplicates).'
+            duplicates = "-r -R ${prefix}.duplicates.txt"
+        }
+    }
+    """
+    Genrich \\
+        -t $treatment_bam \\
+        $options.args \\
+        $control \\
+        $blacklist \\
+        -o ${prefix}.narrowPeak \\
+        $pvalues \\
+        $pileup \\
+        $bed \\
+        $duplicates
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo \$(Genrich --version 2>&1) | sed 's/^Genrich, version //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/genrich/meta.yml b/modules/genrich/meta.yml
new file mode 100644
index 00000000..8f7b004b
--- /dev/null
+++ b/modules/genrich/meta.yml
@@ -0,0 +1,71 @@
+name: genrich
+description: Peak-calling for ChIP-seq and ATAC-seq enrichment experiments
+keywords:
+  - peak-calling
+  - ChIP-seq
+  - ATAC-seq
+tools:
+  - genrich:
+      description: |
+        Genrich is a peak-caller for genomic enrichment assays (e.g. ChIP-seq, ATAC-seq).
+        It analyzes alignment files generated following the assay and produces a file
+        detailing peaks of significant enrichment.
+      homepage: https://github.com/jsh58/Genrich
+      documentation: https://github.com/jsh58/Genrich#readme
+      tool_dev_url: https://github.com/jsh58/Genrich
+      doi: ""
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - treatment_bam:
+      type: file
+      description: Query name sorted BAM/SAM file from treatment sample
+      pattern: "*.{bam,sam}"
+  - control_bam:
+      type: file
+      description: Query name sorted BAM/SAM file from control sample
+      pattern: "*.{bam,sam}"
+  - blacklist_bed:
+      type: file
+      description: Bed file containing genomic intervals to exclude from the analysis
+      pattern: "*.{bed}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - peaks:
+      type: file
+      description: Output file is in ENCODE narrowPeak format
+      pattern: "*.{narrowPeak}"
+  - bedgraph_pvalues:
+      type: file
+      description: bedGraph file containing p/q values
+      pattern: "*.{pvalues.bedGraph}"
+  - bedgraph_pileup:
+      type: file
+      description: bedGraph file containing pileups and p-values
+      pattern: "*.{pileup.bedGraph}"
+  - bed_intervals:
+      type: file
+      description: Bed file containing annotated intervals
+      pattern: "*.{intervals.bed}"
+  - duplicates:
+      type: file
+      description: Text output file containing intervals corresponding to PCR duplicates
+      pattern: "*.{duplicates.txt}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@JoseEspinosa"
+
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 99eb271c..7093790b 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -466,6 +466,10 @@ genmap/mappability:
   - modules/genmap/mappability/**
   - tests/modules/genmap/mappability/**
 
+genrich:
+  - modules/genrich/**
+  - tests/modules/genrich/**
+
 gffread:
   - modules/gffread/**
   - tests/modules/gffread/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index 3c0308a0..1abae34d 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -123,6 +123,7 @@ params {
             'illumina' {
                 test_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"
                 test_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai"
+                test_paired_end_name_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam"
                 test_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam"
                 test_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai"
                 test_paired_end_markduplicates_sorted_referencesn_txt = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.referencesn.txt"
@@ -137,6 +138,7 @@ params {
 
                 test2_paired_end_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"
                 test2_paired_end_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai"
+                test2_paired_end_name_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam"
                 test2_paired_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam"
                 test2_paired_end_markduplicates_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.markduplicates.sorted.bam.bai"
                 test2_paired_end_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam"
diff --git a/tests/modules/genrich/main.nf b/tests/modules/genrich/main.nf
new file mode 100644
index 00000000..654b38e5
--- /dev/null
+++ b/tests/modules/genrich/main.nf
@@ -0,0 +1,47 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+// NOTE(review): GENRICH_BLACKLIST is included but never invoked by a workflow in this file.
+include { GENRICH                        } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-p 0.1"] )
+include { GENRICH as GENRICH_BLACKLIST   } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:true, save_duplicates:false, options: ["args": "-p 0.1"] )
+include { GENRICH as GENRICH_ALL_OUTPUTS } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: true, pileup:true, bed:true, blacklist_bed:false, save_duplicates:true, options: ["args": "-r -p 0.1"] )
+include { GENRICH as GENRICH_ATACSEQ     } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-j -p 0.1"] )
+
+workflow test_genrich {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
+    control   = [ ]
+    blacklist = [ ]
+
+    GENRICH ( input, control, blacklist )
+}
+
+// NOTE(review): every include above sets control_bam:false, so the control BAM passed by the
+// workflows below never reaches the Genrich command line -- confirm whether that is intended.
+workflow test_genrich_ctrl {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
+    control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
+    blacklist = [ ]
+
+    GENRICH ( input, control, blacklist )
+}
+
+workflow test_genrich_all_outputs {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
+    control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
+    blacklist = [ ]
+
+    GENRICH_ALL_OUTPUTS ( input, control, blacklist )
+}
+
+workflow test_genrich_atacseq {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
+    control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
+    blacklist = [ ]
+
+    GENRICH_ATACSEQ ( input, control, blacklist )
+}
diff --git a/tests/modules/genrich/test.yml b/tests/modules/genrich/test.yml
new file mode 100644
index 00000000..bd762f7c
--- /dev/null
+++ b/tests/modules/genrich/test.yml
@@ -0,0 +1,39 @@
+- name: genrich test_genrich
+  command: nextflow run tests/modules/genrich -entry test_genrich -c tests/config/nextflow.config
+  tags:
+    - genrich
+  files:
+    - path: output/genrich/test.narrowPeak
+      md5sum: 6afabdd3f691c7c84c66ff8a23984681
+
+- name: genrich test_genrich_ctrl
+  command: nextflow run tests/modules/genrich -entry test_genrich_ctrl -c tests/config/nextflow.config
+  tags:
+    - genrich
+  files:
+    - path: output/genrich/test.narrowPeak
+      md5sum: 6afabdd3f691c7c84c66ff8a23984681
+
+- name: genrich test_genrich_all_outputs
+  command: nextflow run tests/modules/genrich -entry test_genrich_all_outputs -c tests/config/nextflow.config
+  tags:
+    - genrich
+  files:
+    - path: output/genrich/test.duplicates.txt
+      md5sum: a92893f905fd8b3751bc6a960fbfe7ba
+    - path: output/genrich/test.intervals.bed
+      md5sum: 52edf47e6641c0cc03f9cca7324f7eaa
+    - path: output/genrich/test.narrowPeak
+      md5sum: e45eb7d000387975050c2e85c164e5be
+    - path: output/genrich/test.pileup.bedGraph
+      md5sum: e4f7fa664cd4ed2cf3a1a3a9eb415e71
+    - path: output/genrich/test.pvalues.bedGraph
+      md5sum: 564859953704983393d4b7d6317060cd
+
+- name: genrich test_genrich_atacseq
+  command: nextflow run tests/modules/genrich -entry test_genrich_atacseq -c tests/config/nextflow.config
+  tags:
+    - genrich
+  files:
+    - path: output/genrich/test.narrowPeak
+      md5sum: ddea556b820f8be3695ffdf6c6f70aff