From c48244b677d597cf1dbf15bd4a3354b43b4d585d Mon Sep 17 00:00:00 2001
From: Ilya Pletenev <56674821+i-pletenev@users.noreply.github.com>
Date: Mon, 15 Nov 2021 15:51:40 +0300
Subject: [PATCH] Add new module 'ataqv/ataqv' (#998)

* Add new module 'ataqv/ataqv'

* Update main.nf

* Update main.nf

Co-authored-by: Harshil Patel
---
 modules/ataqv/ataqv/functions.nf   | 78 ++++++++++++++++++++++++++++++
 modules/ataqv/ataqv/main.nf        | 56 +++++++++++++++++++++
 modules/ataqv/ataqv/meta.yml       | 66 +++++++++++++++++++++++++
 tests/config/pytest_modules.yml    |  4 ++
 tests/modules/ataqv/ataqv/main.nf  | 69 ++++++++++++++++++++++++++
 tests/modules/ataqv/ataqv/test.yml | 51 +++++++++++++++++++
 6 files changed, 324 insertions(+)
 create mode 100644 modules/ataqv/ataqv/functions.nf
 create mode 100644 modules/ataqv/ataqv/main.nf
 create mode 100644 modules/ataqv/ataqv/meta.yml
 create mode 100644 tests/modules/ataqv/ataqv/main.nf
 create mode 100644 tests/modules/ataqv/ataqv/test.yml

diff --git a/modules/ataqv/ataqv/functions.nf b/modules/ataqv/ataqv/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/ataqv/ataqv/functions.nf
@@ -0,0 +1,78 @@
+//
+// Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/ataqv/ataqv/main.nf b/modules/ataqv/ataqv/main.nf
new file mode 100644
index 00000000..5ddade28
--- /dev/null
+++ b/modules/ataqv/ataqv/main.nf
@@ -0,0 +1,56 @@
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options) // Fill in defaults for every option the caller left unset
+
+process ATAQV_ATAQV {
+    tag "$meta.id"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::ataqv=1.2.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { // Singularity image, unless the Docker container was explicitly requested
+        container "https://depot.galaxyproject.org/singularity/ataqv:1.2.1--py39ha23c084_2"
+    } else { // All other cases: keep the build string identical to the Singularity image above
+        container "quay.io/biocontainers/ataqv:1.2.1--py39ha23c084_2"
+    }
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(peak_file) // peak_file may be [] to omit --peak-file; bai is staged only, never put on the command line
+    val organism                                           // Emitted verbatim as the positional argument right before the BAM
+    path tss_file                                          // Pass [] to omit --tss-file
+    path excl_regs_file                                    // Pass [] to omit --excluded-region-file
+    path autosom_ref_file                                  // Pass [] to omit --autosomal-reference-file
+
+    output:
+    tuple val(meta), path("*.ataqv.json"), emit: json
+    tuple val(meta), path("*.problems")  , emit: problems, optional: true
+    path "versions.yml"                  , emit: versions
+
+    script:
+    def prefix      = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def peak        = peak_file ? "--peak-file $peak_file" : '' // Each flag below collapses to '' when its input is empty
+    def tss         = tss_file ? "--tss-file $tss_file" : ''
+    def excl_regs   = excl_regs_file ? "--excluded-region-file $excl_regs_file" : ''
+    def autosom_ref = autosom_ref_file ? "--autosomal-reference-file $autosom_ref_file" : ''
+    """
+    ataqv \\
+        $options.args \\
+        $peak \\
+        $tss \\
+        $excl_regs \\
+        $autosom_ref \\
+        --metrics-file "${prefix}.ataqv.json" \\
+        --threads $task.cpus \\
+        --name $prefix \\
+        $organism \\
+        $bam
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( ataqv --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/ataqv/ataqv/meta.yml b/modules/ataqv/ataqv/meta.yml
new file mode 100644
index 00000000..760bf95f
--- /dev/null
+++ b/modules/ataqv/ataqv/meta.yml
@@ -0,0 +1,66 @@
+name: ataqv_ataqv
+description: Collect ATAC-seq quality-control metrics from a BAM file with ataqv and write them to a JSON file
+keywords:
+  - ataqv
+tools:
+  - ataqv:
+      description: ataqv is a toolkit for measuring and comparing ATAC-seq results. It was written to help understand how well ATAC-seq assays have worked, and to make it easier to spot differences that might be caused by library prep or sequencing.
+      homepage: https://github.com/ParkerLab/ataqv/blob/master/README.rst
+      documentation: https://github.com/ParkerLab/ataqv/blob/master/README.rst
+      tool_dev_url: https://github.com/ParkerLab/ataqv
+      doi: "https://doi.org/10.1016/j.cels.2020.02.009"
+      licence: ['GPL v3']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAM index file with the same prefix as bam file. Required if tss_file input is provided.
+      pattern: "*.bam.bai"
+  - peak_file:
+      type: file
+      description: A BED file of peaks called for alignments in the BAM file
+      pattern: "*.bed"
+  - organism:
+      type: string
+      description: The subject of the experiment, which determines the list of autosomes (see "Reference Genome Configuration" section at https://github.com/ParkerLab/ataqv).
+  - tss_file:
+      type: file
+      description: A BED file of transcription start sites for the experiment organism. If supplied, a TSS enrichment score will be calculated according to the ENCODE data standards. This calculation requires that the BAM file of alignments be indexed.
+      pattern: "*.bed"
+  - excl_regs_file:
+      type: file
+      description: A BED file containing excluded regions. Peaks or TSS overlapping these will be ignored.
+      pattern: "*.bed"
+  - autosom_ref_file:
+      type: file
+      description: A file containing autosomal reference names, one per line. The names must match the reference names in the alignment file exactly, or the metrics based on counts of autosomal alignments will be wrong.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - json:
+      type: file
+      description: The JSON file to which metrics will be written.
+  - problems:
+      type: file
+      description: If given, problematic reads will be logged to a file per read group, with names derived from the read group IDs, with ".problems" appended. If no read groups are found, the reads will be written to one file named after the BAM file.
+      pattern: "*.problems"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@i-pletenev"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 22a3edf5..de72731b 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -34,6 +34,10 @@ assemblyscan:
   - modules/assemblyscan/**
   - tests/modules/assemblyscan/**
 
+ataqv/ataqv:
+  - modules/ataqv/ataqv/**
+  - tests/modules/ataqv/ataqv/**
+
 bamaligncleaner:
   - modules/bamaligncleaner/**
   - tests/modules/bamaligncleaner/**
diff --git a/tests/modules/ataqv/ataqv/main.nf b/tests/modules/ataqv/ataqv/main.nf
new file mode 100644
index 00000000..2f2a62eb
--- /dev/null
+++ b/tests/modules/ataqv/ataqv/main.nf
@@ -0,0 +1,69 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ATAQV_ATAQV } from '../../../../modules/ataqv/ataqv/main.nf' addParams( options: [:] )
+include { ATAQV_ATAQV as ATAQV_ATAQV_PROBLEM_READS} from '../../../../modules/ataqv/ataqv/main.nf' addParams( options: ['args': '--log-problematic-reads'] )
+
+workflow test_ataqv_ataqv {
+
+    input = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+        [], // bai
+        []  // peak_file
+    ]
+
+    ATAQV_ATAQV ( input, 'human', [], [], [] ) // organism, tss_file, excl_regs_file, autosom_ref_file
+}
+
+workflow test_ataqv_ataqv_problem_reads {
+
+    input = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
+        [], // bai
+        []  // peak_file
+    ]
+
+    ATAQV_ATAQV_PROBLEM_READS ( input, 'human', [], [], [] ) // Same inputs as above; the alias adds --log-problematic-reads
+}
+
+workflow test_ataqv_ataqv_peak {
+
+    input = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        [], // bai
+        file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) // peak_file
+    ]
+
+    ATAQV_ATAQV ( input, 'human', [], [], [] ) // organism, tss_file, excl_regs_file, autosom_ref_file
+}
+
+workflow test_ataqv_ataqv_tss {
+
+    input = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        [] // peak_file
+    ]
+    tss_file = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
+
+    ATAQV_ATAQV ( input, 'human', tss_file, [], [] ) // excl_regs_file and autosom_ref_file left empty
+}
+
+workflow test_ataqv_ataqv_excluded_regs {
+
+    input = [
+        [ id:'test', single_end:false ],
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        [] // peak_file
+    ]
+    tss_file = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
+    excl_regs_file = file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true)
+
+    ATAQV_ATAQV ( input, 'human', tss_file, excl_regs_file, [] ) // autosom_ref_file left empty
+}
diff --git a/tests/modules/ataqv/ataqv/test.yml b/tests/modules/ataqv/ataqv/test.yml
new file mode 100644
index 00000000..77452f6f
--- /dev/null
+++ b/tests/modules/ataqv/ataqv/test.yml
@@ -0,0 +1,51 @@
+- name: ataqv ataqv test_ataqv_ataqv
+  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv -c tests/config/nextflow.config
+  tags:
+    - ataqv
+    - ataqv/ataqv
+  files:
+    - path: output/ataqv/test.ataqv.json
+      contains:
+        - '"forward_mate_reads": 101'
+
+- name: ataqv ataqv test_ataqv_ataqv_problem_reads
+  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_problem_reads -c tests/config/nextflow.config
+  tags:
+    - ataqv
+    - ataqv/ataqv
+  files:
+    - path: output/ataqv/1.problems
+      md5sum: d41d8cd98f00b204e9800998ecf8427e # MD5 of zero-byte content: the file must exist and be empty
+    - path: output/ataqv/test.ataqv.json
+      contains:
+        - '"forward_mate_reads": 101'
+
+- name: ataqv ataqv test_ataqv_ataqv_peak
+  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_peak -c tests/config/nextflow.config
+  tags:
+    - ataqv
+    - ataqv/ataqv
+  files:
+    - path: output/ataqv/test.ataqv.json
+      contains:
+        - '"forward_mate_reads": 101'
+
+- name: ataqv ataqv test_ataqv_ataqv_tss
+  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_tss -c tests/config/nextflow.config
+  tags:
+    - ataqv
+    - ataqv/ataqv
+  files:
+    - path: output/ataqv/test.ataqv.json
+      contains:
+        - '"forward_mate_reads": 101'
+
+- name: ataqv ataqv test_ataqv_ataqv_excluded_regs
+  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_excluded_regs -c tests/config/nextflow.config
+  tags:
+    - ataqv
+    - ataqv/ataqv
+  files:
+    - path: output/ataqv/test.ataqv.json
+      contains:
+        - '"forward_mate_reads": 101'