Add new module 'ataqv/ataqv' (#998)

* Add new module 'ataqv/ataqv'

* Update

* Update

Co-authored-by: Harshil Patel <>
This commit is contained in:
Ilya Pletenev 2021-11-15 15:51:40 +03:00 committed by GitHub
parent 661bdb645e
commit c48244b677
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 324 additions and 0 deletions

View file

@ -0,0 +1,78 @@
// Utility functions used in nf-core DSL2 module files
//
// Extract name of software tool from process name using $task.process
//
// Takes the last ':'-separated token of the process name, keeps the part
// before its first '_' and lower-cases it.
//
def getSoftwareName(task_process) {
    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
}
//
// Extract name of module from process name using $task.process
//
// Returns the last ':'-separated token of the process name, unchanged.
//
def getProcessName(task_process) {
    return task_process.tokenize(':')[-1]
}
//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
// Every key is copied from `args`; missing or falsy values fall back to an
// empty string ('' for args/args2/args3/publish_dir/suffix) or an empty list
// (publish_by_meta). `publish_files` is copied as-is, so it stays null when
// it was not supplied.
//
def initOptions(Map args) {
    Map options = [:]
    options.args            = args.args ?: ''
    options.args2           = args.args2 ?: ''
    options.args3           = args.args3 ?: ''
    options.publish_by_meta = args.publish_by_meta ?: []
    options.publish_dir     = args.publish_dir ?: ''
    options.publish_files   = args.publish_files
    options.suffix          = args.suffix ?: ''
    return options
}
//
// Tidy up and join elements of a list to return a path string
//
// Null, empty and whitespace-only entries are dropped; the remaining entries
// are trimmed, stripped of leading and trailing '/' and joined with '/'.
//
def getPathFromList(path_list) {
    // Groovy truth: null and '' are both false, so null entries are skipped instead of raising
    def paths = path_list.findAll { item -> item?.trim() }
    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}
//
// Function to save/publish module results
//
// Expects a Map with `filename`, `options`, `publish_dir`, `meta` and
// `publish_by_meta` entries. Returns the relative path under which
// `args.filename` should be published, or null when it should not be
// published (versions.yml outside the pytest workflow, or no matching entry
// in `publish_files`).
//
def saveFiles(Map args) {
    def ioptions  = initOptions(args.options)
    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless running from pytest workflow
    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    if (ioptions.publish_by_meta) {
        // A List given in the options wins; any other truthy value falls back to the module's own key list
        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
        for (key in key_list) {
            if (args.meta && key instanceof String) {
                // Keys that are absent from meta are used verbatim as a path component
                def path = key
                if (args.meta.containsKey(key)) {
                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
                }
                path = path instanceof String ? path : ''
                // Without this the computed component is discarded and publish_by_meta has no effect
                path_list.add(path)
            }
        }
    }

    if (ioptions.publish_files instanceof Map) {
        for (ext in ioptions.publish_files) {
            if (args.filename.endsWith(ext.key)) {
                // Work on a copy so the extension-specific sub-directory does not leak into path_list
                def ext_list = path_list.collect()
                ext_list.add(ext.value)
                return "${getPathFromList(ext_list)}/$args.filename"
            }
        }
    } else if (ioptions.publish_files == null) {
        return "${getPathFromList(path_list)}/$args.filename"
    }
}

View file

@ -0,0 +1,56 @@
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
// Module options default to an empty map; an including workflow can override them (e.g. via addParams)
params.options = [:]
// Fill in defaults for every option key that was not supplied
options = initOptions(params.options)
// Runs `ataqv` on one BAM file and emits the metrics JSON, any per-read-group
// ".problems" files (only produced when problematic-read logging is requested
// through options.args) and a versions.yml.
process ATAQV_ATAQV {
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::ataqv=1.2.1" : null)
    // NOTE(review): both container image references are empty strings here; the
    // image URIs appear to have been lost and must be restored before this
    // module can run with Singularity or Docker.
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container ""
    } else {
        container ""
    }

    input:
    // bai is only staged next to the BAM; it is not passed on the command line
    tuple val(meta), path(bam), path(bai), path(peak_file)
    val organism
    path tss_file
    path excl_regs_file
    path autosom_ref_file

    output:
    tuple val(meta), path("*.ataqv.json"), emit: json
    tuple val(meta), path("*.problems") , emit: problems, optional: true
    path "versions.yml" , emit: versions

    script:
    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // Optional inputs are passed as [] by callers; an empty value omits the flag entirely
    def peak        = peak_file        ? "--peak-file $peak_file"                       : ''
    def tss         = tss_file         ? "--tss-file $tss_file"                         : ''
    def excl_regs   = excl_regs_file   ? "--excluded-region-file $excl_regs_file"       : ''
    def autosom_ref = autosom_ref_file ? "--autosomal-reference-file $autosom_ref_file" : ''
    """
    ataqv \\
        $options.args \\
        $peak \\
        $tss \\
        $excl_regs \\
        $autosom_ref \\
        --metrics-file "${prefix}.ataqv.json" \\
        --threads $task.cpus \\
        --name $prefix \\
        $organism \\
        $bam

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$( ataqv --version )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,66 @@
# Module description for ataqv/ataqv: tool metadata plus the documented
# input and output channels of the ATAQV_ATAQV process.
name: ataqv_ataqv
description: ataqv function of a corresponding ataqv tool
keywords:
  - ataqv
tools:
  - ataqv:
      description: ataqv is a toolkit for measuring and comparing ATAC-seq results. It was written to help understand how well ATAC-seq assays have worked, and to make it easier to spot differences that might be caused by library prep or sequencing.
      doi: ""
      licence: ['GPL v3']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file
      pattern: "*.bam"
  - bai:
      type: file
      description: BAM index file with the same prefix as bam file. Required if tss_file input is provided.
      pattern: "*.bam.bai"
  - peak_file:
      type: file
      description: A BED file of peaks called for alignments in the BAM file
      pattern: "*.bed"
  - organism:
      type: string
      description: The subject of the experiment, which determines the list of autosomes (see "Reference Genome Configuration" section at https://github.com/ParkerLab/ataqv).
  - tss_file:
      type: file
      description: A BED file of transcription start sites for the experiment organism. If supplied, a TSS enrichment score will be calculated according to the ENCODE data standards. This calculation requires that the BAM file of alignments be indexed.
      pattern: "*.bed"
  - excl_regs_file:
      type: file
      description: A BED file containing excluded regions. Peaks or TSS overlapping these will be ignored.
      pattern: "*.bed"
  - autosom_ref_file:
      type: file
      description: A file containing autosomal reference names, one per line. The names must match the reference names in the alignment file exactly, or the metrics based on counts of autosomal alignments will be wrong.

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - json:
      type: file
      description: The JSON file to which metrics will be written.
  - problems:
      type: file
      description: If given, problematic reads will be logged to a file per read group, with names derived from the read group IDs, with ".problems" appended. If no read groups are found, the reads will be written to one file named after the BAM file.
      pattern: "*.problems"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@i-pletenev"

View file

@ -34,6 +34,10 @@ assemblyscan:
  - modules/assemblyscan/**
  - tests/modules/assemblyscan/**

ataqv/ataqv:
  - modules/ataqv/ataqv/**
  - tests/modules/ataqv/ataqv/**

bamaligncleaner:
  - modules/bamaligncleaner/**
  - tests/modules/bamaligncleaner/**

View file

@ -0,0 +1,69 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { ATAQV_ATAQV } from '../../../../modules/ataqv/ataqv/' addParams( options: [:] )
include { ATAQV_ATAQV as ATAQV_ATAQV_PROBLEM_READS} from '../../../../modules/ataqv/ataqv/' addParams( options: ['args': '--log-problematic-reads'] )
// Minimal run: BAM only, no index, no peak file and no optional reference files.
workflow test_ataqv_ataqv {

    // The process expects a 4-element tuple: meta, bam, bai, peak_file
    input = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
        [],
        []
    ]

    ATAQV_ATAQV ( input, 'human', [], [], [] )
}
// Same inputs as the minimal run, but through the alias that adds
// '--log-problematic-reads' so that ".problems" files are produced.
workflow test_ataqv_ataqv_problem_reads {

    // The process expects a 4-element tuple: meta, bam, bai, peak_file
    input = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
        [],
        []
    ]

    ATAQV_ATAQV_PROBLEM_READS ( input, 'human', [], [], [] )
}
// Run with a peak BED file supplied as the fourth tuple element.
workflow test_ataqv_ataqv_peak {

    // meta, bam, bai (not supplied), peak_file
    input = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        [],
        file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
    ]

    ATAQV_ATAQV ( input, 'human', [], [], [] )
}
// Run with a TSS BED file; the BAM index is supplied alongside the sorted BAM.
workflow test_ataqv_ataqv_tss {

    // meta, bam, bai, peak_file (not supplied)
    input = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        []
    ]
    tss_file = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)

    ATAQV_ATAQV ( input, 'human', tss_file, [], [] )
}
// Run with both a TSS BED file and an excluded-regions BED file.
workflow test_ataqv_ataqv_excluded_regs {

    // meta, bam, bai, peak_file (not supplied)
    input = [
        [ id:'test', single_end:false ],
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        []
    ]
    tss_file = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
    excl_regs_file = file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true)

    ATAQV_ATAQV ( input, 'human', tss_file, excl_regs_file, [] )
}

View file

@ -0,0 +1,51 @@
# pytest-workflow cases for ataqv/ataqv: one entry per test workflow, each
# checking that the metrics JSON is produced and contains the expected count.
- name: ataqv ataqv test_ataqv_ataqv
  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv -c tests/config/nextflow.config
  tags:
    - ataqv
    - ataqv/ataqv
  files:
    - path: output/ataqv/test.ataqv.json
      contains:
        - '"forward_mate_reads": 101'

- name: ataqv ataqv test_ataqv_ataqv_problem_reads
  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_problem_reads -c tests/config/nextflow.config
  tags:
    - ataqv
    - ataqv/ataqv
  files:
    # d41d8cd98f00b204e9800998ecf8427e is the md5 of an empty file
    - path: output/ataqv/1.problems
      md5sum: d41d8cd98f00b204e9800998ecf8427e
    - path: output/ataqv/test.ataqv.json
      contains:
        - '"forward_mate_reads": 101'

- name: ataqv ataqv test_ataqv_ataqv_peak
  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_peak -c tests/config/nextflow.config
  tags:
    - ataqv
    - ataqv/ataqv
  files:
    - path: output/ataqv/test.ataqv.json
      contains:
        - '"forward_mate_reads": 101'

- name: ataqv ataqv test_ataqv_ataqv_tss
  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_tss -c tests/config/nextflow.config
  tags:
    - ataqv
    - ataqv/ataqv
  files:
    - path: output/ataqv/test.ataqv.json
      contains:
        - '"forward_mate_reads": 101'

- name: ataqv ataqv test_ataqv_ataqv_excluded_regs
  command: nextflow run tests/modules/ataqv/ataqv -entry test_ataqv_ataqv_excluded_regs -c tests/config/nextflow.config
  tags:
    - ataqv
    - ataqv/ataqv
  files:
    - path: output/ataqv/test.ataqv.json
      contains:
        - '"forward_mate_reads": 101'