Added all modules using bedtools in the enhancer transcript identification section

This commit is contained in:
Sruthi Suresh 2020-11-11 15:31:22 -06:00 committed by sruthipsuresh
parent 637b545168
commit 5205ce286a
18 changed files with 10484 additions and 0 deletions

View file

@ -0,0 +1,59 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the lower-case software tool name from a process name (the value of $task.process),
 * e.g. 'WORKFLOW:BEDTOOLS_ERNAS' gives 'bedtools'
 */
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]   // last ':'-separated element, i.e. without enclosing scopes
    def tool_name    = process_name.tokenize('_')[0]    // part before the first underscore
    return tool_name.toLowerCase()
}
/*
 * Build the Groovy Map of options available to an nf-core module,
 * substituting a default for every key that is unset (or falsy) in the supplied Map
 */
def initOptions(Map args) {
    return [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,      // copied as-is, so it stays null when not supplied
        suffix        : args.suffix ?: ''
    ]
}
/*
 * Tidy up and join elements of a list to return a path string.
 * Null, empty and whitespace-only entries are dropped; the remaining entries are stripped of
 * surrounding whitespace and of leading/trailing slashes, then joined with '/'.
 */
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false, so a null entry is filtered out here
    // instead of reaching a method call on null (safe navigation does not protect chained calls)
    def paths = path_list.findAll { item -> item?.trim() }
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}
/*
 * Work out the publish path for a module result file.
 * Returns '<dir>/<filename>' when the file should be saved, or null otherwise:
 *   - files ending in '.version.txt' always give null
 *   - when options.publish_files is null every other file is saved
 *   - when options.publish_files is a Map, only files ending with one of its keys are saved,
 *     inside the sub-directory given by the matching value
 */
def saveFiles(Map args) {
    // No publish path is returned for version files
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts      = initOptions(args.options)
    def dir_parts = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        dir_parts << args.publish_id
    }

    def rules = opts.publish_files
    if (rules == null) {
        return "${getPathFromList(dir_parts)}/$args.filename"
    }
    if (rules instanceof Map) {
        // First entry (in Map order) whose key is a suffix of the file name decides the sub-directory
        def hit = rules.find { rule -> args.filename.endsWith(rule.key) }
        if (hit != null) {
            return "${getPathFromList(dir_parts + [hit.value])}/$args.filename"
        }
    }
    return null
}

View file

@ -0,0 +1,36 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options    = initOptions(params.options)

/*
 * Keep the intervals of the gene-free BED file that overlap at least one interval
 * in the H3K27ac or H3K4me1 BED files (bedtools intersect -u).
 * All three inputs are passed with -sorted, so they are expected to be coordinate-sorted.
 */
process BEDTOOLS_ERNAS {
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }

    conda (params.enable_conda ? "bioconda::bedtools=2.29.2" : null)
    if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/bedtools:2.29.2--hc088bd4_0"
    } else {
        container "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0"
    }

    input:
    tuple val(meta), path(nogenesbed)
    // Declared as variables so the script can interpolate the staged file names
    path h3k27ac
    path h3k4me1

    output:
    tuple val(meta), path("*.erna.bed"), emit: ernabed
    path "*.version.txt", emit: version

    script:
    def software = getSoftwareName(task.process)
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // -b must list only the histone files: including the -a file itself would make every
    // record self-overlap, and -u would then report the whole input unfiltered
    """
    bedtools intersect -a $nogenesbed -b $h3k27ac $h3k4me1 -sorted -u -bed > ${prefix}.erna.bed
    bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt
    """
}

View file

@ -0,0 +1,55 @@
name: bedtools_ernas
description: Takes in units with genes removed and keeps anything that intersects with H3K27ac or H3K4me1
keywords:
- bed
- bedtoolsintersect
tools:
- bedtools:
description: |
A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
documentation: https://bedtools.readthedocs.io/en/latest/
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
type: file
description: List of bed files
pattern: "*.{bed}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
type: file
description: Edited bed file
pattern: "*.{bed}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"

View file

@ -0,0 +1,59 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the lower-case software tool name from a process name (the value of $task.process),
 * e.g. 'WORKFLOW:BEDTOOLS_HISTONESTOBED' gives 'bedtools'
 */
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]   // last ':'-separated element, i.e. without enclosing scopes
    def tool_name    = process_name.tokenize('_')[0]    // part before the first underscore
    return tool_name.toLowerCase()
}
/*
 * Build the Groovy Map of options available to an nf-core module,
 * substituting a default for every key that is unset (or falsy) in the supplied Map
 */
def initOptions(Map args) {
    return [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,      // copied as-is, so it stays null when not supplied
        suffix        : args.suffix ?: ''
    ]
}
/*
 * Tidy up and join elements of a list to return a path string.
 * Null, empty and whitespace-only entries are dropped; the remaining entries are stripped of
 * surrounding whitespace and of leading/trailing slashes, then joined with '/'.
 */
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false, so a null entry is filtered out here
    // instead of reaching a method call on null (safe navigation does not protect chained calls)
    def paths = path_list.findAll { item -> item?.trim() }
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}
/*
 * Work out the publish path for a module result file.
 * Returns '<dir>/<filename>' when the file should be saved, or null otherwise:
 *   - files ending in '.version.txt' always give null
 *   - when options.publish_files is null every other file is saved
 *   - when options.publish_files is a Map, only files ending with one of its keys are saved,
 *     inside the sub-directory given by the matching value
 */
def saveFiles(Map args) {
    // No publish path is returned for version files
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts      = initOptions(args.options)
    def dir_parts = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        dir_parts << args.publish_id
    }

    def rules = opts.publish_files
    if (rules == null) {
        return "${getPathFromList(dir_parts)}/$args.filename"
    }
    if (rules instanceof Map) {
        // First entry (in Map order) whose key is a suffix of the file name decides the sub-directory
        def hit = rules.find { rule -> args.filename.endsWith(rule.key) }
        if (hit != null) {
            return "${getPathFromList(dir_parts + [hit.value])}/$args.filename"
        }
    }
    return null
}

View file

@ -0,0 +1,35 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options    = initOptions(params.options)

/*
 * Convert a histone BAM file to BED (bamToBed) and sort the result (sortBed)
 */
process BEDTOOLS_HISTONESTOBED {
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }

    conda (params.enable_conda ? "bioconda::bedtools=2.29.2" : null)
    if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/bedtools:2.29.2--hc088bd4_0"
    } else {
        container "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0"
    }

    input:
    // Only the first element of `bams` is used by the script
    tuple val(meta), path(bams)

    output:
    tuple val(meta), path("*.bed"), emit: bed
    path "*.version.txt", emit: version

    script:
    def software = getSoftwareName(task.process)
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // sortBed reads the bamToBed output from the pipe; name stdin explicitly rather than
    // leaving -i without an argument
    """
    bamToBed -i ${bams[0]} | sortBed -i stdin > ${prefix}.bed
    bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt
    """
}

View file

@ -0,0 +1,53 @@
name: bedtools_histonestobed
description: Converts histone BAM files to BED format
keywords:
- bed
- bedtoolsintersect
tools:
- bedtools:
description: |
A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
documentation: https://bedtools.readthedocs.io/en/latest/
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bams:
type: file
description: List of BAM files (only the first one is converted)
pattern: "*.{bam}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
type: file
description: Edited bed file
pattern: "*.{bed}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"

View file

@ -0,0 +1 @@
chr1 999 1010

View file

@ -0,0 +1,5 @@
chr1 951 1061
chr1 1300 1420
chr1 1400 1500

View file

@ -0,0 +1,5 @@
chr1 951 1061
chr1 1300 1420
chr1 1400 1500

View file

@ -4,6 +4,10 @@ nextflow.preview.dsl = 2
include { BEDTOOLS_SLOP } from '../slop/main.nf' addParams( options: [publish_dir:'test_bed_file'])
include { BEDTOOLS_REMOVEGENES } from '../removegenes/main.nf' addParams( options: [publish_dir:'test_bed_file'])
include { BEDTOOLS_ERNAS } from '../ernas/main.nf' addParams( options: [publish_dir:'test_bed_file'])
include { BEDTOOLS_TESTERNA } from '../testerna/main.nf' addParams( options: [publish_dir:'test_bed_file'])
include { BEDTOOLS_HISTONESTOBED} from '../histonestobed/main.nf' addParams( options: [publish_dir:'test_histones_to_bed'])
include { BEDTOOLS_ERNAGENEGROUPS} from '../ernagenegroups/main.nf' addParams( options: [publish_dir:'test_erna_gene_groups'])
// Define input channels
@ -21,9 +25,31 @@ workflow test_bed_file {
BEDTOOLS_SLOP.out.slopbed,
file("${baseDir}/input/B.metatranscripts", checkIfExists: true)
)
BEDTOOLS_ERNAS(
BEDTOOLS_REMOVEGENES.out.nogenesbed,
file("${baseDir}/input/H3K27ac.bed", checkIfExists: true),
file("${baseDir}/input/H3K4me1.bed", checkIfExists: true)
)
BEDTOOLS_TESTERNA(
BEDTOOLS_ERNAS.out.ernabed,
file("${baseDir}/input/B.bed", checkIfExists: true)
)
}
/*
 * Run BEDTOOLS_HISTONESTOBED on the single-end test BAM
 */
workflow test_histones_to_bed {
    def meta = [ id:'test', single_end:true ]
    def bams = [ file("${baseDir}/input/test.single_end.sorted.bam", checkIfExists: true) ]

    BEDTOOLS_HISTONESTOBED( [ meta, bams ] )
}
// Unnamed entry workflow: runs test_bed_file and test_histones_to_bed
workflow {
test_bed_file()
test_histones_to_bed()
}

View file

@ -0,0 +1 @@
chr1 999 1010

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,59 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the lower-case software tool name from a process name (the value of $task.process),
 * e.g. 'WORKFLOW:BEDTOOLS_TESTERNA' gives 'bedtools'
 */
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]   // last ':'-separated element, i.e. without enclosing scopes
    def tool_name    = process_name.tokenize('_')[0]    // part before the first underscore
    return tool_name.toLowerCase()
}
/*
 * Build the Groovy Map of options available to an nf-core module,
 * substituting a default for every key that is unset (or falsy) in the supplied Map
 */
def initOptions(Map args) {
    return [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,      // copied as-is, so it stays null when not supplied
        suffix        : args.suffix ?: ''
    ]
}
/*
 * Tidy up and join elements of a list to return a path string.
 * Null, empty and whitespace-only entries are dropped; the remaining entries are stripped of
 * surrounding whitespace and of leading/trailing slashes, then joined with '/'.
 */
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false, so a null entry is filtered out here
    // instead of reaching a method call on null (safe navigation does not protect chained calls)
    def paths = path_list.findAll { item -> item?.trim() }
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}
/*
 * Work out the publish path for a module result file.
 * Returns '<dir>/<filename>' when the file should be saved, or null otherwise:
 *   - files ending in '.version.txt' always give null
 *   - when options.publish_files is null every other file is saved
 *   - when options.publish_files is a Map, only files ending with one of its keys are saved,
 *     inside the sub-directory given by the matching value
 */
def saveFiles(Map args) {
    // No publish path is returned for version files
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts      = initOptions(args.options)
    def dir_parts = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        dir_parts << args.publish_id
    }

    def rules = opts.publish_files
    if (rules == null) {
        return "${getPathFromList(dir_parts)}/$args.filename"
    }
    if (rules instanceof Map) {
        // First entry (in Map order) whose key is a suffix of the file name decides the sub-directory
        def hit = rules.find { rule -> args.filename.endsWith(rule.key) }
        if (hit != null) {
            return "${getPathFromList(dir_parts + [hit.value])}/$args.filename"
        }
    }
    return null
}

View file

@ -0,0 +1,37 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options    = initOptions(params.options)

/*
 * Report the intervals of the reference BED file that overlap at least one interval
 * of the supplied eRNA BED file (bedtools intersect -u).
 * Both inputs are passed with -sorted, so they are expected to be coordinate-sorted.
 */
process BEDTOOLS_TESTERNA {
    tag "$meta.id"
    label 'process_medium'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }

    conda (params.enable_conda ? "bioconda::bedtools=2.29.2" : null)
    if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/bedtools:2.29.2--hc088bd4_0"
    } else {
        container "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0"
    }

    // In the case of use for cell lines, the second input can also be replaced with the tuple (val) format.
    input:
    tuple val(meta), path(ernabed)
    path ernareferencebed

    output:
    tuple val(meta), path("*.erna.overlaps.bed"), emit: ernaoverlapsbed
    path "*.version.txt", emit: version

    script:
    def software = getSoftwareName(task.process)
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    """
    bedtools intersect -a $ernareferencebed -b $ernabed \\
        -sorted -u > ${prefix}.erna.overlaps.bed
    bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt
    """
}

View file

@ -0,0 +1,53 @@
name: bedtools_testerna
description: Compares identified eRNAs across cell lines or against legacy eRNAs
keywords:
- bed
- bedtoolsintersect
tools:
- bedtools:
description: |
A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
documentation: https://bedtools.readthedocs.io/en/latest/
params:
- outdir:
type: string
description: |
The pipeline's output directory. By default, the module will
output files into `$params.outdir/<SOFTWARE>`
- publish_dir_mode:
type: string
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
type: file
description: List of bed files
pattern: "*.{bed}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bed:
type: file
description: Edited bed file
pattern: "*.{bed}"
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"