Merge pull request #1673 from nvnieuwk/new-module-bedtools/split

new module: bedtools split
This commit is contained in:
Jose Espinosa-Carrasco 2022-05-17 14:45:07 +02:00 committed by GitHub
commit fbf20c1e36
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 115 additions and 0 deletions

View file

@ -0,0 +1,38 @@
// Runs `bedtools split` on one BED file to produce several smaller BED files.
//
// Inputs:
//   meta            - Groovy map with sample information; `meta.id` is used as the
//                     task tag and as the default output prefix.
//   bed             - BED file to split.
//   number_of_files - value passed to `bedtools split -n`.
// Outputs:
//   beds     - tuple of meta and every `*.bed` file written by the task.
//   versions - `versions.yml` recording the bedtools version that ran.
process BEDTOOLS_SPLIT {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h468198e_3':
        'quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2' }"

    input:
    tuple val(meta), path(bed)
    val(number_of_files)

    output:
    tuple val(meta), path("*.bed"), emit: beds
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options and the output prefix can be overridden via task.ext.
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    bedtools \\
        split \\
        $args \\
        -i $bed \\
        -p $prefix \\
        -n $number_of_files

    # The tool line must stay indented under the process key so that
    # versions.yml is a nested mapping (process name -> tool -> version).
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
    END_VERSIONS
    """
}

View file

@ -0,0 +1,41 @@
# Module metadata for bedtools/split: describes the tool, its input channels
# and its output channels.
name: "bedtools_split"
description: Split BED files into several smaller BED files
keywords:
  - split
  - bed
  - bedtools
tools:
  - "bedtools":
      description: "A powerful toolset for genome arithmetic"
      # NOTE(review): previously pointed at the `sort` tool page; linked to the
      # documentation root instead - replace with a split-specific page if one exists.
      documentation: "https://bedtools.readthedocs.io/en/latest/"
      licence: ["MIT", "GPL v2"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bed:
      type: file
      description: BED file
      pattern: "*.bed"
  # Second process input; the name matches `val(number_of_files)` in main.nf.
  - number_of_files:
      type: integer
      description: The number of files to split the BED into
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - beds:
      type: file
      description: list of split BED files
      pattern: "*.bed"
authors:
  - "@nvnieuwk"

View file

@ -222,6 +222,10 @@ bedtools/sort:
- modules/bedtools/sort/**
- tests/modules/bedtools/sort/**
# Paths belonging to the bedtools/split module and its tests.
bedtools/split:
  - modules/bedtools/split/**
  - tests/modules/bedtools/split/**
# Paths belonging to the bedtools/subtract module and its tests.
bedtools/subtract:
  - modules/bedtools/subtract/**
  - tests/modules/bedtools/subtract/**

View file

@ -0,0 +1,17 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { BEDTOOLS_SPLIT } from '../../../../modules/bedtools/split/main.nf'
// Test entry point: runs BEDTOOLS_SPLIT on a multi-interval BED file and asks
// for the file to be split into two parts.
workflow test_bedtools_split {

    // Resolve the test BED first; checkIfExists makes a missing file fail here.
    bed_file = file(params.test_data['homo_sapiens']['genome']['genome_multi_interval_bed'], checkIfExists: true)

    input = [
        [ id:'test' ], // meta map
        bed_file
    ]
    number_of_files = 2

    BEDTOOLS_SPLIT ( input, number_of_files )
}

View file

@ -0,0 +1,5 @@
process {
    // Publish each task's outputs under <outdir>/<tool>, where <tool> is the
    // lower-cased text before the first underscore of the last ':'-separated
    // part of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }
}

View file

@ -0,0 +1,10 @@
# Test case: run the test_bedtools_split workflow and compare the checksums of
# the two BED files it is expected to publish.
- name: bedtools split test_bedtools_split
  command: >-
    nextflow run ./tests/modules/bedtools/split
    -entry test_bedtools_split
    -c ./tests/config/nextflow.config
    -c ./tests/modules/bedtools/split/nextflow.config
  tags:
    - bedtools
    - bedtools/split
  files:
    - path: output/bedtools/test.00001.bed
      md5sum: "d58e5e46c2fcc3b8be5db0f023e93cb5"
    - path: output/bedtools/test.00002.bed
      md5sum: "03caf952e9297a54620d2bbba8dc2823"