Merge pull request #42 from JoseEspinosa/bedtools_dev

Add modules complement, genomecov, intersect, merge, sort for bedtools
This commit is contained in:
Harshil Patel 2020-08-05 15:45:17 +01:00 committed by GitHub
commit 95a91032ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
38 changed files with 495 additions and 0 deletions

View file

@ -0,0 +1,7 @@
# Container image for the nf-core bedtools/complement module, built on nfcore/base.
FROM nfcore/base

LABEL authors="Jose Espinosa-Carrasco" \
      description="Docker image containing all requirements for nf-core/modules/bedtools/complement"

# Create the conda environment declared in environment.yml, then clean conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml \
    && conda clean -a

# Expose the environment's executables by putting its bin directory first on PATH.
ENV PATH=/opt/conda/envs/nf-core-bedtools-complement/bin:$PATH

View file

@ -0,0 +1,9 @@
# Conda environment definition for the bedtools complement module.
# Create it with:
#   conda env create -f environment.yml
name: nf-core-bedtools-complement
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to an exact bedtools release from the bioconda channel.
  - bioconda::bedtools=2.29.2

View file

@ -0,0 +1,32 @@
// Module identifier: default publish sub-directory and suffix of the container image name.
def MODULE = "bedtools_complement"
params.publish_dir = MODULE
params.publish_results = "default"

/*
 * Runs `bedtools complement` on `input_file` against the genome file `fasta_sizes`,
 * appending `bedtools_complement_args` verbatim to the command line.
 *
 * Emits:
 *   complement - the command's stdout, saved as "<input_file>.bed"
 *   version    - bedtools.version.txt holding the reported bedtools version
 *
 * Outputs are published to "${params.out_dir}/${params.publish_dir}" unless
 * params.publish_results is "none".
 */
process BEDTOOLS_COMPLEMENT {
    tag { input_file }

    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        // Returning null from saveAs suppresses publishing of that file.
        saveAs: { filename -> params.publish_results == "none" ? null : filename }

    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"

    input:
    path (input_file)
    path (fasta_sizes)
    val (bedtools_complement_args)

    output:
    path "${input_file}.bed", emit: complement
    path "*.version.txt", emit: version

    script:
    def complement_bed = "${input_file}.bed"
    // The sed expression keeps the text from the last "v" to the end of the version line.
    """
    bedtools complement -i ${input_file} -g ${fasta_sizes} ${bedtools_complement_args} > ${complement_bed}
    bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
    """
}

View file

@ -0,0 +1,30 @@
name: bedtools complement
description: Returns all intervals in a genome that are not covered by at least one interval in the input BED/GFF/VCF file
keywords:
  - complement
tools:
  - bedtools:
      description: |
        Bedtools is a software package that provides with a toolset to perform genome arithmetic operations.
      homepage: https://bedtools.readthedocs.io/en/latest/index.html
      documentation: https://bedtools.readthedocs.io/en/latest/index.html
      # DOI registrant prefix corrected from "10.093" to "10.1093".
      doi: 10.1093/bioinformatics/btq033
input:
  -
    - input_file:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bed,gff,vcf}"
    - fasta_sizes:
        type: file
        description: Genome chromosome sizes
        pattern: "*.{txt,sizes}"
output:
  -
    # NOTE(review): the process in main.nf emits this output as `complement`;
    # the key `index` is kept as-is here - confirm the intended name.
    - index:
        type: stdout,file
        # Previously a bare `description:` key, which parses as null.
        description: Intervals of the genome not covered by any interval in the input file
        pattern: "stdout,*.{bed,gff,vcf}"
authors:
  - "@JoseEspinosa"

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/A.bed

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/genome.sizes

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
// Test workflow: runs BEDTOOLS_COMPLEMENT once on the files linked under ./input_data.
nextflow.preview.dsl = 2

// Parameters forwarded to the module through `params(params)` on the include line.
params.out_dir = "test_output"
params.publish_dir_mode = "copy"
params.bedtools_complement_args = ''
// Not referenced anywhere else in this script.
params.fastqc_args = ''

include BEDTOOLS_COMPLEMENT from '../main.nf' params(params)

// One channel for the interval file, one for the chromosome sizes file.
ch_bed = Channel.fromPath('./input_data/A.bed')
ch_genome_sizes = Channel.fromPath('./input_data/genome.sizes')

workflow {
    BEDTOOLS_COMPLEMENT(ch_bed, ch_genome_sizes, params.bedtools_complement_args)
}

View file

@ -0,0 +1,7 @@
# Container image for the nf-core bedtools/genomecov module, built on nfcore/base.
FROM nfcore/base

LABEL authors="Jose Espinosa-Carrasco" \
      description="Docker image containing all requirements for nf-core/modules/bedtools/genomecov"

# Create the conda environment declared in environment.yml, then clean conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml \
    && conda clean -a

# Expose the environment's executables by putting its bin directory first on PATH.
ENV PATH=/opt/conda/envs/nf-core-bedtools-genomecov/bin:$PATH

View file

@ -0,0 +1,9 @@
# Conda environment definition for the bedtools genomecov module.
# Create it with:
#   conda env create -f environment.yml
name: nf-core-bedtools-genomecov
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to an exact bedtools release from the bioconda channel.
  - bioconda::bedtools=2.29.2

View file

@ -0,0 +1,32 @@
// Module identifier: default publish sub-directory and suffix of the container image name.
def MODULE = "bedtools_genomecov"
params.publish_dir = MODULE
params.publish_results = "default"

/*
 * Runs `bedtools genomecov -ibam` on `bam` with the genome file `chrom_sizes`,
 * appending `bedtools_genomecov_args` verbatim to the command line.
 *
 * Emits:
 *   coverage - the command's stdout, saved as "<bam>.bed"
 *   version  - bedtools.version.txt holding the reported bedtools version
 *
 * Outputs are published to "${params.out_dir}/${params.publish_dir}" unless
 * params.publish_results is "none".
 */
process BEDTOOLS_GENOMECOV {
    tag { bam }

    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        // Returning null from saveAs suppresses publishing of that file.
        saveAs: { filename -> params.publish_results == "none" ? null : filename }

    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"

    input:
    path (bam)
    path (chrom_sizes)
    val (bedtools_genomecov_args)

    output:
    path "${bam}.bed", emit: coverage
    path "*.version.txt", emit: version

    script:
    def coverage_bed = "${bam}.bed"
    // The sed expression keeps the text from the last "v" to the end of the version line.
    """
    bedtools genomecov -ibam ${bam} -g ${chrom_sizes} ${bedtools_genomecov_args} > ${coverage_bed}
    bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
    """
}

View file

@ -0,0 +1,30 @@
name: bedtools genomecov
description: Returns feature coverage for a given genome in different formats
keywords:
  - genomecov
tools:
  - bedtools:
      description: |
        Bedtools is a software package that provides with a toolset to perform genome arithmetic operations.
      homepage: https://bedtools.readthedocs.io/en/latest/index.html
      documentation: https://bedtools.readthedocs.io/en/latest/index.html
      # DOI registrant prefix corrected from "10.093" to "10.1093".
      doi: 10.1093/bioinformatics/btq033
input:
  -
    # NOTE(review): the process in main.nf names this input `bam`;
    # the key `input_file` is kept as-is here - confirm the intended name.
    - input_file:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bam}"
    - chrom_sizes:
        type: file
        description: Genome chromosome sizes
        pattern: "*.{txt,sizes}"
output:
  -
    # NOTE(review): the process in main.nf emits this output as `coverage`;
    # the key `index` is kept as-is here - confirm the intended name.
    - index:
        type: stdout,file
        # Previously a bare `description:` key, which parses as null.
        description: Feature coverage computed for the given genome
        pattern: "stdout,*.{bed,bedGraph}"
authors:
  - "@JoseEspinosa"

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/genome.sizes

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
// Test workflow: runs BEDTOOLS_GENOMECOV once on the files linked under ./input_data.
nextflow.preview.dsl = 2

// Parameters forwarded to the module through `params(params)` on the include line.
params.out_dir = "test_output"
params.publish_dir_mode = "copy"
params.bedtools_genomecov_args = '' // alternative: '-bg'
// Not referenced anywhere else in this script.
params.fastqc_args = ''

include BEDTOOLS_GENOMECOV from '../main.nf' params(params)

// One channel for the BAM file, one for the chromosome sizes file.
ch_bam = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')
ch_genome_sizes = Channel.fromPath('./input_data/genome.sizes')

workflow {
    BEDTOOLS_GENOMECOV(ch_bam, ch_genome_sizes, params.bedtools_genomecov_args)
}

View file

@ -0,0 +1,7 @@
# Container image for the nf-core bedtools/intersect module, built on nfcore/base.
FROM nfcore/base
LABEL authors="Jose Espinosa-Carrasco" \
      description="Docker image containing all requirements for nf-core/modules/bedtools/intersect"

# Create the conda environment declared in environment.yml, then clean conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a

# The directory must match the `name:` in environment.yml (nf-core-bedtools-intersect);
# the previous "...-intersectbed" path did not exist, so bedtools was never on PATH.
ENV PATH /opt/conda/envs/nf-core-bedtools-intersect/bin:$PATH

View file

@ -0,0 +1,9 @@
# Conda environment definition for the bedtools intersect module.
# Create it with:
#   conda env create -f environment.yml
name: nf-core-bedtools-intersect
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to an exact bedtools release from the bioconda channel.
  - bioconda::bedtools=2.29.2

View file

@ -0,0 +1,37 @@
// Module identifier: default publish sub-directory and suffix of the container image name.
def MODULE = "bedtools_intersect"
params.publish_dir = MODULE
params.publish_results = "default"

/*
 * Runs `bedtools intersect -a <input_file_1> -b <input_file_2>`.
 *
 * Inputs:
 *   input_file_1, input_file_2 - the two files passed as -a and -b
 *   intersectbed_args          - extra options, given either as a collection of
 *                                key/value entries (e.g. a Map [f: 0.9] -> "-f 0.9")
 *                                or as a ready-made option string / null
 *
 * Emits:
 *   intersect - the command's stdout, saved as
 *               "<input_file_1.baseName>_i_<input_file_2.baseName>.bed"
 *   version   - bedtools.version.txt holding the reported bedtools version
 *
 * Outputs are published to "${params.out_dir}/${params.publish_dir}" unless
 * params.publish_results is "none".
 */
process INTERSECT_BED {
    tag "$input_file_1-$input_file_2"

    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        // Returning null from saveAs suppresses publishing of that file.
        saveAs: { filename ->
            if (params.publish_results == "none") null
            else filename }

    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"

    input:
    path (input_file_1)
    path (input_file_2)
    val (intersectbed_args)

    output:
    path "${input_file_1.baseName}_i_${input_file_2.baseName}.bed", emit: intersect
    path "*.version.txt", emit: version

    script:
    // A plain option string (or null) is passed through unchanged; previously a
    // non-empty string failed because each character was asked for `.key`.
    // Anything else is rendered entry by entry as "-<key> <value>", as before.
    def params_string
    if (intersectbed_args == null || intersectbed_args instanceof CharSequence) {
        params_string = intersectbed_args ?: ""
    } else {
        params_string = intersectbed_args.collect { "-${it.key} ${it.value}" }.join(" ")
    }
    // The sed expression keeps the text from the last "v" to the end of the version line.
    """
    bedtools intersect -a ${input_file_1} -b ${input_file_2} ${params_string} > ${input_file_1.baseName}_i_${input_file_2.baseName}.bed
    bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
    """
}

View file

@ -0,0 +1,30 @@
name: bedtools intersect
description: Returns the overlapping features between two sets of genomics features
keywords:
  - bedtools intersect
tools:
  - bedtools:
      description: |
        Bedtools is a software package that provides with a toolset to perform genome arithmetic operations.
      homepage: https://bedtools.readthedocs.io/en/latest/index.html
      documentation: https://bedtools.readthedocs.io/en/latest/index.html
      # DOI registrant prefix corrected from "10.093" to "10.1093".
      doi: 10.1093/bioinformatics/btq033
input:
  -
    - input_file_1:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bam,bed,BED,gff,vcf}"
    - input_file_2:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bam,bed,BED,gff,vcf}"
output:
  -
    # NOTE(review): the process in main.nf emits this output as `intersect`;
    # the key `index` is kept as-is here - confirm the intended name.
    - index:
        type: stdout,file
        # Previously a bare `description:` key, which parses as null.
        description: Overlapping features between the two input files
        pattern: "stdout,*.{bed,BED}"
authors:
  - "@JoseEspinosa"

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/A.bed

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/B.bed

View file

@ -0,0 +1,24 @@
#!/usr/bin/env nextflow
// Test workflow: runs INTERSECT_BED once on the two BED files linked under ./input_data.
nextflow.preview.dsl = 2

// Parameters forwarded to the module through `params(params)` on the include line.
params.out_dir = "test_output"
params.publish_dir_mode = "copy"
// The two parameters below are not referenced anywhere else in this script.
params.fastqc_args = ''
params.intersect_args = '' // alternative: '-bed -c -f 0.20'

include check_output from '../../../../tests/functions/check_process_outputs.nf' // params(params)
include INTERSECT_BED from '../main.nf' params(params)

// One channel per input file (-a and -b respectively).
ch_bed_a = Channel.fromPath('./input_data/A.bed')
ch_bed_b = Channel.fromPath('./input_data/B.bed')

// Extra bedtools options as option-name -> value; an empty value yields a bare flag.
def additional_params_map = [ s: "", f: 0.9 ]

workflow {
    INTERSECT_BED(ch_bed_a, ch_bed_b, additional_params_map)
}

View file

@ -0,0 +1,7 @@
# Container image for the nf-core bedtools/merge module, built on nfcore/base.
FROM nfcore/base
# The description previously named "bedtools/complementbed", a copy-paste leftover;
# this image installs the nf-core-bedtools-merge environment.
LABEL authors="Jose Espinosa-Carrasco" \
      description="Docker image containing all requirements for nf-core/modules/bedtools/merge"

# Create the conda environment declared in environment.yml, then clean conda's caches.
COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a

# Expose the environment's executables by putting its bin directory first on PATH.
ENV PATH /opt/conda/envs/nf-core-bedtools-merge/bin:$PATH

View file

@ -0,0 +1,9 @@
# Conda environment definition for the bedtools merge module.
# Create it with:
#   conda env create -f environment.yml
name: nf-core-bedtools-merge
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to an exact bedtools release from the bioconda channel.
  - bioconda::bedtools=2.29.2

View file

@ -0,0 +1,31 @@
// Module identifier: default publish sub-directory and suffix of the container image name.
def MODULE = "bedtools_merge"
params.publish_dir = MODULE
params.publish_results = "default"

/*
 * Runs `bedtools merge` on `input_file`, appending `bedtools_merge_args`
 * verbatim to the command line.
 *
 * Emits:
 *   merge   - the command's stdout, saved as "<input_file>.bed"
 *   version - bedtools.version.txt holding the reported bedtools version
 *
 * Outputs are published to "${params.out_dir}/${params.publish_dir}" unless
 * params.publish_results is "none".
 */
process BEDTOOLS_MERGE {
    tag { input_file }

    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        // Returning null from saveAs suppresses publishing of that file.
        saveAs: { filename -> params.publish_results == "none" ? null : filename }

    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"

    input:
    path (input_file)
    val (bedtools_merge_args)

    output:
    path "${input_file}.bed", emit: merge
    path "*.version.txt", emit: version

    script:
    def merged_bed = "${input_file}.bed"
    // The sed expression keeps the text from the last "v" to the end of the version line.
    """
    bedtools merge -i ${input_file} ${bedtools_merge_args} > ${merged_bed}
    bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
    """
}

View file

@ -0,0 +1,26 @@
name: bedtools merge
description: Combines overlapping genome features of a single file
keywords:
  - merge
tools:
  - bedtools:
      description: |
        Bedtools is a software package that provides with a toolset to perform genome arithmetic operations.
      homepage: https://bedtools.readthedocs.io/en/latest/index.html
      documentation: https://bedtools.readthedocs.io/en/latest/index.html
      # DOI registrant prefix corrected from "10.093" to "10.1093".
      doi: 10.1093/bioinformatics/btq033
input:
  -
    - input_file:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bed,gff,vcf,bam}"
output:
  -
    # NOTE(review): the process in main.nf emits this output as `merge`;
    # the key `index` is kept as-is here - confirm the intended name.
    - index:
        type: stdout,file
        # Previously a bare `description:` key, which parses as null.
        description: Features of the input file with overlapping ones combined
        pattern: "stdout,*.{bed}"
authors:
  - "@JoseEspinosa"

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/A.bed

View file

@ -0,0 +1 @@
../../../../../tests/data/bam/JK2067_downsampled_s0.1.bam

View file

@ -0,0 +1,19 @@
#!/usr/bin/env nextflow
// Test workflow: runs BEDTOOLS_MERGE once on the BED file linked under ./input_data.
nextflow.preview.dsl = 2

// Parameters forwarded to the module through `params(params)` on the include line.
params.out_dir = "test_output"
params.publish_dir_mode = "copy"
params.bedtools_merge_args = '' // alternative: '-s -c 6 -o distinct'
// Not referenced anywhere else in this script.
params.fastqc_args = ''

include BEDTOOLS_MERGE from '../main.nf' params(params)

// Input channel; the commented-out line switches the input to the BAM file instead.
ch_features = Channel.fromPath('./input_data/A.bed')
//ch_features = Channel.fromPath('./input_data/JK2067_downsampled_s0.1.bam')

workflow {
    BEDTOOLS_MERGE(ch_features, params.bedtools_merge_args)
}

View file

@ -0,0 +1,2 @@
// Enable Docker execution.
docker {
    enabled = true
}

// NOTE(review): the module and test scripts shown alongside this file read
// `params.out_dir`, not `params.outdir` - confirm which name is intended.
params {
    outdir = './results'
}

View file

@ -0,0 +1,9 @@
# Conda environment definition for the bedtools sort module.
# Create it with:
#   conda env create -f environment.yml
name: nf-core-bedtools-sort
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to an exact bedtools release from the bioconda channel.
  - bioconda::bedtools=2.29.2

View file

@ -0,0 +1,31 @@
// Module identifier: default publish sub-directory and suffix of the container image name.
def MODULE = "bedtools_sort"
params.publish_dir = MODULE
params.publish_results = "default"

/*
 * Runs `bedtools sort` on `input_file`, appending `bedtools_sort_args`
 * verbatim to the command line.
 *
 * Emits:
 *   sort    - the command's stdout, saved as "<input_file>.bed"
 *   version - bedtools.version.txt holding the reported bedtools version
 *
 * Outputs are published to "${params.out_dir}/${params.publish_dir}" unless
 * params.publish_results is "none".
 */
process BEDTOOLS_SORT {
    tag { input_file }

    publishDir "${params.out_dir}/${params.publish_dir}",
        mode: params.publish_dir_mode,
        // Returning null from saveAs suppresses publishing of that file.
        saveAs: { filename -> params.publish_results == "none" ? null : filename }

    container "docker.pkg.github.com/nf-core/$MODULE"
    conda "${moduleDir}/environment.yml"

    input:
    path (input_file)
    val (bedtools_sort_args)

    output:
    path "${input_file}.bed", emit: sort
    path "*.version.txt", emit: version

    script:
    def sorted_bed = "${input_file}.bed"
    // The sed expression keeps the text from the last "v" to the end of the version line.
    """
    bedtools sort -i ${input_file} ${bedtools_sort_args} > ${sorted_bed}
    bedtools --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > bedtools.version.txt
    """
}

View file

@ -0,0 +1,26 @@
name: bedtools sort
description: Returns a sorted feature file by chromosome and other criteria
keywords:
  - sort
tools:
  - bedtools:
      description: |
        Bedtools is a software package that provides with a toolset to perform genome arithmetic operations.
      homepage: https://bedtools.readthedocs.io/en/latest/index.html
      documentation: https://bedtools.readthedocs.io/en/latest/index.html
      # DOI registrant prefix corrected from "10.093" to "10.1093".
      doi: 10.1093/bioinformatics/btq033
input:
  -
    - input_file:
        type: file
        description: Input genomic coordinates file
        pattern: "*.{bed,gff,vcf}"
output:
  -
    # NOTE(review): the process in main.nf emits this output as `sort`;
    # the key `index` is kept as-is here - confirm the intended name.
    - index:
        type: stdout,file
        # Previously a bare `description:` key, which parses as null.
        description: Sorted feature file
        pattern: "stdout,*.{bed,gff,vcf}"
authors:
  - "@JoseEspinosa"

View file

@ -0,0 +1 @@
../../../../../tests/data/bed/A.bed

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
// Test workflow: runs BEDTOOLS_SORT once on the BED file linked under ./input_data.
nextflow.preview.dsl = 2

// Parameters forwarded to the module through `params(params)` on the include line.
params.out_dir = "test_output"
params.publish_dir_mode = "copy"
params.bedtools_sort_args = '' // alternative: '-sizeD'
// Not referenced anywhere else in this script.
params.fastqc_args = ''

include BEDTOOLS_SORT from '../main.nf' params(params)

// Input channel holding the single BED file to sort.
ch_features = Channel.fromPath('./input_data/A.bed')

workflow {
    BEDTOOLS_SORT(ch_features, params.bedtools_sort_args)
}

Binary file not shown.

5
tests/data/bed/A.bed Normal file
View file

@ -0,0 +1,5 @@
chr1 951 1061
chr1 1300 1420
chr1 1400 1500

1
tests/data/bed/B.bed Normal file
View file

@ -0,0 +1 @@
chr1 999 1010

View file

@ -0,0 +1 @@
chr1 1780869