From 95dfa7bc8d6ebef115ce6f4c1e9babc3635db623 Mon Sep 17 00:00:00 2001 From: Svyatoslav Sidorov Date: Wed, 7 Jul 2021 11:44:09 +0300 Subject: [PATCH] Enhance bedtools (#557) * hifiasm copied from fastqc * hifiasm tests init from fastqc * meta.yml init; test.yml and main.nf for printing version * Add hifiasm version printing * Removed spaced on an empty line * Reverted hifiasm from main * hifiasm copied from fastqc * hifiasm tests init from fastqc * meta.yml init; test.yml and main.nf for printing version * Add hifiasm version printing * Removed spaced on an empty line * Reverted hifiasm from main * Generalized intersect and genomecov * Finished subtract and tests for intersect and genomecov * Finished up meta YAMLs * Replaced functions.nf in subtract * do not add "." in prefix when suffix is set (#550) when using the suffix option in the fastqc module it gets added with a "." to the ${meta.id}, this leads to sample name clashing when passing the fastqc output of raw and processed files suffixed with "trim" or "clean" to multiqc. I suggest to remove the "." this would also be more consistent with the prefix def in most of the other nf-core modules. 
##' Ref: https://nfcore.slack.com/archives/CJRH30T6V/p1625129138043900 * Modules TLC (#551) * Modules TLC * Fix all the tests * Fix linting failures (#552) * Modules TLC * Fix all the tests * Fix linting failures * Soft-link build files to bypass lint error * Update software/rseqc/junctionannotation/main.nf * Fix rsem (#555) * bump bcftools filter * fix bcftools * rsem/preparereference * added calculateexpression * meta.yml and pytest_software * paired end * add test.yml * fix rseqc/junctionsaturation (#553) * bump bcftools filter * junctionsaturation meta.yml and test * pytest config entry * bump bcftools version down again * fix test * fix bcftools * Adapt linting action (#556) * bump bcftools filter * fix bcftools * adapt module linting * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/intersect/meta.yml Co-authored-by: Harshil Patel * Update 
software/bedtools/intersect/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/intersect/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/intersect/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/intersect/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/subtract/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/subtract/main.nf Co-authored-by: Harshil Patel * Update software/bedtools/subtract/meta.yml Co-authored-by: Harshil Patel * Update software/bedtools/subtract/meta.yml Co-authored-by: Harshil Patel * Update tests/software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update tests/software/bedtools/genomecov/main.nf Co-authored-by: Harshil Patel * Update tests/software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Update tests/software/bedtools/intersect/main.nf Co-authored-by: Harshil Patel * Fixed variable names in intersect and genomecov Co-authored-by: Sviatoslav Sidorov Co-authored-by: riederd Co-authored-by: Harshil Patel Co-authored-by: Kevin Menden --- software/bedtools/genomecov/main.nf | 37 ++++++++---- software/bedtools/genomecov/meta.yml | 19 ++++-- software/bedtools/intersect/main.nf | 13 +++-- software/bedtools/intersect/meta.yml | 24 ++++---- software/bedtools/subtract/functions.nf | 68 ++++++++++++++++++++++ software/bedtools/subtract/main.nf | 41 +++++++++++++ software/bedtools/subtract/meta.yml | 45 ++++++++++++++ tests/config/pytest_software.yml | 4 ++ tests/software/bedtools/genomecov/main.nf | 15 ++++- tests/software/bedtools/genomecov/test.yml | 15 ++++- tests/software/bedtools/intersect/main.nf | 15 ++++- tests/software/bedtools/intersect/test.yml | 17 ++++-- tests/software/bedtools/subtract/main.nf | 15 +++++ tests/software/bedtools/subtract/test.yml | 8 +++ 14 files changed, 294 insertions(+), 42 deletions(-) create mode 100644 software/bedtools/subtract/functions.nf create mode 100644 software/bedtools/subtract/main.nf create 
mode 100644 software/bedtools/subtract/meta.yml create mode 100644 tests/software/bedtools/subtract/main.nf create mode 100644 tests/software/bedtools/subtract/test.yml diff --git a/software/bedtools/genomecov/main.nf b/software/bedtools/genomecov/main.nf index 647ee2c3..f9b87464 100644 --- a/software/bedtools/genomecov/main.nf +++ b/software/bedtools/genomecov/main.nf @@ -19,22 +19,37 @@ process BEDTOOLS_GENOMECOV { } input: - tuple val(meta), path(bam) + tuple val(meta), path(intervals) + path sizes + val extension output: - tuple val(meta), path("*.bed"), emit: bed - path "*.version.txt" , emit: version + tuple val(meta), path("*.${extension}"), emit: genomecov + path "*.version.txt" , emit: version script: def software = getSoftwareName(task.process) def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - bedtools \\ - genomecov \\ - -ibam $bam \\ - $options.args \\ - > ${prefix}.bed + if (intervals.name =~ /\.bam/) { + """ + bedtools \\ + genomecov \\ + -ibam $intervals \\ + $options.args \\ + > ${prefix}.${extension} - bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt - """ + bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt + """ + } else { + """ + bedtools \\ + genomecov \\ + -i $intervals \\ + -g $sizes \\ + $options.args \\ + > ${prefix}.${extension} + + bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt + """ + } } diff --git a/software/bedtools/genomecov/meta.yml b/software/bedtools/genomecov/meta.yml index a3254307..f629665c 100644 --- a/software/bedtools/genomecov/meta.yml +++ b/software/bedtools/genomecov/meta.yml @@ -15,20 +15,26 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - intervals: type: file - description: Input BAM file - pattern: "*.{bam}" + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bed: + - genomecov: type: file - description: Computed genomecov bed file - pattern: "*.{bed}" + description: Computed genome coverage file + pattern: "*.${extension}" - version: type: file description: File containing software version @@ -37,3 +43,4 @@ authors: - "@Emiller88" - "@sruthipsuresh" - "@drpatelh" + - "@sidorov-si" diff --git a/software/bedtools/intersect/main.nf b/software/bedtools/intersect/main.nf index 3e0a2e02..4519783a 100644 --- a/software/bedtools/intersect/main.nf +++ b/software/bedtools/intersect/main.nf @@ -19,11 +19,12 @@ process BEDTOOLS_INTERSECT { } input: - tuple val(meta), path(bed1), path(bed2) + tuple val(meta), path(intervals1), path(intervals2) + val extension output: - tuple val(meta), path('*.bed'), emit: bed - path '*.version.txt' , emit: version + tuple val(meta), path("*.${extension}"), emit: intersect + path '*.version.txt' , emit: version script: def software = getSoftwareName(task.process) @@ -31,10 +32,10 @@ process BEDTOOLS_INTERSECT { """ bedtools \\ intersect \\ - -a $bed1 \\ - -b $bed2 \\ + -a $intervals1 \\ + -b $intervals2 \\ $options.args \\ - > ${prefix}.bed + > ${prefix}.${extension} bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt """ diff --git a/software/bedtools/intersect/meta.yml b/software/bedtools/intersect/meta.yml index 
eaba54f6..e944e355 100644 --- a/software/bedtools/intersect/meta.yml +++ b/software/bedtools/intersect/meta.yml @@ -1,5 +1,5 @@ name: bedtools_intersect -description: allows one to screen for overlaps between two sets of genomic features. +description: Allows one to screen for overlaps between two sets of genomic features. keywords: - bed - intersect @@ -14,24 +14,27 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bed1: + - intervals1: type: file - description: BED file, each feature in 1 is compared to 2 in search of overlaps - pattern: "*.{bed}" - - bed2: + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - intervals2: type: file - description: Second bed file, used to compare to first BED file - pattern: "*.{bed}" + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - extension: + type: value + description: Extension of the output file. It is set by the user and corresponds to the file format which depends on arguments (e. g., ".bed", ".bam", ".txt", etc.). output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bed: + - intersect: type: file - description: BED file with intersected intervals - pattern: "*.{bed}" + description: File containing the description of overlaps found between the two features + pattern: "*.${extension}" - version: type: file description: File containing software version @@ -40,3 +43,4 @@ authors: - "@Emiller88" - "@sruthipsuresh" - "@drpatelh" + - "@sidorov-si" diff --git a/software/bedtools/subtract/functions.nf b/software/bedtools/subtract/functions.nf new file mode 100644 index 00000000..da9da093 --- /dev/null +++ b/software/bedtools/subtract/functions.nf @@ -0,0 +1,68 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_meta) { + def key_list = 
ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/software/bedtools/subtract/main.nf b/software/bedtools/subtract/main.nf new file mode 100644 index 00000000..5780cd65 --- /dev/null +++ b/software/bedtools/subtract/main.nf @@ -0,0 +1,41 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process BEDTOOLS_SUBTRACT { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::bedtools=2.30.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0" + } else { + container "quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0" + } + + input: + tuple val(meta), path(intervals1), path(intervals2) + + output: + tuple val(meta), path("*.bed"), emit: bed + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + """ + bedtools \\ + subtract \\ + -a $intervals1 \\ + -b $intervals2 \\ + $options.args \\ + > ${prefix}.bed + + bedtools --version | sed -e "s/bedtools v//g" > ${software}.version.txt + """ +} diff --git a/software/bedtools/subtract/meta.yml b/software/bedtools/subtract/meta.yml new file mode 100644 index 00000000..a1a1e087 --- /dev/null +++ b/software/bedtools/subtract/meta.yml @@ -0,0 +1,45 @@ +name: bedtools_subtract +description: Finds overlaps between two sets of regions (A and B), removes the overlaps from A and reports the remaining portion of A. +keywords: + - bed + - gff + - vcf + - subtract +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/subtract.html + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals1: + type: file + description: BED/GFF/VCF + pattern: "*.{bed|gff|vcf}" + - intervals2: + type: file + description: BED/GFF/VCF + pattern: "*.{bed|gff|vcf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: File containing the difference between the two sets of features + pattern: "*.bed" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + +authors: + - "@sidorov-si" diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml index 83f69481..543d8e0e 100644 --- a/tests/config/pytest_software.yml +++ b/tests/config/pytest_software.yml @@ -86,6 +86,10 @@ bedtools/sort: - software/bedtools/sort/** - tests/software/bedtools/sort/** +bedtools/subtract: + - software/bedtools/subtract/** + - tests/software/bedtools/subtract/** + bismark/align: - software/bismark/align/** - software/bismark/genomepreparation/** diff --git a/tests/software/bedtools/genomecov/main.nf b/tests/software/bedtools/genomecov/main.nf index 01f02119..69cb40ee 100644 --- a/tests/software/bedtools/genomecov/main.nf +++ b/tests/software/bedtools/genomecov/main.nf @@ -9,6 +9,19 @@ workflow test_bedtools_genomecov { file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) ] - BEDTOOLS_GENOMECOV ( input ) + sizes = file('dummy_chromosome_sizes') + extension = 'txt' + + BEDTOOLS_GENOMECOV ( input, sizes, extension ) } +workflow test_bedtools_genomecov_nonbam { + input = [ [ id:'test'], + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) + ] + + sizes = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + extension = 'txt' + + BEDTOOLS_GENOMECOV ( input, sizes, extension ) +} diff --git a/tests/software/bedtools/genomecov/test.yml b/tests/software/bedtools/genomecov/test.yml index a5faf5eb..327cd457 100644 --- a/tests/software/bedtools/genomecov/test.yml +++ b/tests/software/bedtools/genomecov/test.yml @@ -1,8 +1,17 @@ -- name: bedtools genomecov - command: nextflow run ./tests/software/bedtools/genomecov -entry test_bedtools_genomecov -c tests/config/nextflow.config +- name: 
bedtools genomecov test_bedtools_genomecov + command: nextflow run tests/software/bedtools/genomecov -entry test_bedtools_genomecov -c tests/config/nextflow.config tags: - bedtools - bedtools/genomecov files: - - path: ./output/bedtools/test_out.bed + - path: output/bedtools/test_out.txt md5sum: 66083198daca6c001d328ba9616e9b53 + +- name: bedtools genomecov test_bedtools_genomecov_nonbam + command: nextflow run tests/software/bedtools/genomecov -entry test_bedtools_genomecov_nonbam -c tests/config/nextflow.config + tags: + - bedtools + - bedtools/genomecov + files: + - path: output/bedtools/test_out.txt + md5sum: f47b58840087426e5b643d8dfd155c1f diff --git a/tests/software/bedtools/intersect/main.nf b/tests/software/bedtools/intersect/main.nf index dcfefdcb..2ce11c06 100644 --- a/tests/software/bedtools/intersect/main.nf +++ b/tests/software/bedtools/intersect/main.nf @@ -9,6 +9,19 @@ workflow test_bedtools_intersect { file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true) ] + + extension = 'bed' - BEDTOOLS_INTERSECT ( input ) + BEDTOOLS_INTERSECT ( input, extension ) +} + +workflow test_bedtools_intersect_bam { + input = [ [ id:'test' ], + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) + ] + + extension = 'bam' + + BEDTOOLS_INTERSECT ( input, extension ) } diff --git a/tests/software/bedtools/intersect/test.yml b/tests/software/bedtools/intersect/test.yml index ddc91321..9e11d314 100644 --- a/tests/software/bedtools/intersect/test.yml +++ b/tests/software/bedtools/intersect/test.yml @@ -1,8 +1,17 @@ -- name: bedtools intersect - command: nextflow run ./tests/software/bedtools/intersect -entry test_bedtools_intersect -c tests/config/nextflow.config +- name: bedtools intersect test_bedtools_intersect + command: nextflow run 
tests/software/bedtools/intersect -entry test_bedtools_intersect -c tests/config/nextflow.config tags: - bedtools - bedtools/intersect files: - - path: ./output/bedtools/test_out.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/bedtools/test_out.bed + md5sum: afcbf01c2f2013aad71dbe8e34f2c15c + +- name: bedtools intersect test_bedtools_intersect_bam + command: nextflow run tests/software/bedtools/intersect -entry test_bedtools_intersect_bam -c tests/config/nextflow.config + tags: + - bedtools + - bedtools/intersect + files: + - path: output/bedtools/test_out.bam + md5sum: 738324efe2b1e442ceb6539a630c3fe6 diff --git a/tests/software/bedtools/subtract/main.nf b/tests/software/bedtools/subtract/main.nf new file mode 100644 index 00000000..7cfefb2c --- /dev/null +++ b/tests/software/bedtools/subtract/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BEDTOOLS_SUBTRACT } from '../../../../software/bedtools/subtract/main.nf' addParams( options: [:] ) + +workflow test_bedtools_subtract { + + input = [ [ id:'test_subtract' ], + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + + BEDTOOLS_SUBTRACT ( input ) +} diff --git a/tests/software/bedtools/subtract/test.yml b/tests/software/bedtools/subtract/test.yml new file mode 100644 index 00000000..771de261 --- /dev/null +++ b/tests/software/bedtools/subtract/test.yml @@ -0,0 +1,8 @@ +- name: bedtools subtract test_bedtools_subtract + command: nextflow run tests/software/bedtools/subtract -entry test_bedtools_subtract -c tests/config/nextflow.config + tags: + - bedtools + - bedtools/subtract + files: + - path: output/bedtools/test_subtract.bed + md5sum: 63513c4dc69e8b481ce3b4b2a9f24259