From 8a2a9f7e811abd133424cb3df45418b3572c3ec3 Mon Sep 17 00:00:00 2001
From: Ravneet Bhuller
Date: Mon, 22 Mar 2021 22:27:30 +0000
Subject: [PATCH] cnvkit module (#173)

* Normal bam file added
* Normal bam.bai file added
* Tumour bam bai files added
* human dir added
* annotation dir added
* cnvkit dir added
* cnvkit dir added
* Update software/cnvkit/main.nf
Co-authored-by: Maxime Garcia
* Update software/cnvkit/main.nf
Co-authored-by: Maxime Garcia
* Update software/cnvkit/main.nf
Co-authored-by: Maxime Garcia
* changed input filenames
* edited main.nf
* edited main.nf
* edited meta.nf
* edited test.yml
* filters.yml
* edited main
* edited main
* edited meta
* edited meta
* edited main
* removed unwanted lines
* edited the path to the main.nf
* removed function.nf
* added functions.nf
* deleted 2 workflows and craeted a common workflow
* deleted paths for 2 workflows and created paths for a common workflow
* Deleted annotation dir
* deleted params.modules
* Edited meta.with_normal
* deleted normal_280_sub_chr21.bam
* deleted normal_280_sub_chr21.bam.bai
* deleted tumour_278_sub_chr21.bam
* deleted tumour_278_sub_chr21.bam.bai
* Edited input and script parts
* Edited input part
* Added
* Edited args
* Edited script
* Edited input
* Changed annotation to annotationfile
* Changed description of the tool
* edited singularuty container
* edited input
* line 44 removed trailing whitespace
* Edited addParams
* Deleted pdf output
* Deleted pdf output
* edited the path to main.nf
* edited path to the main.nf
* Added docker image version
* Removed extra ../
* added md5sums
* added md5sums
* Update software/cnvkit/main.nf
Co-authored-by: Harshil Patel
* Update software/cnvkit/main.nf
Co-authored-by: Harshil Patel
* Edited the script
* Edited the input
* Edited main.nf
* Edited main.nf
* edited md5sum for reference.cnn
* removed human fasta
* removed human fasta.fai
* added GRCh38 fasta
* added GRCh38 fasta.fai
* added hg19 fasta.fai
* added hg19 fasta
* Edited fasta file name
* Edited bed file names and md5sums
* Edited md5sums
* edited the input and script section
* edited input section
* added targetfile
* changed the files
* changed the output files
* added bam files
* added bam files
* remove files
* added md5sums
* replace file
* added files
* edited tests/software/cnvkit files
* edited tests/software/cnvkit files
* edited authors list
* removed files
* added files
* added files
* added files
* added files
* added file
* added file
* added file
* added file
* edited files
* edited files
* edited files
* edited files
* edited files
* edited files
* added new module
* added new module
* edited files
* edited file
* edited file
* edited file
* removed files

Co-authored-by: kaurravneet4123
Co-authored-by: Maxime Garcia
Co-authored-by: Harshil Patel
---
NOTE(review): the line structure of this patch was restored from a copy in
which every newline and indentation run had been collapsed to one space.
All tokens are unchanged. Reconstructed, and to be confirmed against commit
8a2a9f7e before applying: indentation widths and column alignment, blank
lines between commit-message bullets, the tab separators in baits.bed, and
the width of the whitespace-only line removed in the first hunk of
tests/config/pytest_software.yml.

 software/cnvkit/functions.nf               | 59 +++++++++++++++
 software/cnvkit/main.nf                    | 46 ++++++++++++
 software/cnvkit/meta.yml                   | 87 ++++++++++++++++++++++
 tests/config/nextflow.config               |  2 +-
 tests/config/pytest_software.yml           | 10 ++-
 tests/data/genomics/sarscov2/bed/baits.bed |  5 ++
 tests/software/cnvkit/main.nf              | 20 +++++
 tests/software/cnvkit/test.yml             | 27 +++++++
 8 files changed, 254 insertions(+), 2 deletions(-)
 create mode 100755 software/cnvkit/functions.nf
 create mode 100755 software/cnvkit/main.nf
 create mode 100755 software/cnvkit/meta.yml
 mode change 100644 => 100755 tests/config/pytest_software.yml
 create mode 100755 tests/data/genomics/sarscov2/bed/baits.bed
 create mode 100755 tests/software/cnvkit/main.nf
 create mode 100755 tests/software/cnvkit/test.yml

diff --git a/software/cnvkit/functions.nf b/software/cnvkit/functions.nf
new file mode 100755
index 00000000..d25eea86
--- /dev/null
+++ b/software/cnvkit/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ *  Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args          = args.args ?: ''
+    options.args2         = args.args2 ?: ''
+    options.publish_by_id = args.publish_by_id ?: false
+    options.publish_dir   = args.publish_dir ?: ''
+    options.publish_files = args.publish_files
+    options.suffix        = args.suffix ?: ''
+    return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }  // Remove empty entries
+    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+    if (!args.filename.endsWith('.version.txt')) {
+        def ioptions = initOptions(args.options)
+        def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+        if (ioptions.publish_by_id) {
+            path_list.add(args.publish_id)
+        }
+        if (ioptions.publish_files instanceof Map) {
+            for (ext in ioptions.publish_files) {
+                if (args.filename.endsWith(ext.key)) {
+                    def ext_list = path_list.collect()
+                    ext_list.add(ext.value)
+                    return "${getPathFromList(ext_list)}/$args.filename"
+                }
+            }
+        } else if (ioptions.publish_files == null) {
+            return "${getPathFromList(path_list)}/$args.filename"
+        }
+    }
+}
diff --git a/software/cnvkit/main.nf b/software/cnvkit/main.nf
new file mode 100755
index 00000000..ca6acbae
--- /dev/null
+++ b/software/cnvkit/main.nf
@@ -0,0 +1,46 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options    = initOptions(params.options)
+
+process CNVKIT {
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+    conda (params.enable_conda ? "bioconda::cnvkit=0.9.8" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/cnvkit:0.9.8--py_0"
+    } else {
+        container "quay.io/biocontainers/cnvkit:0.9.8--py_0"
+    }
+
+    input:
+    tuple val(meta), path(tumourbam), path(normalbam)
+    path fasta
+    path targetfile
+
+    output:
+    tuple val(meta), path("*.bed"), emit: bed
+    tuple val(meta), path("*.cnn"), emit: cnn
+    tuple val(meta), path("*.cnr"), emit: cnr
+    tuple val(meta), path("*.cns"), emit: cns
+    path "*.version.txt"          , emit: version
+
+    script:
+    def software = getSoftwareName(task.process)
+    def prefix   = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
+    """
+    cnvkit.py batch \\
+        $tumourbam \\
+        --normal $normalbam\\
+        --fasta $fasta \\
+        --targets $targetfile \\
+        $options.args
+
+    cnvkit.py version | sed -e "s/cnvkit v//g" > ${software}.version.txt
+    """
+}
diff --git a/software/cnvkit/meta.yml b/software/cnvkit/meta.yml
new file mode 100755
index 00000000..d3d81ecc
--- /dev/null
+++ b/software/cnvkit/meta.yml
@@ -0,0 +1,87 @@
+name: cnvkit
+description: Copy number variant detection from high-throughput sequencing data
+keywords:
+  - bam
+  - fasta
+  - copy number
+tools:
+  - cnvkit:
+      description: |
+        CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent.
+      homepage: https://cnvkit.readthedocs.io/en/stable/index.html
+      documentation: https://cnvkit.readthedocs.io/en/stable/index.html
+params:
+  - outdir:
+      type: string
+      description: |
+        The pipeline's output directory. By default, the module will
+        output files into `$params.outdir/`
+  - publish_dir_mode:
+      type: string
+      description: |
+        Value for the Nextflow `publishDir` mode parameter.
+        Available: symlink, rellink, link, copy, copyNoFollow, move.
+  - enable_conda:
+      type: boolean
+      description: |
+        Run the module with Conda using the software specified
+        via the `conda` directive
+  - singularity_pull_docker_container:
+      type: boolean
+      description: |
+        Instead of directly downloading Singularity images for use with Singularity,
+        force the workflow to pull and convert Docker containers instead.
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - tumourbam:
+      type: file
+      description: |
+        Input tumour sample bam file
+  - normalbam:
+      type: file
+      description: |
+        Input normal sample bam file
+  - fasta:
+      type: file
+      description: |
+        Input reference genome fasta file
+  - targetfile:
+      type: file
+      description: |
+        Input target bed file
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bed:
+      type: file
+      description: File containing genomic regions
+      pattern: "*.{bed}"
+  - cnn:
+      type: file
+      description: File containing coverage information
+      pattern: "*.{cnn}"
+  - cnr:
+      type: file
+      description: File containing copy number ratio information
+      pattern: "*.{cnr}"
+  - cns:
+      type: file
+      description: File containing copy number segment information
+      pattern: "*.{cns}"
+  - version:
+      type: file
+      description: File containing software version
+      pattern: "*.{version.txt}"
+authors:
+  - "@kaurravneet4123"
+  - "@KevinMenden"
+  - "@MaxUlysse"
+  - "@drpatelh"
+
diff --git a/tests/config/nextflow.config b/tests/config/nextflow.config
index 4b0aad5a..8b508b78 100644
--- a/tests/config/nextflow.config
+++ b/tests/config/nextflow.config
@@ -7,7 +7,7 @@ params {
 
 process {
     cpus = 2
-    memory = 6.GB
+    memory = 3.GB
     time = 48.h
 }
 
diff --git a/tests/config/pytest_software.yml b/tests/config/pytest_software.yml
old mode 100644
new mode 100755
index df39d82f..2d459f88
--- a/tests/config/pytest_software.yml
+++ b/tests/config/pytest_software.yml
@@ -99,7 +99,7 @@ bismark_summary:
 blast_blastn:
   - software/blast/blastn/**
   - tests/software/blast/blastn/**
-    
+
 blast_makeblastdb:
   - software/blast/makeblastdb/**
   - tests/software/blast/makeblastdb/**
@@ -150,6 +150,10 @@ cat_fastq:
   - software/cat/fastq/**
   - tests/software/cat/fastq/**
 
+cnvkit:
+  - software/cnvkit/**
+  - tests/software/cnvkit/**
+
 cutadapt:
   - software/cutadapt/**
   - tests/software/cutadapt/**
@@ -339,6 +343,10 @@ seqkit_split2:
   - software/seqkit/split2/**
   - tests/software/seqkit/split2/**
 
+sequenza_wiggle:
+  - software/sequenza/wiggle/**
+  - tests/software/sequenza/wiggle/**
+
 seqwish_induce:
   - software/seqwish/induce/**
   - tests/software/seqwish/induce/**
diff --git a/tests/data/genomics/sarscov2/bed/baits.bed b/tests/data/genomics/sarscov2/bed/baits.bed
new file mode 100755
index 00000000..1a3a5a25
--- /dev/null
+++ b/tests/data/genomics/sarscov2/bed/baits.bed
@@ -0,0 +1,5 @@
+MT192765.1	1098	1127
+MT192765.1	4190	4255
+MT192765.1	5697	5716
+MT192765.1	5798	5807
+MT192765.1	11217	11253
diff --git a/tests/software/cnvkit/main.nf b/tests/software/cnvkit/main.nf
new file mode 100755
index 00000000..b32f1fb6
--- /dev/null
+++ b/tests/software/cnvkit/main.nf
@@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { CNVKIT } from '../../../software/cnvkit/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] )
+
+workflow test_cnvkit {
+
+    tumourbam = [ file("${launchDir}/tests/data/genomics/sarscov2/bam/test_paired_end.sorted.bam", checkIfExists: true) ]
+    normalbam = [ file("${launchDir}/tests/data/genomics/sarscov2/bam/test_single_end.sorted.bam", checkIfExists: true) ]
+
+    def input = []
+    input = [ [ id:'test' ], // meta map
+              tumourbam, normalbam ]
+
+    fasta = [ file("${launchDir}/tests/data/genomics/sarscov2/fasta/test_genome.fasta", checkIfExists: true) ]
+    targetfile = [ file("${launchDir}/tests/data/genomics/sarscov2/bed/baits.bed", checkIfExists: true) ]
+
+    CNVKIT ( input, fasta, targetfile )
+}
diff --git a/tests/software/cnvkit/test.yml b/tests/software/cnvkit/test.yml
new file mode 100755
index 00000000..dbfbee33
--- /dev/null
+++ b/tests/software/cnvkit/test.yml
@@ -0,0 +1,27 @@
+- name: cnvkit
+  command: nextflow run ./tests/software/cnvkit/ -entry test_cnvkit -c tests/config/nextflow.config
+  tags:
+    - cnvkit
+  files:
+    - path: output/cnvkit/baits.target.bed
+      md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
+    - path: output/cnvkit/baits.antitarget.bed
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/cnvkit/reference.cnn
+      md5sum: ac99c1ad8b917b96ae15119146c91ab9
+    - path: output/cnvkit/test_paired_end.sorted.targetcoverage.cnn
+      md5sum: 3fe80b6013ffc3e9968345e810158215
+    - path: output/cnvkit/test_paired_end.sorted.antitargetcoverage.cnn
+      md5sum: 203caf8cef6935bb50b4138097955cb8
+    - path: output/cnvkit/test_single_end.sorted.targetcoverage.cnn
+      md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
+    - path: output/cnvkit/test_single_end.sorted.antitargetcoverage.cnn
+      md5sum: 203caf8cef6935bb50b4138097955cb8
+    - path: output/cnvkit/test_paired_end.sorted.cnr
+      md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3
+    - path: output/cnvkit/test_paired_end.sorted.cns
+      md5sum: 060af1aa637ed51812af19bcce24fcfe
+    - path: output/cnvkit/test_paired_end.sorted.bintest.cns
+      md5sum: 6544d979475def8a9f69ba42a985668d
+    - path: output/cnvkit/test_paired_end.sorted.call.cns
+      md5sum: f2ca59b4d50b0c317adc526c1b99b622