cnvkit module (#173)

* Normal bam file added

* Normal bam.bai file added

* Tumour bam bai files added

* human dir added

* annotation dir added

* cnvkit dir added

* cnvkit dir added

* Update software/cnvkit/main.nf

Co-authored-by: Maxime Garcia <maxime.garcia@scilifelab.se>

* Update software/cnvkit/main.nf

Co-authored-by: Maxime Garcia <maxime.garcia@scilifelab.se>

* Update software/cnvkit/main.nf

Co-authored-by: Maxime Garcia <maxime.garcia@scilifelab.se>

* changed input filenames

* edited main.nf

* edited main.nf

* edited meta.nf

* edited test.yml

* filters.yml

* edited main

* edited main

* edited meta

* edited meta

* edited main

* removed unwanted lines

* edited the path to the main.nf

* removed function.nf

* added functions.nf

* deleted 2 workflows and created a common workflow

* deleted paths for 2 workflows and created paths for a common workflow

* Deleted annotation dir

* deleted params.modules

* Edited meta.with_normal

* deleted normal_280_sub_chr21.bam

* deleted normal_280_sub_chr21.bam.bai

* deleted tumour_278_sub_chr21.bam

* deleted tumour_278_sub_chr21.bam.bai

* Edited input and script parts

* Edited input part

* Added

* Edited args

* Edited script

* Edited input

* Changed annotation to annotationfile

* Changed description of the tool

* edited singularity container

* edited input

* line 44 removed trailing whitespace

* Edited addParams

* Deleted pdf output

* Deleted pdf output

* edited the path to main.nf

* edited path to the main.nf

* Added docker image version

* Removed extra ../

* added md5sums

* added md5sums

* Update software/cnvkit/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Update software/cnvkit/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Edited the script

* Edited the input

* Edited main.nf

* Edited main.nf

* edited md5sum for reference.cnn

* removed human fasta

* removed human fasta.fai

* added GRCh38 fasta

* added GRCh38 fasta.fai

* added hg19 fasta.fai

* added hg19 fasta

* Edited fasta file name

* Edited bed file names and md5sums

* Edited md5sums

* edited the input and script section

* edited input section

* added targetfile

* changed the files

* changed the output files

* added bam files

* added bam files

* remove files

* added md5sums

* replace file

* added files

* edited tests/software/cnvkit files

* edited tests/software/cnvkit files

* edited authors list

* removed files

* added files

* added files

* added files

* added files

* added file

* added file

* added file

* added file

* edited files

* edited files

* edited files

* edited files

* edited files

* edited files

* added new module

* added new module

* edited files

* edited file

* edited file

* edited file

* removed files

Co-authored-by: kaurravneet4123 <kaurravneet4123@yahoo.com@users.noreply.github.com>
Co-authored-by: Maxime Garcia <maxime.garcia@scilifelab.se>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
This commit is contained in:
Ravneet Bhuller 2021-03-22 22:27:30 +00:00 committed by GitHub
parent 53109d53c0
commit 8a2a9f7e81
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 254 additions and 2 deletions

59
software/cnvkit/functions.nf Executable file
View file

@ -0,0 +1,59 @@
/*
* -----------------------------------------------------
* Utility functions used in nf-core DSL2 module files
* -----------------------------------------------------
*/
/*
 * Derive the lower-case tool name from a process name (e.g. the value of $task.process):
 * keep the last ':'-separated component, then the part before its first '_'.
 */
def getSoftwareName(task_process) {
    def scoped_parts = task_process.tokenize(':')
    def name_parts   = scoped_parts[-1].tokenize('_')
    def tool         = name_parts[0]
    return tool.toLowerCase()
}
/*
 * Build the Groovy Map of module options, filling in a default for every key
 * the caller left unset. `publish_files` is passed through untouched (it may be null).
 */
def initOptions(Map args) {
    return [
        args          : args.args ?: '',
        args2         : args.args2 ?: '',
        publish_by_id : args.publish_by_id ?: false,
        publish_dir   : args.publish_dir ?: '',
        publish_files : args.publish_files,
        suffix        : args.suffix ?: ''
    ]
}
/*
 * Tidy up and join elements of a list to return a path string.
 *
 * Entries that are null, empty or whitespace-only are dropped; the remaining
 * entries are trimmed, stripped of leading and trailing slashes, and joined with '/'.
 */
def getPathFromList(path_list) {
    // Rely on Groovy truth: null and '' are both false. The previous form,
    // `!item?.trim().isEmpty()`, only null-guarded trim() and threw on a null entry.
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then remove any run of slashes at either end of the entry
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}
/*
 * Work out where a module result file should be published.
 *
 * Expects args.filename and args.options, plus args.publish_dir / args.publish_id
 * as fallbacks. Returns "<dir>/<filename>", or null when the file is a
 * '.version.txt' file or is not selected by options.publish_files.
 */
def saveFiles(Map args) {
    // Version files never get a publish path
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts      = initOptions(args.options)
    def base_dirs = [ opts.publish_dir ?: args.publish_dir ]
    if (opts.publish_by_id) {
        base_dirs << args.publish_id
    }

    def selection = opts.publish_files
    if (selection instanceof Map) {
        // First suffix -> sub-directory entry whose key matches the end of the filename wins
        def match = selection.find { entry -> args.filename.endsWith(entry.key) }
        if (match == null) {
            return null
        }
        return "${getPathFromList(base_dirs + [ match.value ])}/$args.filename"
    }
    if (selection == null) {
        // No selection configured: every file goes to the base directory
        return "${getPathFromList(base_dirs)}/$args.filename"
    }
    // Any other non-null value (e.g. false) yields no path
    return null
}

46
software/cnvkit/main.nf Executable file
View file

@ -0,0 +1,46 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'

// Module options; defaults to an empty map and is normalised by initOptions
params.options = [:]
def options    = initOptions(params.options)

/*
 * Run `cnvkit.py batch` on a tumour BAM with a normal BAM as control.
 *
 * Inputs : [ meta, tumourbam, normalbam ], a reference fasta and a target file.
 * Outputs: every *.bed, *.cnn, *.cnr and *.cns file found in the task directory
 *          (each paired with meta), plus a <software>.version.txt file.
 * Extra command-line arguments are appended from options.args.
 */
process CNVKIT {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }

    conda (params.enable_conda ? "bioconda::cnvkit=0.9.8" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/cnvkit:0.9.8--py_0"
    } else {
        container "quay.io/biocontainers/cnvkit:0.9.8--py_0"
    }

    input:
    tuple val(meta), path(tumourbam), path(normalbam)
    path fasta
    path targetfile

    output:
    tuple val(meta), path("*.bed"), emit: bed
    tuple val(meta), path("*.cnn"), emit: cnn
    tuple val(meta), path("*.cnr"), emit: cnr
    tuple val(meta), path("*.cns"), emit: cns
    path "*.version.txt"          , emit: version

    script:
    def software = getSoftwareName(task.process)
    // Every continued line ends with " \\" so that arguments stay separate
    // tokens even if the following line carries no leading whitespace.
    """
    cnvkit.py batch \\
        $tumourbam \\
        --normal $normalbam \\
        --fasta $fasta \\
        --targets $targetfile \\
        $options.args

    cnvkit.py version | sed -e "s/cnvkit v//g" > ${software}.version.txt
    """
}

87
software/cnvkit/meta.yml Executable file
View file

@ -0,0 +1,87 @@
# Module metadata for the cnvkit module: tool description, parameters, inputs and outputs.
name: cnvkit
description: Copy number variant detection from high-throughput sequencing data
keywords:
  - bam
  - fasta
  - copy number
tools:
  - cnvkit:
      description: |
        CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent.
      homepage: https://cnvkit.readthedocs.io/en/stable/index.html
      documentation: https://cnvkit.readthedocs.io/en/stable/index.html
params:
  - outdir:
      type: string
      description: |
        The pipeline's output directory. By default, the module will
        output files into `$params.outdir/<SOFTWARE>`
  - publish_dir_mode:
      type: string
      description: |
        Value for the Nextflow `publishDir` mode parameter.
        Available: symlink, rellink, link, copy, copyNoFollow, move.
  - enable_conda:
      type: boolean
      description: |
        Run the module with Conda using the software specified
        via the `conda` directive
  - singularity_pull_docker_container:
      type: boolean
      description: |
        Instead of directly downloading Singularity images for use with Singularity,
        force the workflow to pull and convert Docker containers instead.
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - tumourbam:
      type: file
      description: |
        Input tumour sample bam file
  - normalbam:
      type: file
      description: |
        Input normal sample bam file
  - fasta:
      type: file
      description: |
        Input reference genome fasta file
  - targetfile:
      type: file
      description: |
        Input target bed file
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bed:
      type: file
      description: File containing genomic regions
      pattern: "*.{bed}"
  - cnn:
      type: file
      description: File containing coverage information
      pattern: "*.{cnn}"
  - cnr:
      type: file
      description: File containing copy number ratio information
      pattern: "*.{cnr}"
  - cns:
      type: file
      description: File containing copy number segment information
      pattern: "*.{cns}"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"
authors:
  - "@kaurravneet4123"
  - "@KevinMenden"
  - "@MaxUlysse"
  - "@drpatelh"

View file

@ -7,7 +7,7 @@ params {
process { process {
cpus = 2 cpus = 2
memory = 6.GB memory = 3.GB
time = 48.h time = 48.h
} }

8
tests/config/pytest_software.yml Normal file → Executable file
View file

@ -150,6 +150,10 @@ cat_fastq:
- software/cat/fastq/** - software/cat/fastq/**
- tests/software/cat/fastq/** - tests/software/cat/fastq/**
cnvkit:
- software/cnvkit/**
- tests/software/cnvkit/**
cutadapt: cutadapt:
- software/cutadapt/** - software/cutadapt/**
- tests/software/cutadapt/** - tests/software/cutadapt/**
@ -339,6 +343,10 @@ seqkit_split2:
- software/seqkit/split2/** - software/seqkit/split2/**
- tests/software/seqkit/split2/** - tests/software/seqkit/split2/**
sequenza_wiggle:
- software/sequenza/wiggle/**
- tests/software/sequenza/wiggle/**
seqwish_induce: seqwish_induce:
- software/seqwish/induce/** - software/seqwish/induce/**
- tests/software/seqwish/induce/** - tests/software/seqwish/induce/**

View file

@ -0,0 +1,5 @@
MT192765.1 1098 1127
MT192765.1 4190 4255
MT192765.1 5697 5716
MT192765.1 5798 5807
MT192765.1 11217 11253

20
tests/software/cnvkit/main.nf Executable file
View file

@ -0,0 +1,20 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CNVKIT } from '../../../software/cnvkit/main.nf' addParams( options: [ 'args': '--output-reference reference.cnn' ] )
/*
 * Smoke test for the CNVKIT module using the SARS-CoV-2 test data:
 * the paired-end BAM is passed as the tumour sample and the single-end BAM as the normal.
 */
workflow test_cnvkit {
    // Reference genome and target regions
    fasta      = [ file("${launchDir}/tests/data/genomics/sarscov2/fasta/test_genome.fasta", checkIfExists: true) ]
    targetfile = [ file("${launchDir}/tests/data/genomics/sarscov2/bed/baits.bed", checkIfExists: true) ]

    // Sample alignments
    normalbam = [ file("${launchDir}/tests/data/genomics/sarscov2/bam/test_single_end.sorted.bam", checkIfExists: true) ]
    tumourbam = [ file("${launchDir}/tests/data/genomics/sarscov2/bam/test_paired_end.sorted.bam", checkIfExists: true) ]

    // [ meta map, tumour BAM, normal BAM ]
    def input = [ [ id:'test' ], tumourbam, normalbam ]

    CNVKIT ( input, fasta, targetfile )
}

27
tests/software/cnvkit/test.yml Executable file
View file

@ -0,0 +1,27 @@
# Expected output files (with MD5 checksums) for the test_cnvkit workflow.
- name: cnvkit
  command: nextflow run ./tests/software/cnvkit/ -entry test_cnvkit -c tests/config/nextflow.config
  tags:
    - cnvkit
  files:
    - path: output/cnvkit/baits.target.bed
      md5sum: 26d25ff2d6c45b6d92169b3559c6acdb
    # d41d8cd98f00b204e9800998ecf8427e is the MD5 of empty content, i.e. this file is expected to be empty
    - path: output/cnvkit/baits.antitarget.bed
      md5sum: d41d8cd98f00b204e9800998ecf8427e
    - path: output/cnvkit/reference.cnn
      md5sum: ac99c1ad8b917b96ae15119146c91ab9
    - path: output/cnvkit/test_paired_end.sorted.targetcoverage.cnn
      md5sum: 3fe80b6013ffc3e9968345e810158215
    - path: output/cnvkit/test_paired_end.sorted.antitargetcoverage.cnn
      md5sum: 203caf8cef6935bb50b4138097955cb8
    - path: output/cnvkit/test_single_end.sorted.targetcoverage.cnn
      md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7
    - path: output/cnvkit/test_single_end.sorted.antitargetcoverage.cnn
      md5sum: 203caf8cef6935bb50b4138097955cb8
    - path: output/cnvkit/test_paired_end.sorted.cnr
      md5sum: 7e37d73ab604dbc3fe4ebb56aca9bdc3
    - path: output/cnvkit/test_paired_end.sorted.cns
      md5sum: 060af1aa637ed51812af19bcce24fcfe
    - path: output/cnvkit/test_paired_end.sorted.bintest.cns
      md5sum: 6544d979475def8a9f69ba42a985668d
    - path: output/cnvkit/test_paired_end.sorted.call.cns
      md5sum: f2ca59b4d50b0c317adc526c1b99b622