add controlfreec (#1333)

* add drafty controlfreec

* get software version

* use maps in map

* update paths to new and soon-to-be merged test files, add more input docu

* Stab at documenting args map

* Update syntax

* Bit more description

* Make the linter happy

* tests pass locally

* Add outputs & docu

* tests are failing locally now :/ but cpn file can also be added

* All tests passing, need to update test data again to add folder

* Clean up files

* Clean up files

* Clean up files

* Don't know how to get the test to run with the directory for now. They pass locally though

* Make linter happy

* Name process back

* Update to use tar folder

* fix the checksum
This commit is contained in:
FriederikeHanssen 2022-02-28 19:08:58 +01:00 committed by GitHub
parent 38ffbfdb63
commit c189835b1b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 436 additions and 1 deletions

View file

@ -0,0 +1,158 @@
// Runs Control-FREEC on a tumor/normal pair.
//
// The process assembles an INI-style `config.txt` (sections [general], [control],
// [sample], [BAF], [target]) from the staged input files and from the nested map
// in `task.ext.args`, then calls `freec -conf config.txt`.
//
// `task.ext.args` is expected to be a map of maps keyed by "general", "control",
// "sample" and "BAF". Every option is looked up with the safe-navigation operator,
// so missing sections or keys are simply skipped. An option is only written when
// its value is truthy in Groovy, so values should be passed as non-empty strings
// (e.g. "0" rather than 0).
process CONTROLFREEC {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::control-freec=11.6" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1':
        'quay.io/biocontainers/control-freec:11.6--h1b792b2_1' }"

    input:
    tuple val(meta), path(mpileup_normal), path(mpileup_tumor), path(cpn_normal), path(cpn_tumor), path(minipileup_normal), path(minipileup_tumor)
    path fasta
    path fai
    path snp_position
    path known_snps
    path known_snps_tbi
    path chr_directory
    path mappability
    path target_bed
    path gccontent_profile

    output:
    tuple val(meta), path("*_ratio.BedGraph")   , emit: bedgraph, optional: true
    tuple val(meta), path("*_control.cpn")      , emit: control_cpn
    tuple val(meta), path("*_sample.cpn")       , emit: sample_cpn
    tuple val(meta), path("GC_profile.*.cpn")   , emit: gcprofile_cpn, optional:true
    tuple val(meta), path("*_BAF.txt")          , emit: BAF
    tuple val(meta), path("*_CNVs")             , emit: CNV
    tuple val(meta), path("*_info.txt")         , emit: info
    tuple val(meta), path("*_ratio.txt")        , emit: ratio
    tuple val(meta), path("config.txt")         , emit: config
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Each definition below evaluates to either one "key = value" config line or
    // an empty string. Empty strings are still echoed, which only adds blank
    // lines to config.txt.

    //"General" configurations
    def bedgraphoutput              = task.ext.args?["general"]?["bedgraphoutput"]              ? "BedGraphOutput = ${task.ext.args["general"]["bedgraphoutput"]}"                              : ""
    def chr_files                   = chr_directory                                             ? "chrFiles =\${PWD}/${chr_directory}"                                                          : ""
    def chr_length                  = fai                                                       ? "chrLenFile = \${PWD}/${fai}"                                                                 : ""
    def breakpointthreshold         = task.ext.args?["general"]?["breakpointthreshold"]         ? "breakPointThreshold = ${task.ext.args["general"]["breakpointthreshold"]}"                    : ""
    def breakpointtype              = task.ext.args?["general"]?["breakpointtype"]              ? "breakPointType = ${task.ext.args["general"]["breakpointtype"]}"                              : ""
    // The guard must test the same key that is interpolated ("coefficientofvariation");
    // testing a different key would silently drop a user-supplied value.
    def coefficientofvariation      = task.ext.args?["general"]?["coefficientofvariation"]      ? "coefficientOfVariation = ${task.ext.args["general"]["coefficientofvariation"]}"              : ""
    def contamination               = task.ext.args?["general"]?["contamination"]               ? "contamination = ${task.ext.args["general"]["contamination"]}"                                : ""
    def contaminationadjustment     = task.ext.args?["general"]?["contaminationadjustment"]     ? "contaminationAdjustment = ${task.ext.args["general"]["contaminationadjustment"]}"            : ""
    def degree                      = task.ext.args?["general"]?["degree"]                      ? "degree = ${task.ext.args["general"]["degree"]}"                                              : ""
    def forcegccontentnormalization = task.ext.args?["general"]?["forcegccontentnormalization"] ? "forceGCcontentNormalization = ${task.ext.args["general"]["forcegccontentnormalization"]}"    : ""
    def gccontentprofile            = gccontent_profile                                         ? "GCcontentProfile = ${gccontent_profile}"                                                     : ""
    // NOTE(review): this local deliberately reuses the name of the `mappability` input;
    // the right-hand side still refers to the staged input file.
    def mappability                 = mappability                                               ? "gemMappabilityFile = \${PWD}/${mappability}"                                                 : ""
    def intercept                   = task.ext.args?["general"]?["intercept"]                   ? "intercept = ${task.ext.args["general"]["intercept"]}"                                        : ""
    def mincnalength                = task.ext.args?["general"]?["mincnalength"]                ? "minCNAlength = ${task.ext.args["general"]["mincnalength"]}"                                  : ""
    def minmappabilityperwindow     = task.ext.args?["general"]?["minmappabilityperwindow"]     ? "minMappabilityPerWindow = ${task.ext.args["general"]["minmappabilityperwindow"]}"            : ""
    def minexpectedgc               = task.ext.args?["general"]?["minexpectedgc"]               ? "minExpectedGC = ${task.ext.args["general"]["minexpectedgc"]}"                                : ""
    def maxexpectedgc               = task.ext.args?["general"]?["maxexpectedgc"]               ? "maxExpectedGC = ${task.ext.args["general"]["maxexpectedgc"]}"                                : ""
    def minimalsubclonepresence     = task.ext.args?["general"]?["minimalsubclonepresence"]     ? "minimalSubclonePresence = ${task.ext.args["general"]["minimalsubclonepresence"]}"            : ""
    def noisydata                   = task.ext.args?["general"]?["noisydata"]                   ? "noisyData = ${task.ext.args["general"]["noisydata"]}"                                        : ""
    // outputDir is only written when task.ext.prefix is set.
    def output                      = task.ext.prefix                                           ? "outputDir = \${PWD}/${task.ext.prefix}"                                                      : ""
    def ploidy                      = task.ext.args?["general"]?["ploidy"]                      ? "ploidy = ${task.ext.args["general"]["ploidy"]}"                                              : ""
    def printNA                     = task.ext.args?["general"]?["printNA"]                     ? "printNA = ${task.ext.args["general"]["printNA"]}"                                            : ""
    def readcountthreshold          = task.ext.args?["general"]?["readcountthreshold"]          ? "readCountThreshold = ${task.ext.args["general"]["readcountthreshold"]}"                      : ""
    def sex                         = task.ext.args?["general"]?["sex"]                         ? "sex = ${task.ext.args["general"]["sex"]}"                                                    : ""
    def step                        = task.ext.args?["general"]?["step"]                        ? "step = ${task.ext.args["general"]["step"]}"                                                  : ""
    def telocentromeric             = task.ext.args?["general"]?["telocentromeric"]             ? "telocentromeric = ${task.ext.args["general"]["telocentromeric"]} "                           : ""
    def uniquematch                 = task.ext.args?["general"]?["uniquematch"]                 ? "uniqueMatch = ${task.ext.args["general"]["uniquematch"]}"                                    : ""
    def window                      = task.ext.args?["general"]?["window"]                      ? "window = ${task.ext.args["general"]["window"]}"                                              : ""

    //"Control" configurations (normal sample)
    def matefile_normal             = mpileup_normal                                            ? "mateFile = \${PWD}/${mpileup_normal}"                                                        : ""
    def matecopynumberfile_normal   = cpn_normal                                                ? "mateCopyNumberFile = \${PWD}/${cpn_normal}"                                                  : ""
    def minipileup_normal           = minipileup_normal                                         ? "miniPileup = \${PWD}/${minipileup_normal}"                                                   : ""
    def inputformat_normal          = task.ext.args?["control"]?["inputformat"]                 ? "inputFormat = ${task.ext.args["control"]["inputformat"]}"                                    : ""
    def mateorientation_normal      = task.ext.args?["control"]?["mateorientation"]             ? "mateOrientation = ${task.ext.args["control"]["mateorientation"]}"                            : ""

    //"Sample" configuration (tumor sample)
    def matefile_tumor              = mpileup_tumor                                             ? "mateFile = \${PWD}/${mpileup_tumor}"                                                         : ""
    def matecopynumberfile_tumor    = cpn_tumor                                                 ? "mateCopyNumberFile = \${PWD}/${cpn_tumor}"                                                   : ""
    def minipileup_tumor            = minipileup_tumor                                          ? "miniPileup = \${PWD}/${minipileup_tumor}"                                                    : ""
    def inputformat_tumor           = task.ext.args?["sample"]?["inputformat"]                  ? "inputFormat = ${task.ext.args["sample"]["inputformat"]}"                                     : ""
    def mateorientation_tumor       = task.ext.args?["sample"]?["mateorientation"]              ? "mateOrientation = ${task.ext.args["sample"]["mateorientation"]}"                             : ""

    //"BAF" configuration
    def makepileup                  = snp_position                                              ? "makePileup = \${PWD}/${snp_position}"                                                        : ""
    def fastafile                   = fasta                                                     ? "fastaFile = \${PWD}/${fasta}"                                                                : ""
    def minimalcoverageperposition  = task.ext.args?["BAF"]?["minimalcoverageperposition"]      ? "minimalCoveragePerPosition = ${task.ext.args["BAF"]["minimalcoverageperposition"]}"          : ""
    def minimalqualityperposition   = task.ext.args?["BAF"]?["minimalqualityperposition"]       ? "minimalQualityPerPosition = ${task.ext.args["BAF"]["minimalqualityperposition"]}"            : ""
    def shiftinquality              = task.ext.args?["BAF"]?["shiftinquality"]                  ? "shiftInQuality = ${task.ext.args["BAF"]["shiftinquality"]}"                                  : ""
    def snpfile                     = known_snps                                                ? "SNPfile = \$PWD/${known_snps}"                                                               : ""

    //"Target" configuration
    def target_bed                  = target_bed                                                ? "captureRegions = ${target_bed}"                                                              : ""
    """
    touch config.txt

    echo "[general]" >> config.txt
    echo ${bedgraphoutput} >> config.txt
    echo ${breakpointthreshold} >> config.txt
    echo ${breakpointtype} >> config.txt
    echo ${chr_files} >> config.txt
    echo ${chr_length} >> config.txt
    echo ${coefficientofvariation} >> config.txt
    echo ${contamination} >> config.txt
    echo ${contaminationadjustment} >> config.txt
    echo ${degree} >> config.txt
    echo ${forcegccontentnormalization} >> config.txt
    echo ${gccontentprofile} >> config.txt
    echo ${mappability} >> config.txt
    echo ${intercept} >> config.txt
    echo ${mincnalength} >> config.txt
    echo ${minmappabilityperwindow} >> config.txt
    echo ${minexpectedgc} >> config.txt
    echo ${maxexpectedgc} >> config.txt
    echo ${minimalsubclonepresence} >> config.txt
    echo "maxThreads = ${task.cpus}" >> config.txt
    echo ${noisydata} >> config.txt
    echo ${output} >> config.txt
    echo ${ploidy} >> config.txt
    echo ${printNA} >> config.txt
    echo ${readcountthreshold} >> config.txt
    echo ${sex} >> config.txt
    echo ${step} >> config.txt
    echo ${telocentromeric} >> config.txt
    echo ${uniquematch} >> config.txt
    echo ${window} >> config.txt

    echo "[control]" >> config.txt
    echo ${matefile_normal} >> config.txt
    echo ${matecopynumberfile_normal} >> config.txt
    echo ${minipileup_normal} >> config.txt
    echo ${inputformat_normal} >> config.txt
    echo ${mateorientation_normal} >> config.txt

    echo "[sample]" >> config.txt
    echo ${matefile_tumor} >> config.txt
    echo ${matecopynumberfile_tumor} >> config.txt
    echo ${minipileup_tumor} >> config.txt
    echo ${inputformat_tumor} >> config.txt
    echo ${mateorientation_tumor} >> config.txt

    echo "[BAF]" >> config.txt
    echo ${makepileup} >> config.txt
    echo ${fastafile} >> config.txt
    echo ${minimalcoverageperposition} >> config.txt
    echo ${minimalqualityperposition} >> config.txt
    echo ${shiftinquality} >> config.txt
    echo ${snpfile} >> config.txt

    echo "[target]" >> config.txt
    echo ${target_bed} >> config.txt

    freec -conf config.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        controlfreec: \$(echo \$(freec -version 2>&1) | sed 's/^.*Control-FREEC //; s/:.*\$//' | sed -e "s/Control-FREEC v//g" )
    END_VERSIONS
    """
}

View file

@ -0,0 +1,183 @@
name: controlfreec
description: Copy number and genotype annotation from whole genome and whole exome sequencing data
keywords:
- cna
- cnv
- somatic
- single
- tumor-only
tools:
- controlfreec:
description: Copy number and genotype annotation from whole genome and whole exome sequencing data.
homepage: http://boevalab.inf.ethz.ch/FREEC
documentation: http://boevalab.inf.ethz.ch/FREEC/tutorial.html
tool_dev_url: https://github.com/BoevaLab/FREEC/
doi: "10.1093/bioinformatics/btq635"
licence: ['GPL >=2']
input:
- args:
type: map
description: |
Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config.
<optional> parameters can be removed from the map if they are not set. All values must be surrounded by quotes; meta map entries can also be used as values, e.g. sex: meta.sex.
For default values, please check the documentation above.
```
{
[
"general" :[
"bedgraphoutput": <optional>,
"breakpointthreshold": <optional>,
"breakpointtype": <optional>,
"coefficientofvariation": <optional>,
"contamination": <optional>,
"contaminationadjustment": <optional>,
"degree": <optional>,
"forcegccontentnormalization": <optional>,
"gccontentprofile": <optional>,
"intercept": <optional>,
"mincnalength": <optional>,
"minmappabilityperwindow": <optional>,
"minexpectedgc": <optional>,
"maxexpectedgc": <optional>,
"minimalsubclonepresence": <optional>,
"noisydata": <optional>,
"ploidy": <optional>,
"printNA": <optional>,
"readcountthreshold": <optional>,
"sex": <optional>,
"step": <optional>,
"telocentromeric": <optional>,
"uniquematch": <optional>,
"window": <optional>
],
"control":[
"inputformat": <required>,
"mateorientation": <optional>,
],
"sample":[
"inputformat": <required>,
"mateorientation": <optional>,
],
"BAF":[
"minimalcoverageperposition": <optional>,
"minimalqualityperposition": <optional>,
"shiftinquality": <optional>
]
]
}
```
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- mpileup_normal:
type: file
description: File with mapped reads
pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}"
- mpileup_tumor:
type: file
description: File with mapped reads
pattern: "*.{sam,bam,pileup(.gz),bowtie(.gz),eland(.gz),arachne(.gz),psl(.gz),bed(.gz)}"
- cpn_normal:
type: file
description: Raw copy number profiles (optional)
pattern: "*.cpn"
- cpn_tumor:
type: file
description: Raw copy number profiles (optional)
pattern: "*.cpn"
- minipileup_normal:
type: file
description: miniPileup file from previous run (optional)
pattern: "*.pileup"
- minipileup_tumor:
type: file
description: miniPileup file from previous run (optional)
pattern: "*.pileup"
- fasta:
type: file
description: Reference file (optional; required if args 'makePileup' is set)
pattern: "*.{fasta,fna,fa}"
- fai:
type: file
description: Fasta index
pattern: "*.fai"
- snp_position:
type: file
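description: BED or VCF file with SNP positions, written to the config as 'makePileup' and used to create a mini pileup file (optional)
pattern: "*.{bed,vcf}"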
- known_snps:
type: file
description: File with known SNPs
pattern: "*.{vcf,vcf.gz}"
- known_snps_tbi:
type: file
description: Index of known_snps
pattern: "*.tbi"
- chr_directory:
type: file
description: Path to directory with chromosome fasta files (optional, required if gccontentprofile is not provided)
pattern: "*/"
- mappability:
type: file
description: Contains information of mappable positions (optional)
pattern: "*.gem"
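- target_bed:
type: file
description: Sorted bed file containing capture regions (optional)
pattern: "*.bed"
- gccontent_profile:
type: file
description: File with GC-content profile, written to the config as 'GCcontentProfile' (optional)
pattern: "*.cpn"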
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bedgraph:
type: file
description: Bedgraph format for the UCSC genome browser
pattern: "*_ratio.BedGraph"
- control_cpn:
type: file
description: files with raw copy number profiles
pattern: "*_control.cpn"
- sample_cpn:
type: file
description: files with raw copy number profiles
pattern: "*_sample.cpn"
- gcprofile_cpn:
type: file
description: file with GC-content profile.
pattern: "GC_profile.*.cpn"
- BAF:
type: file
description: file with B-allele frequencies for each possibly heterozygous SNP position
pattern: "*_BAF.txt"
- CNV:
type: file
description: file with coordinates of predicted copy number alterations.
pattern: "*_CNVs"
- info:
type: file
description: parsable file with information about FREEC run
pattern: "*_info.txt"
- ratio:
type: file
description: file with ratios and predicted copy number alterations for each window
pattern: "*_ratio.txt"
- config:
type: file
description: Config file used to run Control-FREEC
pattern: "config.txt"
authors:
- "@FriederikeHanssen"

View file

@ -356,6 +356,10 @@ cnvkit/batch:
- modules/cnvkit/batch/**
- tests/modules/cnvkit/batch/**
controlfreec:
- modules/controlfreec/**
- tests/modules/controlfreec/**
cooler/cload:
- modules/cooler/cload/**
- tests/modules/cooler/cload/**

View file

@ -123,10 +123,12 @@ params {
genome_21_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta"
genome_21_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai"
genome_21_dict = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.dict"
genome_21_sizes = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.sizes"
genome_21_interval_list = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/genome.interval_list"
genome_21_multi_interval_bed = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed"
genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz"
genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi"
genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz"
dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz"
dbsnp_146_hg38_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi"
@ -272,6 +274,9 @@ params {
test_genome21_indels_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz"
test_genome21_indels_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/test.genome_21.somatic_sv.vcf.gz.tbi"
test_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test.mpileup.gz"
test2_mpileup = "${test_data_dir}/genomics/homo_sapiens/illumina/mpileup/test2.mpileup.gz"
test_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak"
test2_broadpeak = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak"

View file

@ -0,0 +1,37 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { CONTROLFREEC } from '../../../modules/controlfreec/main.nf'
include { UNTAR } from '../../../modules/untar/main.nf'
// Exercises CONTROLFREEC with a normal/tumor mpileup pair, known SNPs, a
// capture-region BED and per-chromosome fasta files unpacked by UNTAR.
workflow test_controlfreec {

    // Tuple layout follows the first CONTROLFREEC input channel:
    // meta, normal mpileup, tumor mpileup, then cpn (normal/tumor) and
    // minipileup (normal/tumor), which are left empty here.
    pileup_pair = [
        [ id:'test', single_end:false, sex:'XX' ], // meta map
        file(params.test_data['homo_sapiens']['illumina']['test_mpileup'], checkIfExists: true),
        file(params.test_data['homo_sapiens']['illumina']['test2_mpileup'], checkIfExists: true),
        [],[],[],[]
    ]

    // Reference and annotation files
    reference_fasta     = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
    reference_fai       = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)

    known_sites         = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
    known_sites_tbi     = file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)

    chromosomes_archive = file(params.test_data['homo_sapiens']['genome']['genome_21_chromosomes_dir'], checkIfExists: true)
    capture_regions     = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)

    // The chromosome fasta files ship as an archive and are unpacked first.
    UNTAR(chromosomes_archive)

    CONTROLFREEC (
        pileup_pair,
        reference_fasta,
        reference_fai,
        [],                 // snp_position (not provided)
        known_sites,
        known_sites_tbi,
        UNTAR.out.untar,    // chr_directory
        [],                 // mappability (not provided)
        capture_regions,
        []                  // gccontent_profile (not provided)
    )
}

View file

@ -0,0 +1,26 @@
// Test configuration: publishes each process's output under a folder named
// after the process and supplies the nested Control-FREEC option map.
process {

    // Folder name = last ':'-separated part of the process name, cut at the
    // first '_' and lower-cased.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    withName: CONTROLFREEC {
        // Closure so that `meta` is resolved per task; the sections correspond
        // to the [general], [control] and [sample] blocks of the FREEC config.
        ext.args = { [
            "general": [
                bedgraphoutput    : "TRUE",
                noisydata         : "TRUE",
                minexpectedgc     : "0",
                readcountthreshold: "1",
                sex               : meta.sex,
                window            : "10"
            ],
            "control": [
                inputformat    : "pileup",
                mateorientation: "FR"
            ],
            "sample": [
                inputformat    : "pileup",
                mateorientation: "FR"
            ]
        ] }
    }
}

View file

@ -0,0 +1,22 @@
# Test case for the controlfreec module: runs the `test_controlfreec` entry
# workflow and lists the files expected under output/controlfreec/.
- name: controlfreec test_controlfreec
command: nextflow run tests/modules/controlfreec -entry test_controlfreec -c tests/config/nextflow.config
tags:
- controlfreec
files:
# config.txt has no md5sum, so only its presence is listed here
# (presumably because it embeds absolute working-directory paths — confirm).
- path: output/controlfreec/config.txt
# File names are prefixed with the input mpileup names: test.mpileup.gz for
# the control output, test2.mpileup.gz for the sample outputs.
- path: output/controlfreec/test.mpileup.gz_control.cpn
md5sum: 1768b571677c418560e5a8fe203bdc79
- path: output/controlfreec/test2.mpileup.gz_BAF.txt
md5sum: 3bb7437001cf061a77eaf87b8558c48d
- path: output/controlfreec/test2.mpileup.gz_CNVs
md5sum: 1f4f5834dbd1490afdb22f6d3091c4c9
- path: output/controlfreec/test2.mpileup.gz_info.txt
md5sum: 1a3055d35028525ccc9e693cc9f335e0
- path: output/controlfreec/test2.mpileup.gz_ratio.BedGraph
md5sum: 8ba455b232be20cdcc5bf1e4035e8032
- path: output/controlfreec/test2.mpileup.gz_ratio.txt
md5sum: b76b2434de710325069e37fb1e132760
- path: output/controlfreec/test2.mpileup.gz_sample.cpn
md5sum: c80dad58a77b1d7ba6d273999f4b4b4b
- path: output/controlfreec/versions.yml
md5sum: ff93f6466d4686aab708425782c6c848