* First commit

* putting correct links for singularity and docker containers (just had to search for bioconda+ascat to find them, and then put them in like the rest of the nf-core tools had it)

* adding first try of relevant commands (not working yet, just took their basic pipeline example)

* test commit

* remove test

* starting up work with module after 3.0.0 upgrade

* add ascat.prepareHTS statement

* add location of docker for new mulled alleleCounter+ASCAT container

* first full run with ASCAT on HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam

* add notes on dropbox download

* use a newer pytest_modules.yml

* add output

* trying to align with current Sarek output

* adding in FH comments

* busy clearing up arguments and testing. Still WIP

* first working run, in nextflow, with sarek-like output. Still needs more work on input arguments

* cleaning up before writing up findings

* testing with putting in arguments in args

* draft for solution 3 style for arguments

* one more test added

* adding FH map

* finished testing maps for args

* wrap-up cram/crai test successfully

* updates to address ability to put in ref.fasta argument for cram running

* adding remaining import-HTS commands in as args, and removing the chr21/chr22 only testing to test-nextflow.config

* first test with auto-downloading the s3-data (when not given as an argument)

* removing download-logic for supporting files, documenting in meta.yml, fixing ref_fasta bug

* adding mulled singularity container

* removing tests

* fix left padding lint issue

* lint failure in meta.yml

* more linting errors

* add when argument

* adding stub functionality

* add stub run

* correct md5sum for versions.yml

* more testing with -runstub

* stub code in pure bash - not mixed with R

* reformat version.yml

* get rid of absolute paths in test.yml

* correct wrong md5sum

* adding allelecount conda link

* rename normal_bam to input_bam etc

* let the pipeline dev worry about matching the right loci and allele files

* dont hardcode default genomebuild

* adding download instruction comment

* add doi

* fix conda addition bug

* add args documentation

* test new indent

* new test with meta.yml indentation

* retry with new meta.yml

* retry with new meta.yml - now with empty lines around

* retry with new meta.yml - remove trailing whitespace

* trying to fix found quote character that cannot start any token error

* try with one empty line above triple-quote and no empty line below

* trying with pipe character

* checking if its the ending triple quote

* one more try with meta.yml

* test update bioconda versioning for linting failure

* test update bioconda versioning for linting failure 2

* testing allelecounter version error on conda

Co-authored-by: @lassefolkersen 
Co-authored-by: @FriederikeHanssen
This commit is contained in:
Lasse Folkersen 2022-03-15 11:18:43 +01:00 committed by GitHub
parent 86ac223916
commit d6244b42f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 379 additions and 0 deletions

155
modules/ascat/main.nf Normal file
View file

@ -0,0 +1,155 @@
// ASCAT: allele-specific copy number analysis of a tumour/normal pair.
//
// Inputs
//   meta          : Groovy map with sample information; meta.id is the default output prefix
//   input_normal  : normal alignment file, with its index in index_normal
//   input_tumor   : tumour alignment file, with its index in index_tumor
//   allele_files  : passed to ascat.prepareHTS as alleles.prefix
//   loci_files    : passed to ascat.prepareHTS as loci.prefix
//
// Tool parameters are read from task.ext.args, which must be a Groovy Map
// (keys: gender, genomeVersion, purity, ploidy, gc_files, minCounts, chrom_names,
// min_base_qual, min_map_qual, ref_fasta, skip_allele_counting_tumour,
// skip_allele_counting_normal). Most values are pasted verbatim into the R script,
// so they must be valid R expressions; gender, genomeVersion and ref_fasta are
// wrapped in quotes by this module.
//
// Outputs
//   png, cnvs, purityploidy, segments (each paired with meta) and versions.yml
process ASCAT {
    tag "$meta.id"
    label 'process_medium'

    // Conda match specs separate package name and version with '='
    conda (params.enable_conda ? "bioconda::ascat=3.0.0 bioconda::cancerit-allelecount=4.3.0": null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0':
        'quay.io/biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0' }"

    input:
    tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor)
    path(allele_files)
    path(loci_files)

    output:
    tuple val(meta), path("*png"),              emit: png
    tuple val(meta), path("*cnvs.txt"),         emit: cnvs
    tuple val(meta), path("*purityploidy.txt"), emit: purityploidy
    tuple val(meta), path("*segments.txt"),     emit: segments
    path "versions.yml",                        emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Fall back to an empty map (not an empty string): every lookup below is a
    // map property access, which would throw on a String when ext.args is unset.
    def args = task.ext.args ?: [:]
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Values substituted into the R code; unset ones become the R literal NULL
    def gender        = args.gender        ? "$args.gender"        : "NULL"
    def genomeVersion = args.genomeVersion ? "$args.genomeVersion" : "NULL"
    def purity        = args.purity        ? "$args.purity"        : "NULL"
    def ploidy        = args.ploidy        ? "$args.ploidy"        : "NULL"
    def gc_files      = args.gc_files      ? "$args.gc_files"      : "NULL"

    // Optional ascat.prepareHTS arguments; each expands to ",name = value" or to nothing
    def minCounts_arg                   = args.minCounts                   ? ",minCounts = $args.minCounts" : ""
    def chrom_names_arg                 = args.chrom_names                 ? ",chrom_names = $args.chrom_names" : ""
    def min_base_qual_arg               = args.min_base_qual               ? ",min_base_qual = $args.min_base_qual" : ""
    def min_map_qual_arg                = args.min_map_qual                ? ",min_map_qual = $args.min_map_qual" : ""
    def ref_fasta_arg                   = args.ref_fasta                   ? ",ref.fasta = '$args.ref_fasta'" : ""
    def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ",skip_allele_counting_tumour = $args.skip_allele_counting_tumour" : ""
    def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ",skip_allele_counting_normal = $args.skip_allele_counting_normal" : ""

    """
    #!/usr/bin/env Rscript
    library(RColorBrewer)
    library(ASCAT)
    options(bitmapType='cairo')

    #prepare from BAM files
    ascat.prepareHTS(
        tumourseqfile = "$input_tumor",
        normalseqfile = "$input_normal",
        tumourname = "Tumour",
        normalname = "Normal",
        allelecounter_exe = "alleleCounter",
        alleles.prefix = "$allele_files",
        loci.prefix = "$loci_files",
        gender = "$gender",
        genomeVersion = "$genomeVersion",
        nthreads = $task.cpus
        $minCounts_arg
        $chrom_names_arg
        $min_base_qual_arg
        $min_map_qual_arg
        $ref_fasta_arg
        $skip_allele_counting_tumour_arg
        $skip_allele_counting_normal_arg
    )

    #Load the data
    #The four tables are expected to have been written by ascat.prepareHTS with
    #the tumourname ("Tumour") as file prefix; tumour and germline each get
    #their own LogR and BAF table.
    ascat.bc = ascat.loadData(
        Tumor_LogR_file = "Tumour_tumourLogR.txt",
        Tumor_BAF_file = "Tumour_tumourBAF.txt",
        Germline_LogR_file = "Tumour_normalLogR.txt",
        Germline_BAF_file = "Tumour_normalBAF.txt",
        genomeVersion = "$genomeVersion",
        gender = "$gender"
    )

    #optional GC wave correction
    #(the gc_files value is inserted verbatim, so it must be a valid R expression)
    if(!is.null($gc_files)){
        ascat.bc = ascat.GCcorrect(ascat.bc, $gc_files)
    }

    #Plot the raw data
    ascat.plotRawData(ascat.bc)

    #Segment the data
    ascat.bc = ascat.aspcf(ascat.bc)

    #Plot the segmented data
    ascat.plotSegmentedData(ascat.bc)

    #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers
    #If psi and rho are manually set:
    if (!is.null($purity) && !is.null($ploidy)){
        ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy)
    } else if(!is.null($purity) && is.null($ploidy)){
        ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity)
    } else if(!is.null($ploidy) && is.null($purity)){
        ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy)
    } else {
        ascat.output <- ascat.runAscat(ascat.bc, gamma=1)
    }

    #Write out segmented regions (including regions with one copy of each allele)
    write.table(ascat.output[["segments"]], file=paste0("$prefix", ".segments.txt"), sep="\t", quote=F, row.names=F)

    #Write out CNVs in bed format
    cnvs=ascat.output[["segments"]][2:6]
    write.table(cnvs, file=paste0("$prefix",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T)

    #Write out purity and ploidy info
    #If building the summary fails, a single row of zeros is written instead
    summary <- tryCatch({
        matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), ncol=2, byrow=TRUE)}, error = function(err) {
            # error handler picks up where error was generated
            print(paste("Could not find optimal solution: ",err))
            return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE))
        }
    )
    colnames(summary) <- c("AberrantCellFraction","Ploidy")
    write.table(summary, file=paste0("$prefix",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T)

    #version export. Have to hardcode process name and software name because
    #won't run inside an R-block
    version_file_path="versions.yml"
    f <- file(version_file_path,"w")
    writeLines("ASCAT:", f)
    writeLines(" ascat: 3.0.0",f)
    close(f)
    """

    stub:
    // Creates empty placeholders for every declared output plus a versions.yml
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.cnvs.txt
    touch ${prefix}.purityploidy.txt
    touch ${prefix}.segments.txt
    touch Tumour.ASCATprofile.png
    touch Tumour.ASPCF.png
    touch Tumour.germline.png
    touch Tumour.rawprofile.png
    touch Tumour.sunrise.png
    touch Tumour.tumour.png
    echo 'ASCAT:' > versions.yml
    echo ' ascat: 3.0.0' >> versions.yml
    """
}

92
modules/ascat/meta.yml Normal file
View file

@ -0,0 +1,92 @@
# Module metadata for the ASCAT process (modules/ascat/main.nf).
name: ascat
description: copy number profiles of tumour cells.
keywords:
  - bam
  - copy number
  - cram
tools:
  - ascat:
      description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome).
      homepage: None
      documentation: None
      tool_dev_url: https://github.com/Crick-CancerGenomics/ascat
      doi: "10.1093/bioinformatics/btaa538"
      licence: ['GPL v3']
input:
  # Not an input channel: these parameters are read from task.ext.args
  - args:
      type: map
      description: |
        Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. <optional> parameters can be removed from the map, if they are not set. For default values, please check the documentation above.
        ```
        {
          [
            "gender": "XX",
            "genomeVersion": "hg19",
            "purity": <optional>,
            "ploidy": <optional>,
            "gc_files": <optional>,
            "minCounts": <optional>,
            "chrom_names": <optional>,
            "min_base_qual": <optional>,
            "min_map_qual": <optional>,
            "ref_fasta": <optional>,
            "skip_allele_counting_tumour": <optional>,
            "skip_allele_counting_normal": <optional>
          ]
        }
        ```
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - input_normal:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - index_normal:
      type: file
      description: index for input_normal
      pattern: "*.{bai,crai}"
  - input_tumor:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
  - index_tumor:
      type: file
      description: index for input_tumor
      pattern: "*.{bai,crai}"
  - allele_files:
      type: file
      description: allele files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS
  - loci_files:
      type: file
      description: loci files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - png:
      type: file
      description: ASCAT plots
      pattern: "*.{png}"
  - cnvs:
      type: file
      description: CNV data (columns 2-6 of the segments table)
      pattern: "*.cnvs.txt"
  - purityploidy:
      type: file
      description: purity and ploidy data
      pattern: "*.purityploidy.txt"
  - segments:
      type: file
      description: segments data
      pattern: "*.segments.txt"
authors:
  - "@aasNGC"
  - "@lassefolkersen"
  - "@FriederikeHanssen"
  - "@maxulysse"

View file

@ -46,6 +46,10 @@ artic/minion:
- modules/artic/minion/**
- tests/modules/artic/minion/**
ascat:
- modules/ascat/**
- tests/modules/ascat/**
assemblyscan:
- modules/assemblyscan/**
- tests/modules/assemblyscan/**

View file

@ -0,0 +1,64 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { ASCAT as ASCAT_SIMPLE} from '../../../modules/ascat/main.nf'
include { ASCAT as ASCAT_PLOIDY_AND_PURITY} from '../../../modules/ascat/main.nf'
include { ASCAT as ASCAT_CRAM} from '../../../modules/ascat/main.nf'
// Smoke test: runs ASCAT_SIMPLE on the two paired-end sorted BAMs from the
// shared test-data map, with empty allele and loci inputs.
workflow test_ascat {

    // Shorthand for the human Illumina section of the test-data map
    def illumina = params.test_data['homo_sapiens']['illumina']

    def meta       = [ id:'test', single_end:false ] // meta map
    def normal_bam = file(illumina['test_paired_end_sorted_bam'],      checkIfExists: true)
    def normal_bai = file(illumina['test_paired_end_sorted_bam_bai'],  checkIfExists: true)
    def tumor_bam  = file(illumina['test2_paired_end_sorted_bam'],     checkIfExists: true)
    def tumor_bai  = file(illumina['test2_paired_end_sorted_bam_bai'], checkIfExists: true)

    // Tuple order follows the process input: meta, normal, normal index, tumour, tumour index
    ASCAT_SIMPLE ( [ meta, normal_bam, normal_bai, tumor_bam, tumor_bai ], [], [] )
}
// extended tests running with 1000 genomes data. Data is downloaded as follows:
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00154/alignment/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00154/alignment/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai
// wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00155/alignment/HG00155.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam
// wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00155/alignment/HG00155.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai
//workflow test_ascat_with_ploidy_and_purity {
// input = [
// [ id:'test', single_end:false ], // meta map
// file("/home/ec2-user/input_files/bams/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam", checkIfExists: true),
// file("/home/ec2-user/input_files/bams/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai", checkIfExists: true),
// file("/home/ec2-user/input_files/bams/test2.bam", checkIfExists: true),
// file("/home/ec2-user/input_files/bams/test2.bam.bai", checkIfExists: true)
// ]
//
// ASCAT_PLOIDY_AND_PURITY ( input , "/home/ec2-user/input_files/allele_files/G1000_alleles_hg19_chr", "/home/ec2-user/input_files/loci_files/G1000_alleles_hg19_chr")
//}
// extended tests running with 1000 genomes data. Data is downloaded as follows:
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00145/alignment/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00145/alignment/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00146/alignment/HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai
// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00146/alignment/HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram
//workflow test_ascat_with_crams {
// input = [
// [ id:'test', single_end:false ], // meta map
// file("/home/ec2-user/input_files/crams/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram", checkIfExists: true),
// file("/home/ec2-user/input_files/crams/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai", checkIfExists: true),
// file("/home/ec2-user/input_files/crams/duplicate_test.cram", checkIfExists: true),
// file("/home/ec2-user/input_files/crams/duplicate_test.cram.crai", checkIfExists: true)
// ]
//
// ASCAT_CRAM ( input , "/home/ec2-user/input_files/allele_files/G1000_alleles_hg19_chr", "/home/ec2-user/input_files/loci_files/G1000_alleles_hg19_chr")
//}

View file

@ -0,0 +1,39 @@
// Test configuration for the ASCAT module: one ext.args map per process alias.
process {

    // Publish under <outdir>/<name>, where <name> is the lower-cased text before
    // the first '_' in the last ':'-separated part of the process name.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // Minimal run with relaxed count/quality thresholds, restricted to chromosomes 21 and 22
    withName: 'ASCAT_SIMPLE' {
        ext.args = [
            gender: 'XY',
            genomeVersion: 'hg19',
            minCounts: '1',
            min_base_qual: '1',
            min_map_qual: '1',
            chrom_names: 'c("21","22")'
        ]
    }

    // Run with manually supplied ploidy and purity
    withName: 'ASCAT_PLOIDY_AND_PURITY' {
        ext.args = [
            gender: 'XX',
            genomeVersion: 'hg19',
            ploidy: '1.7',
            purity: '0.24',
            chrom_names: 'c("21","22")'
        ]
    }

    // CRAM run with a reference FASTA supplied as an absolute path
    withName: 'ASCAT_CRAM' {
        ext.args = [
            gender: 'XX',
            genomeVersion: 'hg19',
            ref_fasta: '/home/ec2-user/input_files/fasta/human_g1k_v37.fasta',
            chrom_names: 'c("21","22")'
        ]
    }
}

View file

@ -0,0 +1,25 @@
# pytest-workflow entry for the ASCAT module.
# The command uses -stub-run, so only the module's stub block is executed and
# the checks below cover the placeholder files it creates.
- name: ascat test_ascat
command: nextflow run tests/modules/ascat -entry test_ascat -c tests/config/nextflow.config -stub-run
tags:
- ascat
files:
# d41d8cd98f00b204e9800998ecf8427e is the MD5 of zero-length content: the stub
# creates each of these files with `touch`.
- path: output/ascat/Tumour.ASCATprofile.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/Tumour.ASPCF.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/Tumour.germline.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/Tumour.rawprofile.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/Tumour.sunrise.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/Tumour.tumour.png
md5sum: d41d8cd98f00b204e9800998ecf8427e
# The "test" prefix presumably comes from the meta id used by the test workflow.
- path: output/ascat/test.cnvs.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/test.purityploidy.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/ascat/test.segments.txt
md5sum: d41d8cd98f00b204e9800998ecf8427e
# versions.yml is the only non-empty file the stub writes, hence a different checksum.
- path: output/ascat/versions.yml
md5sum: 1af20694ec11004c4f8bc0c609b06386