diff --git a/modules/ascat/main.nf b/modules/ascat/main.nf new file mode 100644 index 00000000..1d2bd96f --- /dev/null +++ b/modules/ascat/main.nf @@ -0,0 +1,155 @@ +process ASCAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::ascat=3.0.0 bioconda::cancerit-allelecount-4.3.0": null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0': + 'quay.io/biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:dfe5aaa885de434adb2b490b68972c5840c6d761-0' }" + + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + path(allele_files) + path(loci_files) + + output: + tuple val(meta), path("*png"), emit: png + tuple val(meta), path("*cnvs.txt"), emit: cnvs + tuple val(meta), path("*purityploidy.txt"), emit: purityploidy + tuple val(meta), path("*segments.txt"), emit: segments + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def gender = args.gender ? "$args.gender" : "NULL" + def genomeVersion = args.genomeVersion ? "$args.genomeVersion" : "NULL" + def purity = args.purity ? "$args.purity" : "NULL" + def ploidy = args.ploidy ? "$args.ploidy" : "NULL" + def gc_files = args.gc_files ? "$args.gc_files" : "NULL" + + def minCounts_arg = args.minCounts ? ",minCounts = $args.minCounts" : "" + def chrom_names_arg = args.chrom_names ? ",chrom_names = $args.chrom_names" : "" + def min_base_qual_arg = args.min_base_qual ? ",min_base_qual = $args.min_base_qual" : "" + def min_map_qual_arg = args.min_map_qual ? ",min_map_qual = $args.min_map_qual" : "" + def ref_fasta_arg = args.ref_fasta ? ",ref.fasta = '$args.ref_fasta'" : "" + def skip_allele_counting_tumour_arg = args.skip_allele_counting_tumour ? ",skip_allele_counting_tumour = $args.skip_allele_counting_tumour" : "" + def skip_allele_counting_normal_arg = args.skip_allele_counting_normal ? ",skip_allele_counting_normal = $args.skip_allele_counting_normal" : "" + + + + """ + #!/usr/bin/env Rscript + library(RColorBrewer) + library(ASCAT) + options(bitmapType='cairo') + + + #prepare from BAM files + ascat.prepareHTS( + tumourseqfile = "$input_tumor", + normalseqfile = "$input_normal", + tumourname = "Tumour", + normalname = "Normal", + allelecounter_exe = "alleleCounter", + alleles.prefix = "$allele_files", + loci.prefix = "$loci_files", + gender = "$gender", + genomeVersion = "$genomeVersion", + nthreads = $task.cpus + $minCounts_arg + $chrom_names_arg + $min_base_qual_arg + $min_map_qual_arg + $ref_fasta_arg + $skip_allele_counting_tumour_arg + $skip_allele_counting_normal_arg + ) + + + #Load the data + ascat.bc = ascat.loadData( + Tumor_LogR_file = "Tumour_tumourLogR.txt", + Tumor_BAF_file = "Tumour_normalBAF.txt", + Germline_LogR_file = "Tumour_normalLogR.txt", + Germline_BAF_file = "Tumour_normalBAF.txt", + genomeVersion = "$genomeVersion", + gender = "$gender" + ) + + #optional GC wave correction + if(!is.null($gc_files)){ + ascat.bc = ascat.GCcorrect(ascat.bc, $gc_files) + } + + #Plot the raw data + ascat.plotRawData(ascat.bc) + + #Segment the data + ascat.bc = ascat.aspcf(ascat.bc) + + #Plot the segmented data + ascat.plotSegmentedData(ascat.bc) + + #Run ASCAT to fit every tumor to a model, inferring ploidy, normal cell contamination, and discrete copy numbers + #If psi and rho are manually set: + if (!is.null($purity) && !is.null($ploidy)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity, psi_manual=$ploidy) + } else if(!is.null($purity) && is.null($ploidy)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, rho_manual=$purity) + } else if(!is.null($ploidy) && is.null($purity)){ + ascat.output <- ascat.runAscat(ascat.bc, gamma=1, psi_manual=$ploidy) + } else { + ascat.output <- ascat.runAscat(ascat.bc, gamma=1) + } + + #Write out segmented regions (including regions with one copy of each allele) + write.table(ascat.output[["segments"]], file=paste0("$prefix", ".segments.txt"), sep="\t", quote=F, row.names=F) + + #Write out CNVs in bed format + cnvs=ascat.output[["segments"]][2:6] + write.table(cnvs, file=paste0("$prefix",".cnvs.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #Write out purity and ploidy info + summary <- tryCatch({ + matrix(c(ascat.output[["aberrantcellfraction"]], ascat.output[["ploidy"]]), ncol=2, byrow=TRUE)}, error = function(err) { + # error handler picks up where error was generated + print(paste("Could not find optimal solution: ",err)) + return(matrix(c(0,0),nrow=1,ncol=2,byrow = TRUE)) + } + ) + colnames(summary) <- c("AberrantCellFraction","Ploidy") + write.table(summary, file=paste0("$prefix",".purityploidy.txt"), sep="\t", quote=F, row.names=F, col.names=T) + + #version export. Have to hardcode process name and software name because + #won't run inside an R-block + version_file_path="versions.yml" + f <- file(version_file_path,"w") + writeLines("ASCAT:", f) + writeLines(" ascat: 3.0.0",f) + close(f) + """ + + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.cnvs.txt + touch ${prefix}.purityploidy.txt + touch ${prefix}.segments.txt + touch Tumour.ASCATprofile.png + touch Tumour.ASPCF.png + touch Tumour.germline.png + touch Tumour.rawprofile.png + touch Tumour.sunrise.png + touch Tumour.tumour.png + + echo 'ASCAT:' > versions.yml + echo ' ascat: 3.0.0' >> versions.yml + """ + + +} diff --git a/modules/ascat/meta.yml b/modules/ascat/meta.yml new file mode 100644 index 00000000..949afd6a --- /dev/null +++ b/modules/ascat/meta.yml @@ -0,0 +1,92 @@ +name: ascat +description: copy number profiles of tumour cells. +keywords: + - sort +tools: + - ascat: + description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome). + homepage: None + documentation: None + tool_dev_url: https://github.com/Crick-CancerGenomics/ascat + doi: "10.1093/bioinformatics/btaa538" + licence: ['GPL v3'] + +input: + - args: + type: map + description: | + Groovy Map containing tool parameters. MUST follow the structure/keywords below and be provided via modules.config. Parameters must be set between quotes. parameters can be removed from the map, if they are not set. For default values, please check the documentation above. + + ``` + { + [ + "gender": "XX", + "genomeVersion": "hg19" + "purity": , + "ploidy": , + "gc_files": , + "minCounts": , + "chrom_names": , + "min_base_qual": , + "min_map_qual": , + "ref_fasta": , + "skip_allele_counting_tumour": , + "skip_allele_counting_normal": + ] + } + ``` + + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_normal: + type: file + description: index for normal_bam + pattern: "*.{bai}" + - input_tumor: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index_tumor: + type: file + description: index for tumor_bam + pattern: "*.{bai}" + - allele_files: + type: file + description: allele files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS + - loci_files: + type: file + description: loci files for ASCAT. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - png: + type: file + description: ASCAT plots + pattern: "*.{png}" + - purityploidy: + type: file + description: purity and ploidy data + pattern: "*.purityploidy.txt" + - segments: + type: file + description: segments data + pattern: "*.segments.txt" +authors: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 6ecab096..48c3bb7d 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -46,6 +46,10 @@ artic/minion: - modules/artic/minion/** - tests/modules/artic/minion/** +ascat: + - modules/ascat/** + - tests/modules/ascat/** + assemblyscan: - modules/assemblyscan/** - tests/modules/assemblyscan/** diff --git a/tests/modules/ascat/main.nf b/tests/modules/ascat/main.nf new file mode 100644 index 00000000..e1f4f798 --- /dev/null +++ b/tests/modules/ascat/main.nf @@ -0,0 +1,64 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { ASCAT as ASCAT_SIMPLE} from '../../../modules/ascat/main.nf' +include { ASCAT as ASCAT_PLOIDY_AND_PURITY} from '../../../modules/ascat/main.nf' +include { ASCAT as ASCAT_CRAM} from '../../../modules/ascat/main.nf' + + + + +workflow test_ascat { + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + + ASCAT_SIMPLE ( input , [], []) +} + + + + + +// extended tests running with 1000 genomes data. Data is downloaded as follows: +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00154/alignment/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00154/alignment/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai +// wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00155/alignment/HG00155.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam +// wget http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase1/data/HG00155/alignment/HG00155.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai +//workflow test_ascat_with_ploidy_and_purity { +// input = [ +// [ id:'test', single_end:false ], // meta map +// file("/home/ec2-user/input_files/bams/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam", checkIfExists: true), +// file("/home/ec2-user/input_files/bams/HG00154.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123.bam.bai", checkIfExists: true), +// file("/home/ec2-user/input_files/bams/test2.bam", checkIfExists: true), +// file("/home/ec2-user/input_files/bams/test2.bam.bai", checkIfExists: true) +// ] +// +// ASCAT_PLOIDY_AND_PURITY ( input , "/home/ec2-user/input_files/allele_files/G1000_alleles_hg19_chr", "/home/ec2-user/input_files/loci_files/G1000_alleles_hg19_chr") +//} + + +// extended tests running with 1000 genomes data. Data is downloaded as follows: +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00145/alignment/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00145/alignment/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00146/alignment/HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai +// wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/HG00146/alignment/HG00146.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram +//workflow test_ascat_with_crams { +// input = [ +// [ id:'test', single_end:false ], // meta map +// file("/home/ec2-user/input_files/crams/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram", checkIfExists: true), +// file("/home/ec2-user/input_files/crams/HG00145.mapped.ILLUMINA.bwa.GBR.low_coverage.20120522.bam.cram.crai", checkIfExists: true), +// file("/home/ec2-user/input_files/crams/duplicate_test.cram", checkIfExists: true), +// file("/home/ec2-user/input_files/crams/duplicate_test.cram.crai", checkIfExists: true) +// ] +// +// ASCAT_CRAM ( input , "/home/ec2-user/input_files/allele_files/G1000_alleles_hg19_chr", "/home/ec2-user/input_files/loci_files/G1000_alleles_hg19_chr") +//} + + + diff --git a/tests/modules/ascat/nextflow.config b/tests/modules/ascat/nextflow.config new file mode 100644 index 00000000..3c6cc53a --- /dev/null +++ b/tests/modules/ascat/nextflow.config @@ -0,0 +1,39 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + + withName: ASCAT_SIMPLE { + ext.args = [ + gender : 'XY', + genomeVersion : 'hg19', + minCounts : '1', + min_base_qual : '1', + min_map_qual : '1', + chrom_names : 'c("21","22")' + ] + } + + + + withName: ASCAT_PLOIDY_AND_PURITY { + ext.args = [ + gender : 'XX', + genomeVersion : 'hg19', + ploidy : '1.7', + purity : '0.24', + chrom_names : 'c("21","22")' + ] + } + + withName: ASCAT_CRAM { + ext.args = [ + gender : 'XX', + genomeVersion : 'hg19', + ref_fasta : '/home/ec2-user/input_files/fasta/human_g1k_v37.fasta', + chrom_names : 'c("21","22")' + ] + } + +} + diff --git a/tests/modules/ascat/test.yml b/tests/modules/ascat/test.yml new file mode 100644 index 00000000..e46c66b4 --- /dev/null +++ b/tests/modules/ascat/test.yml @@ -0,0 +1,25 @@ +- name: ascat test_ascat + command: nextflow run tests/modules/ascat -entry test_ascat -c tests/config/nextflow.config -stub-run + tags: + - ascat + files: + - path: output/ascat/Tumour.ASCATprofile.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/Tumour.ASPCF.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/Tumour.germline.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/Tumour.rawprofile.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/Tumour.sunrise.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/Tumour.tumour.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/test.cnvs.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/test.purityploidy.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/test.segments.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/ascat/versions.yml + md5sum: 1af20694ec11004c4f8bc0c609b06386