From 719a1239927addf4f66bb7b65fe8a83ebe2f1cd4 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 4 Apr 2023 11:34:35 -0500 Subject: [PATCH] feat: Add cigar(::Haplotype) function --- src/Haplotype.jl | 29 +++++++++++++++++++++++++++++ src/SequenceVariation.jl | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/Haplotype.jl b/src/Haplotype.jl index 6d1274d..f5cf985 100644 --- a/src/Haplotype.jl +++ b/src/Haplotype.jl @@ -221,6 +221,35 @@ function reconstruct(h::Haplotype) return seq end +""" + cigar(hap::Haplotype{S,T}) where {S,T} + +Constructs a CIGAR string representing the alignment of the sequence of `hap` to its +reference. +""" +function BioAlignments.cigar(hap::Haplotype{S,T}) where {S,T} + cigar_string = String[] + + mismatch_vars = filter(var -> !isa(mutation(var), Substitution), variations(hap)) + + length(mismatch_vars) > 0 || return "$(length(reference(hap)))M" + + lastvar = first(mismatch_vars) + + leftposition(lastvar) > 1 && push!(cigar_string, "$(leftposition(lastvar))M") + + for var in mismatch_vars + push!(cigar_string, _cigar_between(lastvar, var)) + push!(cigar_string, _cigar(var)) + lastvar = var + end #for + + remaining_bases = length(reference(hap)) - rightposition(lastvar) + remaining_bases > 0 && push!(cigar_string, "$(remaining_bases)M") + + return join(cigar_string, "") +end + """ translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T} diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 0e8cadd..37b0a42 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -20,7 +20,7 @@ TODO now: * Add tests """ -using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP, sequence +using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP, cigar, sequence using BioGenerics: BioGenerics, leftposition, rightposition using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap using BioSymbols: BioSymbol