feat: Add cigar(::Haplotype) function

This commit is contained in:
Thomas A. Christensen II 2023-04-04 11:34:35 -05:00
parent e84f765357
commit 719a123992
Signed by: millironx
GPG key ID: 09335146883990B9
2 changed files with 30 additions and 1 deletions

View file

@ -221,6 +221,35 @@ function reconstruct(h::Haplotype)
return seq
end
"""
cigar(hap::Haplotype{S,T}) where {S,T}
Constructs a CIGAR string representing the alignment of the sequence of `hap` to its
reference.
"""
function BioAlignments.cigar(hap::Haplotype{S,T}) where {S,T}
cigar_string = String[]
mismatch_vars = filter(var -> !isa(mutation(var), Substitution), variations(hap))
length(mismatch_vars) > 0 || return "$(length(reference(hap)))M"
lastvar = first(mismatch_vars)
leftposition(lastvar) > 1 && push!(cigar_string, "$(leftposition(lastvar))M")
for var in mismatch_vars
push!(cigar_string, _cigar_between(lastvar, var))
push!(cigar_string, _cigar(var))
lastvar = var
end #for
remaining_bases = length(reference(hap)) - rightposition(lastvar)
remaining_bases > 0 && push!(cigar_string, "$(remaining_bases)M")
return join(cigar_string, "")
end
"""
translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}

View file

@ -20,7 +20,7 @@ TODO now:
* Add tests
"""
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP, sequence
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP, cigar, sequence
using BioGenerics: BioGenerics, leftposition, rightposition
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
using BioSymbols: BioSymbol