Refactor reconstruct! to use the built-in reference sequence

reconstruct! previously allowed changing _any_ sequence to match a
haplotype, with no check to see if that sequence is compatible with the
reference sequence the haplotype is based on. Change that to only allow
reconstructing sequences from the reference sequence itself, making this a
non-mutating function.
This commit is contained in:
Thomas A. Christensen II 2023-01-05 12:30:03 -06:00
parent 8a71715cd9
commit adbf0ce7f1

View file

@ -162,21 +162,22 @@ reference(h::Haplotype) = h.ref
Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits
""" """
reconstruct!(seq::S, x::Haplotype{S}) where {S} reconstruct!(h::Haplotype)
Apply the edits in `x` to `seq` and return the mutated sequence Apply the edits in `h` to the reference sequence of `h` and return the mutated sequence
""" """
function reconstruct!(seq::S, x::Haplotype{S}) where {S} function reconstruct!(h::Haplotype)
len = length(x.ref) + sum(edit -> _lendiff(edit), _edits(x)) len = length(reference(h)) + sum(edit -> _lendiff(edit), _edits(h))
seq = copy(reference(h))
resize!(seq, len % UInt) resize!(seq, len % UInt)
refpos = seqpos = 1 refpos = seqpos = 1
for edit in x.edits for edit in _edits(h)
while refpos < edit.pos while refpos < leftposition(edit)
seq[seqpos] = x.ref[refpos] seq[seqpos] = reference(h)[refpos]
refpos += 1 refpos += 1
seqpos += 1 seqpos += 1
end end
editx = edit.x editx = _mutation(edit)
if editx isa Substitution if editx isa Substitution
seq[seqpos] = editx.x seq[seqpos] = editx.x
seqpos += 1 seqpos += 1
@ -184,14 +185,14 @@ function reconstruct!(seq::S, x::Haplotype{S}) where {S}
elseif editx isa Deletion elseif editx isa Deletion
refpos += editx.len refpos += editx.len
elseif editx isa Insertion elseif editx isa Insertion
for i in editx.x for i in editx.seq
seq[seqpos] = i seq[seqpos] = i
seqpos += 1 seqpos += 1
end end
end end
end end
while seqpos ≤ length(seq) while seqpos ≤ length(seq)
seq[seqpos] = x.ref[refpos] seq[seqpos] = reference(h)[refpos]
refpos += 1 refpos += 1
seqpos += 1 seqpos += 1
end end