Fix tests for _is_valid(::Haplotype)

This commit is contained in:
Thomas A. Christensen II 2023-01-21 18:10:46 -06:00
parent 2a5a5389ec
commit 57ebe391bf

View file

@ -49,43 +49,42 @@ function Base.show(io::IO, x::Haplotype)
end end
""" """
is_valid(h::Haplotype) _is_valid(h::Haplotype)
Validate `h`. `h` is invalid if any of its operations are out of bounds, or the same Validate `h`. `h` is invalid if any of its operations are out of bounds, or the same
position is affected by multiple edits. position is affected by multiple edits.
""" """
function _is_valid(h::Haplotype) function _is_valid(h::Haplotype)
isempty(h.ref) && return false isempty(reference(h)) && return (false, "Empty reference")
valid_positions = 1:length(h.ref) valid_positions = BitVector(ones(length(reference(h))))
last_was_insert = false insertion_bases = Integer[]
for edit in h.edits
pos = edit.pos for edit in _edits(h)
op = edit.x pos = leftposition(edit)
op = _mutation(edit)
# Sanity check: for this to be a valid variant, it must be comprised of valid # Sanity check: for this to be a valid variant, it must be comprised of valid
# variations # variations
_is_valid(Variation(h.ref, edit)) || return (false, "Invalid Variation") _is_valid(Variation(reference(h), edit)) || return (false, "Invalid Variation")
# For substitutions we simply do not allow another modification of the same base
if op isa Substitution if op isa Substitution
pos in valid_positions || # For substitutions we simply do not allow another modification of the same base
valid_positions[pos] ||
return (false, "Multiple modifications at same position") return (false, "Multiple modifications at same position")
valid_positions = (first(valid_positions) + 1):last(valid_positions) valid_positions[pos] = false
last_was_insert = false elseif op isa Insertion
# Insertions affect 0 reference bases, so it does not modify the valid positions # Insertions affect 0 reference bases, so it does not modify the valid positions
# for next op. However, we cannot have two insertions at the same position, because # for next op. However, we cannot have two insertions at the same position, because
# then the order of them is ambiguous # then the order of them is ambiguous
elseif op isa Insertion pos in insertion_bases && return (false, "Multiple insertions at same position")
pos in push!(insertion_bases, pos)
((first(valid_positions) - 1 + last_was_insert):(last(valid_positions) + 1)) ||
return (false, "Multiple insertions at same position")
last_was_insert = true
# Deletions obviously invalidate the reference bases that are deleted.
elseif op isa Deletion elseif op isa Deletion
len = length(op) # Deletions obviously invalidate the reference bases that are deleted.
pos in (first(valid_positions):(last(valid_positions) - len + 1)) || for i in pos:(pos + length(op))
return (false, "Deletion out of range") checkbounds(Bool, valid_positions, i) || return (false, "Deletion out of range")
valid_positions = (first(valid_positions) + len):last(valid_positions) valid_positions[i] || return (false, "Modifications in a deleted region")
last_was_insert = false valid_positions[i] = false
end
end end
end end
return (true, "") return (true, "")