Compare commits

..

6 commits

Author SHA1 Message Date
2ff4181dd8
Fix deletion equality function
Fixes: da2cbbd528 ("Add equals functionality to Deletion")

Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>
2023-01-10 11:41:08 -06:00
5bd939a15c Update CHANGELOG 2023-01-10 11:22:06 -06:00
6c86782a2c Update documentation to reflect new reconstruct syntax 2023-01-10 11:22:06 -06:00
b870007826 Add test for reconstruct function 2023-01-10 11:22:06 -06:00
b80dd8c950 Rename reconstruct! function to reconstruct
The exclamation mark is conventionally used to mark mutating functions, but
reconstruct is no longer mutating, so change the notation.
2023-01-10 11:22:06 -06:00
adbf0ce7f1 Refactor reconstruct! to use the built-in reference sequence
reconstruct! previously allow changing of _any_ sequence to match a
haplotype, with no check to see if that sequence is compatible with the
reference sequence the haplotype is based on. Change that to only allow
reconstructing sequences from the reference sequence itself, making this a
non-mutating function.
2023-01-10 11:22:06 -06:00
7 changed files with 21 additions and 16 deletions

View file

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Code now follows [Blue style](https://github.com/invenia/BlueStyle) ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28)) - Code now follows [Blue style](https://github.com/invenia/BlueStyle) ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
- :bomb: [BREAKING] Public and private API defined based on Blue style guidelines ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28)) - :bomb: [BREAKING] Public and private API defined based on Blue style guidelines ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
- :bomb: [BREAKING] Renamed type `Variant` to `Haplotype` ([#20](https://github.com/BioJulia/SequenceVariation.jl/issues/20)/[#29](https://github.com/BioJulia/SequenceVariation.jl/pull/29)) - :bomb: [BREAKING] Renamed type `Variant` to `Haplotype` ([#20](https://github.com/BioJulia/SequenceVariation.jl/issues/20)/[#29](https://github.com/BioJulia/SequenceVariation.jl/pull/29))
- :bomb: [BREAKING] Refactored `reconstruct` function to use `Haplotype`'s reference sequence ([#30](https://github.com/BioJulia/SequenceVariation.jl/pull/30))
### Removed ### Removed

View file

@ -21,7 +21,7 @@ Insertion
Haplotype Haplotype
reference(::Haplotype) reference(::Haplotype)
variations variations
reconstruct! reconstruct
``` ```
## Variations ## Variations

View file

@ -31,11 +31,10 @@ bos_human_haplotype = Haplotype(bos_human_alignment)
If the alternate sequence of a haplotype is no longer available (as is often the If the alternate sequence of a haplotype is no longer available (as is often the
case when calling variants from alignment files), then the sequence can be case when calling variants from alignment files), then the sequence can be
retrieved using the [`reconstruct!`](@ref) function. retrieved using the [`reconstruct`](@ref) function.
```@repl call_variants ```@repl call_variants
human2 = copy(bovine); human2 = reconstruct(bos_human_haplotype)
reconstruct!(human2, bos_human_haplotype)
human2 == bovine human2 == bovine
human2 == human human2 == human
``` ```

View file

@ -162,21 +162,22 @@ reference(h::Haplotype) = h.ref
Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits
""" """
reconstruct!(seq::S, x::Haplotype{S}) where {S} reconstruct(h::Haplotype)
Apply the edits in `x` to `seq` and return the mutated sequence Apply the edits in `h` to the reference sequence of `h` and return the mutated sequence
""" """
function reconstruct!(seq::S, x::Haplotype{S}) where {S} function reconstruct(h::Haplotype)
len = length(x.ref) + sum(edit -> _lendiff(edit), _edits(x)) len = length(reference(h)) + sum(edit -> _lendiff(edit), _edits(h))
seq = copy(reference(h))
resize!(seq, len % UInt) resize!(seq, len % UInt)
refpos = seqpos = 1 refpos = seqpos = 1
for edit in x.edits for edit in _edits(h)
while refpos < edit.pos while refpos < leftposition(edit)
seq[seqpos] = x.ref[refpos] seq[seqpos] = reference(h)[refpos]
refpos += 1 refpos += 1
seqpos += 1 seqpos += 1
end end
editx = edit.x editx = _mutation(edit)
if editx isa Substitution if editx isa Substitution
seq[seqpos] = editx.x seq[seqpos] = editx.x
seqpos += 1 seqpos += 1
@ -184,14 +185,14 @@ function reconstruct!(seq::S, x::Haplotype{S}) where {S}
elseif editx isa Deletion elseif editx isa Deletion
refpos += editx.len refpos += editx.len
elseif editx isa Insertion elseif editx isa Insertion
for i in editx.x for i in editx.seq
seq[seqpos] = i seq[seqpos] = i
seqpos += 1 seqpos += 1
end end
end end
end end
while seqpos length(seq) while seqpos length(seq)
seq[seqpos] = x.ref[refpos] seq[seqpos] = reference(h)[refpos]
refpos += 1 refpos += 1
seqpos += 1 seqpos += 1
end end

View file

@ -32,7 +32,7 @@ export Substitution
export Variation export Variation
export altbases export altbases
export mutation export mutation
export reconstruct! export reconstruct
export refbases export refbases
export reference export reference
export translate export translate

View file

@ -15,7 +15,7 @@ end
Deletion(x::Integer) = Deletion(convert(UInt, x)) Deletion(x::Integer) = Deletion(convert(UInt, x))
Base.length(x::Deletion) = Int(x.len) Base.length(x::Deletion) = Int(x.len)
Base.:(==)(x::Substitution, y::Substitution) = length(x) == length(y) Base.:(==)(x::Deletion, y::Deletion) = length(x) == length(y)
Base.hash(x::Deletion, h::UInt) = hash(Deletion, hash(x.len, h)) Base.hash(x::Deletion, h::UInt) = hash(Deletion, hash(x.len, h))
function _refbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence} function _refbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}

View file

@ -43,6 +43,10 @@ var = Haplotype(align(seq1, seq2))
end end
end end
@testset "HaplotypeReconstruction" begin
@test reconstruct(var) == seq1
end
@testset "VariationPosition" begin @testset "VariationPosition" begin
refseq = dna"ACAACTTTATCT" refseq = dna"ACAACTTTATCT"
mutseq = dna"ACATCTTTATCT" mutseq = dna"ACATCTTTATCT"