Compare commits

...

6 commits

Author SHA1 Message Date
2ff4181dd8
Fix deletion equality function
Fixes: da2cbbd528 ("Add equals functionality to Deletion")

Signed-off-by: Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>
2023-01-10 11:41:08 -06:00
5bd939a15c Update CHANGELOG 2023-01-10 11:22:06 -06:00
6c86782a2c Update documentation to reflect new reconstruct syntax 2023-01-10 11:22:06 -06:00
b870007826 Add test for reconstruct function 2023-01-10 11:22:06 -06:00
b80dd8c950 Rename reconstruct! function to reconstruct
The exclamation mark is conventionally used to mark mutating functions, but
reconstruct is no longer mutating, so change the notation.
2023-01-10 11:22:06 -06:00
adbf0ce7f1 Refactor reconstruct! to use the built-in reference sequence
reconstruct! previously allow changing of _any_ sequence to match a
haplotype, with no check to see if that sequence is compatible with the
reference sequence the haplotype is based on. Change that to only allow
reconstructing sequences from the reference sequence itself, making this a
non-mutating function.
2023-01-10 11:22:06 -06:00
7 changed files with 21 additions and 16 deletions

View file

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Code now follows [Blue style](https://github.com/invenia/BlueStyle) ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
- :bomb: [BREAKING] Public and private API defined based on Blue style guidelines ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
- :bomb: [BREAKING] Renamed type `Variant` to `Haplotype` ([#20](https://github.com/BioJulia/SequenceVariation.jl/issues/20)/[#29](https://github.com/BioJulia/SequenceVariation.jl/pull/29))
- :bomb: [BREAKING] Refactored `reconstruct` function to use `Haplotype`'s reference sequence ([#30](https://github.com/BioJulia/SequenceVariation.jl/pull/30))
### Removed

View file

@ -21,7 +21,7 @@ Insertion
Haplotype
reference(::Haplotype)
variations
reconstruct!
reconstruct
```
## Variations

View file

@ -31,11 +31,10 @@ bos_human_haplotype = Haplotype(bos_human_alignment)
If the alternate sequence of a haplotype is no longer available (as is often the
case when calling variants from alignment files), then the sequence can be
retrieved using the [`reconstruct!`](@ref) function.
retrieved using the [`reconstruct`](@ref) function.
```@repl call_variants
human2 = copy(bovine);
reconstruct!(human2, bos_human_haplotype)
human2 = reconstruct(bos_human_haplotype)
human2 == bovine
human2 == human
```

View file

@ -162,21 +162,22 @@ reference(h::Haplotype) = h.ref
Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits
"""
reconstruct!(seq::S, x::Haplotype{S}) where {S}
reconstruct(h::Haplotype)
Apply the edits in `x` to `seq` and return the mutated sequence
Apply the edits in `h` to the reference sequence of `h` and return the mutated sequence
"""
function reconstruct!(seq::S, x::Haplotype{S}) where {S}
len = length(x.ref) + sum(edit -> _lendiff(edit), _edits(x))
function reconstruct(h::Haplotype)
len = length(reference(h)) + sum(edit -> _lendiff(edit), _edits(h))
seq = copy(reference(h))
resize!(seq, len % UInt)
refpos = seqpos = 1
for edit in x.edits
while refpos < edit.pos
seq[seqpos] = x.ref[refpos]
for edit in _edits(h)
while refpos < leftposition(edit)
seq[seqpos] = reference(h)[refpos]
refpos += 1
seqpos += 1
end
editx = edit.x
editx = _mutation(edit)
if editx isa Substitution
seq[seqpos] = editx.x
seqpos += 1
@ -184,14 +185,14 @@ function reconstruct!(seq::S, x::Haplotype{S}) where {S}
elseif editx isa Deletion
refpos += editx.len
elseif editx isa Insertion
for i in editx.x
for i in editx.seq
seq[seqpos] = i
seqpos += 1
end
end
end
while seqpos length(seq)
seq[seqpos] = x.ref[refpos]
seq[seqpos] = reference(h)[refpos]
refpos += 1
seqpos += 1
end

View file

@ -32,7 +32,7 @@ export Substitution
export Variation
export altbases
export mutation
export reconstruct!
export reconstruct
export refbases
export reference
export translate

View file

@ -15,7 +15,7 @@ end
Deletion(x::Integer) = Deletion(convert(UInt, x))
Base.length(x::Deletion) = Int(x.len)
Base.:(==)(x::Substitution, y::Substitution) = length(x) == length(y)
Base.:(==)(x::Deletion, y::Deletion) = length(x) == length(y)
Base.hash(x::Deletion, h::UInt) = hash(Deletion, hash(x.len, h))
function _refbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}

View file

@ -43,6 +43,10 @@ var = Haplotype(align(seq1, seq2))
end
end
@testset "HaplotypeReconstruction" begin
@test reconstruct(var) == seq1
end
@testset "VariationPosition" begin
refseq = dna"ACAACTTTATCT"
mutseq = dna"ACATCTTTATCT"