Merge pull request #16 from MillironX/feature/getbases

This commit is contained in:
Thomas A. Christensen II 2022-07-14 15:00:10 +00:00 committed by GitHub
commit 6a77d6d589
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 79 additions and 2 deletions

View file

@ -14,5 +14,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Variation` type to store and compare individual mutations
- `reconstruct!` function to build mutated sequences from `Variant`s
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
- Methods to get reference and alternate bases from a `Variation`
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl

View file

@ -367,7 +367,7 @@ function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
return Variation{S,T}(ref, e)
end
# Accessors for `Variation`.
# NOTE(review): `reference(v::Variation)` is defined twice in a row, reading
# `v.reference` and then `v.ref`. This looks like the removed/added pair of a diff
# hunk rather than two intended methods — confirm which field `Variation` declares.
# If both lines were evaluated as code, the second definition would replace the first.
reference(v::Variation) = v.reference
reference(v::Variation) = v.ref
# Return the `edit` field of `v`.
edit(v::Variation) = v.edit
# The mutation of a variation is the mutation of its edit.
mutation(v::Variation) = mutation(edit(v))
# The left position of a variation is the left position of its edit.
BioGenerics.leftposition(v::Variation) = leftposition(edit(v))
@ -464,6 +464,50 @@ function variations(v::Variant)
return vs
end
"""
    _refbases(s::Substitution, reference::S, pos::UInt) where {S<:BioSequence}

Return the reference bases for a substitution at `pos`: a one-element sequence of
type `S` holding `reference[pos]`. The substitution `s` itself is not inspected.
"""
function _refbases(s::Substitution, reference::S, pos::UInt) where {S<:BioSequence}
    replaced = reference[pos]
    return S([replaced])
end
"""
    _altbases(s::Substitution, reference::S, pos::UInt) where {S<:BioSequence}

Return the alternate bases for substitution `s`: a one-element sequence of type `S`
holding the substituted symbol `s.x`. Neither `reference` nor `pos` is read.
"""
function _altbases(s::Substitution, reference::S, pos::UInt) where {S<:BioSequence}
    substituted = s.x
    return S([substituted])
end
"""
    _refbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}

Return the reference bases for deletion `d` starting at `pos`: a slice of
`reference` that is `length(d) + 1` positions long.

The slice starts one position before `pos`, so it includes the base preceding the
deletion. When `pos == 1` there is no preceding position, so the slice starts at
`pos` and extends one position further to the right instead.
"""
function _refbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}
    window = if pos == 1
        UnitRange{Int}(pos, pos + length(d))
    else
        UnitRange{Int}(pos - 1, pos + length(d) - 1)
    end
    return S(reference[window])
end
"""
    _altbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}

Return the alternate bases for deletion `d` starting at `pos`: a one-element
sequence of type `S` holding the anchoring base that remains once the deleted
positions are removed.

The anchor is the base immediately before the deletion, `reference[pos - 1]`. A
deletion starting at position 1 has no preceding base, so the first base after the
deleted run, `reference[pos + length(d)]`, is used instead. This is the last base
of the span returned by the `Deletion` method of `_refbases`, so the reference and
alternate bases stay consistent for deletions of any length.

NOTE(review): a deletion that starts at position 1 and covers the whole reference
has no base after it; indexing is then out of range — presumably a `BoundsError`,
confirm against the sequence type.
"""
function _altbases(d::Deletion, reference::S, pos::UInt) where {S<:BioSequence}
    # `pos + 1` would only be correct for a single-base deletion; skip past every
    # deleted position so the anchor is a base that actually survives the edit.
    anchor = pos == 1 ? pos + length(d) : pos - 1
    return S([reference[anchor]])
end
"""
    _refbases(i::Insertion, reference::S, pos::UInt) where {S<:BioSequence}

Return the reference bases for an insertion at `pos`: a one-element sequence of
type `S` holding `reference[pos]`. The insertion `i` itself is not inspected.
"""
function _refbases(i::Insertion, reference::S, pos::UInt) where {S<:BioSequence}
    anchor = reference[pos]
    return S([anchor])
end
"""
    _altbases(i::Insertion, reference::S, pos::UInt) where {S<:BioSequence}

Return the alternate bases for insertion `i` at `pos`: the inserted symbols
`i.seq` joined with the reference base `reference[pos]`, as a sequence of type `S`.

When `pos == 1` the inserted symbols come first and the reference base last; at any
other position the reference base comes first, followed by the inserted symbols.
"""
function _altbases(i::Insertion, reference::S, pos::UInt) where {S<:BioSequence}
    at_start = pos == 1
    bases = at_start ? [i.seq..., reference[pos]] : [reference[pos], i.seq...]
    return S(bases)
end
"""
    refbases(v::Variation)

Return the reference bases of `v`, computed by the `_refbases` method selected by
the mutation of `v`, given the reference sequence and left position of `v`.
"""
function refbases(v::Variation)
    mut = mutation(v)
    refseq = reference(v)
    start = leftposition(v)
    return _refbases(mut, refseq, start)
end
"""
    altbases(v::Variation)

Return the alternate bases of `v`, computed by the `_altbases` method selected by
the mutation of `v`, given the reference sequence and left position of `v`.
"""
function altbases(v::Variation)
    mut = mutation(v)
    refseq = reference(v)
    start = leftposition(v)
    return _altbases(mut, refseq, start)
end
export Insertion,
Deletion,
Substitution,
@ -471,6 +515,8 @@ export Insertion,
Variation,
reference,
mutation,
variations
variations,
refbases,
altbases
end # module

View file

@ -69,3 +69,33 @@ end
sub = Variation(refseq, "A4T")
@test first(variations(var)) == sub
end
@testset "VariationBases" begin
    # Each case is (edit applied to dna"ATCGA", expected reference bases,
    # expected alternate bases).
    cases = [
        ("C3G", dna"C", dna"G"),        # substitution
        ("Δ3-3", dna"TC", dna"T"),      # single-base deletion
        ("Δ3-4", dna"TCG", dna"T"),     # multi-base deletion
        ("Δ1-1", dna"AT", dna"T"),      # deletion at the first position
        ("3A", dna"C", dna"CA"),        # single-base insertion
        ("3TAG", dna"C", dna"CTAG"),    # multi-base insertion
        ("1C", dna"A", dna"CA"),        # insertion at the first position
    ]

    for (edit_str, expected_ref, expected_alt) in cases
        @test refbases(Variation(dna"ATCGA", edit_str)) == expected_ref
        @test altbases(Variation(dna"ATCGA", edit_str)) == expected_alt
    end
end