Merge pull request #11 from MillironX/feature/variations-from-variant

This commit is contained in:
Thomas A. Christensen II 2022-07-05 09:16:03 -05:00 committed by GitHub
commit e79968c4ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 2 deletions

View file

@ -21,7 +21,7 @@ TODO now:
""" """
using BioAlignments: BioAlignments, PairwiseAlignment using BioAlignments: BioAlignments, PairwiseAlignment
using BioGenerics: BioGenerics, leftposition using BioGenerics: BioGenerics, leftposition, rightposition
using BioSequences: BioSequences, BioSequence, NucleotideSeq, AminoAcidSeq, LongSequence, isgap using BioSequences: BioSequences, BioSequence, NucleotideSeq, AminoAcidSeq, LongSequence, isgap
using BioSymbols: BioSymbol using BioSymbols: BioSymbol
@ -109,6 +109,7 @@ struct Edit{S <: BioSequence, T <: BioSymbol}
end end
Base.:(==)(e1::Edit, e2::Edit) = e1.pos == e2.pos && e1.x == e2.x Base.:(==)(e1::Edit, e2::Edit) = e1.pos == e2.pos && e1.x == e2.x
Base.hash(x::Edit, h::UInt) = hash(Edit, hash((x.x, x.pos), h)) Base.hash(x::Edit, h::UInt) = hash(Edit, hash((x.x, x.pos), h))
"""
    length(e::Edit)

Return the length of the edit `e`: `1` when the mutation it wraps is a
`Substitution`, otherwise the `length` of the wrapped mutation.
"""
function Base.length(e::Edit)
    mut = mutation(e)
    # The `Edit` wrapper itself is never a `Substitution`; the kind of edit is
    # determined by the mutation it carries, so that is what must be inspected.
    return mut isa Substitution ? 1 : length(mut)
end
function Base.parse(::Type{T}, s::AbstractString) where {T <: Edit{Se, Sy}} where {Se, Sy} function Base.parse(::Type{T}, s::AbstractString) where {T <: Edit{Se, Sy}} where {Se, Sy}
parse(T, String(s)) parse(T, String(s))
@ -136,6 +137,17 @@ end
mutation(e::Edit) = e.x mutation(e::Edit) = e.x
BioGenerics.leftposition(e::Edit) = e.pos BioGenerics.leftposition(e::Edit) = e.pos
"""
    rightposition(e::Edit)

Return the right-most position of the edit `e`, derived from `leftposition(e)`:

- `Substitution`: the same position as `leftposition(e)`
- `Insertion`: `leftposition(e) + 1`
- `Deletion`: `leftposition(e) + length(e) - 1`

Raise an error when the wrapped mutation is none of these types.
"""
function BioGenerics.rightposition(e::Edit)
    mut = mutation(e)
    start = leftposition(e)

    # Guard clauses: the first matching mutation kind decides the result.
    mut isa Substitution && return start
    mut isa Insertion && return start + 1
    mut isa Deletion && return start + length(e) - 1

    error("Unknown mutation type $(typeof(mutation(e)))")
end
#= #=
@noinline throw_parse_error(T, p::Integer) = error("Failed to parse $T at byte $p") @noinline throw_parse_error(T, p::Integer) = error("Failed to parse $T at byte $p")
@ -292,6 +304,9 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
return result return result
end end
"""
    edits(v::Variant)

Return the collection of edits stored in `v` (its `edits` field).
"""
function edits(v::Variant)
    return v.edits
end
"""
    reference(v::Variant)

Return the reference stored in `v` (its `ref` field).
"""
function reference(v::Variant)
    return v.ref
end
function lendiff(edit::Edit) function lendiff(edit::Edit)
x = edit.x x = edit.x
x isa Substitution ? 0 : (x isa Deletion ? -length(x) : length(x.x)) x isa Substitution ? 0 : (x isa Deletion ? -length(x) : length(x.x))
@ -356,6 +371,7 @@ reference(v::Variation) = v.reference
edit(v::Variation) = v.edit edit(v::Variation) = v.edit
mutation(v::Variation) = mutation(edit(v)) mutation(v::Variation) = mutation(edit(v))
BioGenerics.leftposition(v::Variation) = leftposition(edit(v)) BioGenerics.leftposition(v::Variation) = leftposition(edit(v))
"""
    rightposition(v::Variation)

Return the right position of the edit carried by `v`, i.e.
`rightposition(edit(v))`.
"""
function BioGenerics.rightposition(v::Variation)
    wrapped = edit(v)
    return rightposition(wrapped)
end
Base.:(==)(x::Variation, y::Variation) = x.ref == y.ref && x.edit == y.edit Base.:(==)(x::Variation, y::Variation) = x.ref == y.ref && x.edit == y.edit
Base.hash(x::Variation, h::UInt) = hash(Variation, hash((x.ref, x.edit), h)) Base.hash(x::Variation, h::UInt) = hash(Variation, hash((x.ref, x.edit), h))
@ -440,12 +456,21 @@ function translate(var::Variation{S, T}, aln::PairwiseAlignment{S, S}) where {S,
end end
end end
"""
    variations(v::Variant)

Return a `Vector{Variation}` holding one `Variation` per edit of `v`, in the
same order as `edits(v)`. Each `Variation` is built from `reference(v)` and the
corresponding edit.
"""
function variations(v::Variant)
    # Typed comprehension keeps the element type as `Variation`.
    return Variation[Variation(reference(v), e) for e in edits(v)]
end
export Insertion, export Insertion,
Deletion, Deletion,
Substitution, Substitution,
Variant, Variant,
Variation, Variation,
reference, reference,
mutation mutation,
variations
end # module end # module

View file

@ -57,3 +57,15 @@ end
@test mutation(del) isa Deletion @test mutation(del) isa Deletion
@test mutation(ins) isa Insertion @test mutation(ins) isa Insertion
end end
@testset "VariationRetrieval" begin
    # Target and query are identical except at position 4
    # (A in the target, T in the query).
    target = dna"ACAACTTTATCT"
    query = dna"ACATCTTTATCT"

    # Align the first ten query bases to the target using the CIGAR string "10M".
    aligned_query = AlignedSequence(query[1:10], Alignment("10M", 1, 1))
    pairwise = PairwiseAlignment(aligned_query, target)
    variant = Variant(pairwise)

    # The first variation retrieved from the variant should equal "A4T".
    expected = Variation(target, "A4T")
    @test first(variations(variant)) == expected
end