diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index a3847ba..06c8aa7 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -21,7 +21,7 @@ TODO now: """ using BioAlignments: BioAlignments, PairwiseAlignment -using BioGenerics: BioGenerics, leftposition +using BioGenerics: BioGenerics, leftposition, rightposition using BioSequences: BioSequences, BioSequence, NucleotideSeq, AminoAcidSeq, LongSequence, isgap using BioSymbols: BioSymbol @@ -109,6 +109,7 @@ struct Edit{S <: BioSequence, T <: BioSymbol} end Base.:(==)(e1::Edit, e2::Edit) = e1.pos == e2.pos && e1.x == e2.x Base.hash(x::Edit, h::UInt) = hash(Edit, hash((x.x, x.pos), h)) +Base.length(e::Edit) = e isa Substitution ? 1 : length(mutation(e)) function Base.parse(::Type{T}, s::AbstractString) where {T <: Edit{Se, Sy}} where {Se, Sy} parse(T, String(s)) @@ -136,6 +137,17 @@ end mutation(e::Edit) = e.x BioGenerics.leftposition(e::Edit) = e.pos +function BioGenerics.rightposition(e::Edit) + if mutation(e) isa Substitution + return leftposition(e) + elseif mutation(e) isa Insertion + return leftposition(e) + 1 + elseif mutation(e) isa Deletion + return leftposition(e) + length(e) - 1 + else + error("Unknown mutation type $(typeof(mutation(e)))") + end +end #= @noinline throw_parse_error(T, p::Integer) = error("Failed to parse $T at byte $p") @@ -292,6 +304,9 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{ return result end +edits(v::Variant) = v.edits +reference(v::Variant) = v.ref + function lendiff(edit::Edit) x = edit.x x isa Substitution ? 0 : (x isa Deletion ? -length(x) : length(x.x)) @@ -356,6 +371,7 @@ reference(v::Variation) = v.reference edit(v::Variation) = v.edit mutation(v::Variation) = mutation(edit(v)) BioGenerics.leftposition(v::Variation) = leftposition(edit(v)) +BioGenerics.rightposition(v::Variation) = rightposition(edit(v)) Base.:(==)(x::Variation, y::Variation) = x.ref == y.ref && x.edit == y.edit Base.hash(x::Variation, h::UInt) = hash(Variation, hash((x.ref, x.edit), h)) @@ -440,12 +456,21 @@ function translate(var::Variation{S, T}, aln::PairwiseAlignment{S, S}) where {S, end end +function variations(v::Variant) + vs = Vector{Variation}(undef, length(edits(v))) + for (i, e) in enumerate(edits(v)) + vs[i] = Variation(reference(v), e) + end + return vs +end + export Insertion, Deletion, Substitution, Variant, Variation, reference, - mutation + mutation, + variations end # module diff --git a/test/runtests.jl b/test/runtests.jl index 280f18a..cad0d7f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -57,3 +57,15 @@ end @test mutation(del) isa Deletion @test mutation(ins) isa Insertion end + +@testset "VariationRetrieval" begin + refseq = dna"ACAACTTTATCT" + mutseq = dna"ACATCTTTATCT" + + read = AlignedSequence(mutseq[1:10], Alignment("10M", 1, 1)) + aln = PairwiseAlignment(read, refseq) + var = Variant(aln) + + sub = Variation(refseq, "A4T") + @test first(variations(var)) == sub +end