This commit is contained in:
Jakob Nybo Nissen 2020-10-20 15:26:27 +02:00
parent ad3fe4303f
commit 64ae3bc862
2 changed files with 47 additions and 53 deletions

View file

@ -1,3 +1,9 @@
name = "SequenceVariation"
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>"]
version = "0.1.0"
[deps] [deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"

View file

@ -1,5 +1,4 @@
module SequenceVariation
module t
# TODO: Add functionality to move a Variation to a new reference # TODO: Add functionality to move a Variation to a new reference
# needs to be done with heavy checks to make sure the alignment of the two # needs to be done with heavy checks to make sure the alignment of the two
@ -25,83 +24,70 @@ Call Variations
using BioSequences using BioSequences
using BioAlignments using BioAlignments
import BioAlignments: OP_START, OP_SEQ_MATCH, OP_SEQ_MISMATCH, OP_INSERT, OP_DELETE import BioAlignments: OP_START, OP_SEQ_MATCH, OP_SEQ_MISMATCH, OP_INSERT, OP_DELETE
import BioSymbols: BioSymbol
abstract type Variation end abstract type Edit end
struct Substitution{T} <: Variation struct Substitution{T <: BioSymbol} <: Edit
alt::T symbol::T
end end
struct Deletion <: Variation struct Deletion <: Edit
len::Int len::Int
end end
struct Insertion{A} <: Variation struct Insertion{S <: BioSequence} <: Edit
seq::LongSequence{A} seq::S
end end
Base.:(==)(x::Insertion{A}, y::Insertion{A}) where A = x.seq == y.seq Base.:(==)(x::Insertion{A}, y::Insertion{A}) where A = x.seq == y.seq
# Metadata (such as sequence identifier) is intentionally left out struct Diff{E <: Edit}
struct SeqVar{A <: Alphabet, V <: Variation}
ref::LongSequence{A}
pos::Int pos::Int
var::V edit::E
function SeqVar{A, V}(ref::LongSequence{A}, pos::Int, var::V) where {A <: Alphabet, V <: Variation}
n = new(ref, pos, var)
checkvar(n)
return n
end
end end
function SeqVar{A}(ref::LongSequence{A}, pos::Int, var::Variation) where A struct Variant{S <: BioSequence, E <: Edit}
return SeqVar{A,typeof(var)}(ref, pos, var) ref::S
diffs::Vector{Diff{E}}
end end
function SeqVar(ref::LongSequence{A}, pos::Int, var::Variation) where A struct Variation{S <: BioSequence, E <: Edit}
return SeqVar{A, typeof(var)}(ref, pos, var) ref::S
diff::Diff{E}
end end
function checkvar(x::SeqVar{A, Deletion}) where A ###
checkbounds(x.ref, x.pos:x.pos + x.var.len - 1) function check(v::Variation{S, <:Substitution{T}}) where {S, T}
T == eltype(S) || throw(TypeError(:check, "", eltype(S), T))
checkbounds(v.ref, v.diff.pos)
end end
function checkvar(x::SeqVar{A, Substitution{T}}) where {A, T} check(v::Variation{S, Deletion}) where S = checkbounds(v.ref, v.diff.pos:(v.diff.pos+v.diff.edit.len)-1)
if T !== eltype(A)
throw(ArgumentError("Substitution type must be alphabet eltype")) function check(v::Variation{S, <:Insertion}) where S
end length(v.diff.edit.seq) > 0 || throw(ArgumentError("Insertions cannot be length 0"))
checkbounds(x.ref, x.pos) # We can have insertions at the very end, after the reference sequence
v.diff.pos == lastindex(v.ref) + 1 && return nothing
checkbounds(v.ref, v.diff.pos)
end end
function checkvar(x::SeqVar{A, Insertion{A}}) where A Base.show(io::IO, x::Diff{<:Substitution}) = print(io, x.pos, x.edit.symbol)
if length(x.var.seq) < 1 Base.show(io::IO, x::Diff{Deletion}) = print(io, 'Δ', x.pos, '-', x.pos + x.edit.len - 1)
throw(ArgumentError("Insertions cannot be empty")) Base.show(io::IO, x::Diff{<:Insertion}) = print(io, x.pos, x.edit.seq)
end
if x.pos 1:lastindex(x.ref)+1 function Base.show(io::IO, x::Variant{S, <:Substitution}) where S
throw(BoundsError(x.ref, x.pos)) print(io, x.ref[x.diff.pos], x.diff.pos, x.diff.edit.symbol)
end
end end
function Base.show(io::IO, x::SeqVar{A, Deletion}) where A Base.show(io::IO, x::Variant{S, Deletion}) where S = show(io, x.diff)
print(io, 'Δ', x.pos) Base.show(io::IO, x::Variant{S, <:Insertion} where S) = show(io, x.diff)
if x.var.len > 1
print(io, '-', x.pos + x.var.len - 1)
end
end
function Base.show(io::IO, x::SeqVar{A, <:Substitution}) where A Base.:(==)(x::T, y::T) where {T <: Variant} = (x.ref === y.ref) & (x.diff == y.diff)
print(io, x.ref[x.pos], x.pos, x.var.alt)
end
function Base.show(io::IO, x::SeqVar{A, Insertion{A}}) where A #################
print(io, x.pos, x.var.seq)
end
function Base.:(==)(x::T, y::T) where {T <: SeqVar}
return x.ref === y.ref && x.pos == y.pos && x.var == y.var
end
#=
function variations(ref::S, refaln::S, seqaln::S) where {S <: BioSequence} function variations(ref::S, refaln::S, seqaln::S) where {S <: BioSequence}
aln = AlignedSequence(seqaln, refaln) aln = AlignedSequence(seqaln, refaln)
return variations(ref, refaln, seqaln, aln.aln.anchors) return variations(ref, refaln, seqaln, aln.aln.anchors)
@ -214,5 +200,7 @@ function reconstruct(v::Vector{<:SeqVar{A}}) where A
oldpos, newpos = 1, 1 oldpos, newpos = 1, 1
for i in srt for i in srt
=# =#
=#
end
end # module