mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-25 14:49:55 +00:00
Rewrite
This commit is contained in:
parent
ad3fe4303f
commit
64ae3bc862
2 changed files with 47 additions and 53 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
name = "SequenceVariation"
|
||||||
|
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
||||||
|
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>"]
|
||||||
|
version = "0.1.0"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||||
|
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
|
module SequenceVariation
|
||||||
module t
|
|
||||||
|
|
||||||
# TODO: Add functionality to move a Variation to a new reference
|
# TODO: Add functionality to move a Variation to a new reference
|
||||||
# needs to be done with heavy checks to make sure the alignment of the two
|
# needs to be done with heavy checks to make sure the alignment of the two
|
||||||
|
@ -25,83 +24,70 @@ Call Variations
|
||||||
using BioSequences
|
using BioSequences
|
||||||
using BioAlignments
|
using BioAlignments
|
||||||
import BioAlignments: OP_START, OP_SEQ_MATCH, OP_SEQ_MISMATCH, OP_INSERT, OP_DELETE
|
import BioAlignments: OP_START, OP_SEQ_MATCH, OP_SEQ_MISMATCH, OP_INSERT, OP_DELETE
|
||||||
|
import BioSymbols: BioSymbol
|
||||||
|
|
||||||
abstract type Variation end
|
abstract type Edit end
|
||||||
|
|
||||||
struct Substitution{T} <: Variation
|
struct Substitution{T <: BioSymbol} <: Edit
|
||||||
alt::T
|
symbol::T
|
||||||
end
|
end
|
||||||
|
|
||||||
struct Deletion <: Variation
|
struct Deletion <: Edit
|
||||||
len::Int
|
len::Int
|
||||||
end
|
end
|
||||||
|
|
||||||
struct Insertion{A} <: Variation
|
struct Insertion{S <: BioSequence} <: Edit
|
||||||
seq::LongSequence{A}
|
seq::S
|
||||||
end
|
end
|
||||||
|
|
||||||
Base.:(==)(x::Insertion{A}, y::Insertion{A}) where A = x.seq == y.seq
|
# Two insertions of the same sequence type are equal when their inserted
# sequences compare equal (content comparison via the sequence's own `==`).
Base.:(==)(x::Insertion{S}, y::Insertion{S}) where {S} = x.seq == y.seq

# `==` above compares by sequence content, so `hash` has to be derived from the
# same content. Without this, two insertions that are `==` could hash
# differently and behave as distinct keys in a `Dict` or `Set`.
Base.hash(x::Insertion, h::UInt) = hash(Insertion, hash(x.seq, h))
|
||||||
|
|
||||||
# Metadata (such as sequence identifier) is intentionally left out
|
struct Diff{E <: Edit}
|
||||||
struct SeqVar{A <: Alphabet, V <: Variation}
|
|
||||||
ref::LongSequence{A}
|
|
||||||
pos::Int
|
pos::Int
|
||||||
var::V
|
edit::E
|
||||||
|
|
||||||
function SeqVar{A, V}(ref::LongSequence{A}, pos::Int, var::V) where {A <: Alphabet, V <: Variation}
|
|
||||||
n = new(ref, pos, var)
|
|
||||||
checkvar(n)
|
|
||||||
return n
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
function SeqVar{A}(ref::LongSequence{A}, pos::Int, var::Variation) where A
|
struct Variant{S <: BioSequence, E <: Edit}
|
||||||
return SeqVar{A,typeof(var)}(ref, pos, var)
|
ref::S
|
||||||
|
diffs::Vector{Diff{E}}
|
||||||
end
|
end
|
||||||
|
|
||||||
function SeqVar(ref::LongSequence{A}, pos::Int, var::Variation) where A
|
struct Variation{S <: BioSequence, E <: Edit}
|
||||||
return SeqVar{A, typeof(var)}(ref, pos, var)
|
ref::S
|
||||||
|
diff::Diff{E}
|
||||||
end
|
end
|
||||||
|
|
||||||
function checkvar(x::SeqVar{A, Deletion}) where A
|
###
|
||||||
checkbounds(x.ref, x.pos:x.pos + x.var.len - 1)
|
function check(v::Variation{S, <:Substitution{T}}) where {S, T}
|
||||||
|
T == eltype(S) || throw(TypeError(:check, "", eltype(S), T))
|
||||||
|
checkbounds(v.ref, v.diff.pos)
|
||||||
end
|
end
|
||||||
|
|
||||||
function checkvar(x::SeqVar{A, Substitution{T}}) where {A, T}
|
# Validate a deletion variation: it must remove at least one symbol, and the
# whole deleted span `pos:pos+len-1` must lie inside the reference sequence.
#
# Throws `ArgumentError` for a non-positive length and whatever `checkbounds`
# throws when the span falls outside `v.ref`. Returns the result of `checkbounds`.
function check(v::Variation{S, Deletion}) where S
    pos, len = v.diff.pos, v.diff.edit.len
    # A length <= 0 makes the range below empty. An empty range is vacuously in
    # bounds for Base arrays (presumably also for the reference sequence type),
    # so reject it explicitly, mirroring the empty-insertion check.
    len > 0 || throw(ArgumentError("Deletions must have a positive length"))
    return checkbounds(v.ref, pos:(pos + len - 1))
end
|
||||||
if T !== eltype(A)
|
|
||||||
throw(ArgumentError("Substitution type must be alphabet eltype"))
|
function check(v::Variation{S, <:Insertion}) where S
|
||||||
end
|
length(v.diff.edit.seq) > 0 || throw(ArgumentError("Insertions cannot be length 0"))
|
||||||
checkbounds(x.ref, x.pos)
|
# We can have insertions at the very end, after the reference sequence
|
||||||
|
v.diff.pos == lastindex(v.ref) + 1 && return nothing
|
||||||
|
checkbounds(v.ref, v.diff.pos)
|
||||||
end
|
end
|
||||||
|
|
||||||
function checkvar(x::SeqVar{A, Insertion{A}}) where A
|
# Print a substitution diff as its position immediately followed by the
# substituted symbol, with no separator between the two.
function Base.show(io::IO, x::Diff{<:Substitution})
    position, replacement = x.pos, x.edit.symbol
    return print(io, position, replacement)
end
|
||||||
if length(x.var.seq) < 1
|
# Print a deletion diff as `Δ<first>-<last>`, where `<first>` is the diff
# position and `<last>` is the final deleted position (`pos + len - 1`).
# The range form is used even when a single position is deleted.
function Base.show(io::IO, x::Diff{Deletion})
    first_deleted = x.pos
    last_deleted = first_deleted + x.edit.len - 1
    return print(io, 'Δ', first_deleted, '-', last_deleted)
end
|
||||||
throw(ArgumentError("Insertions cannot be empty"))
|
# Print an insertion diff as its position immediately followed by the inserted
# sequence, with no separator between the two.
function Base.show(io::IO, x::Diff{<:Insertion})
    inserted = x.edit.seq
    return print(io, x.pos, inserted)
end
|
||||||
end
|
|
||||||
if x.pos ∉ 1:lastindex(x.ref)+1
|
function Base.show(io::IO, x::Variant{S, <:Substitution}) where S
|
||||||
throw(BoundsError(x.ref, x.pos))
|
print(io, x.ref[x.diff.pos], x.diff.pos, x.diff.edit.symbol)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
function Base.show(io::IO, x::SeqVar{A, Deletion}) where A
|
# Show a single deletion variation by delegating to the display of its diff;
# the reference sequence is not consulted.
#
# This dispatches on `Variation` (fields `ref` and `diff`). The earlier
# `Variant` signature could never succeed: `Variant` stores a `diffs` vector and
# has no `diff` field, so the body failed on field access.
Base.show(io::IO, x::Variation{S, Deletion}) where S = show(io, x.diff)
|
||||||
print(io, 'Δ', x.pos)
|
# Show a single insertion variation by delegating to the display of its diff;
# the reference sequence is not consulted.
#
# This dispatches on `Variation` (fields `ref` and `diff`). The earlier
# `Variant` signature could never succeed: `Variant` stores a `diffs` vector and
# has no `diff` field, so the body failed on field access.
Base.show(io::IO, x::Variation{S, <:Insertion} where S) = show(io, x.diff)
|
||||||
if x.var.len > 1
|
|
||||||
print(io, '-', x.pos + x.var.len - 1)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
function Base.show(io::IO, x::SeqVar{A, <:Substitution}) where A
|
# Two variations of the same concrete type are equal when they point at the
# very same reference object (`===`, identity rather than content) and carry
# equal diffs.
#
# This is defined on `Variation`, the type that owns a single `diff` field.
# The earlier `Variant` bound could never succeed because `Variant` stores a
# `diffs` vector and has no `diff` field.
# NOTE(review): no `==` method for `Diff` is visible in this file, so
# `x.diff == y.diff` presumably falls back to `===`; confirm that this is the
# intended comparison for insertions, whose sequences are compared by content
# elsewhere.
Base.:(==)(x::T, y::T) where {T <: Variation} = (x.ref === y.ref) && (x.diff == y.diff)
|
||||||
print(io, x.ref[x.pos], x.pos, x.var.alt)
|
|
||||||
end
|
|
||||||
|
|
||||||
function Base.show(io::IO, x::SeqVar{A, Insertion{A}}) where A
|
#################
|
||||||
print(io, x.pos, x.var.seq)
|
|
||||||
end
|
|
||||||
|
|
||||||
function Base.:(==)(x::T, y::T) where {T <: SeqVar}
|
|
||||||
return x.ref === y.ref && x.pos == y.pos && x.var == y.var
|
|
||||||
end
|
|
||||||
|
|
||||||
|
#=
|
||||||
function variations(ref::S, refaln::S, seqaln::S) where {S <: BioSequence}
|
function variations(ref::S, refaln::S, seqaln::S) where {S <: BioSequence}
|
||||||
aln = AlignedSequence(seqaln, refaln)
|
aln = AlignedSequence(seqaln, refaln)
|
||||||
return variations(ref, refaln, seqaln, aln.aln.anchors)
|
return variations(ref, refaln, seqaln, aln.aln.anchors)
|
||||||
|
@ -214,5 +200,7 @@ function reconstruct(v::Vector{<:SeqVar{A}}) where A
|
||||||
oldpos, newpos = 1, 1
|
oldpos, newpos = 1, 1
|
||||||
for i in srt
|
for i in srt
|
||||||
=#
|
=#
|
||||||
|
=#
|
||||||
|
|
||||||
end
|
|
||||||
|
end # module
|
||||||
|
|
Loading…
Reference in a new issue