mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-22 13:29:56 +00:00
Move Variation-related code to Variation.jl
This commit is contained in:
parent
37965ac7eb
commit
8c2dd271f3
2 changed files with 135 additions and 133 deletions
|
@ -33,139 +33,7 @@ struct Inapplicable end
|
||||||
|
|
||||||
include("Edit.jl")
|
include("Edit.jl")
|
||||||
include("Variant.jl")
|
include("Variant.jl")
|
||||||
|
include("Variation.jl")
|
||||||
"""
    Variation{S<:BioSequence,T<:BioSymbol}

A single `Edit{S,T}` together with the reference sequence `ref` it is expressed
against.

The only inner constructor takes an `Unsafe` marker as its third argument and stores
`ref` and `e` as given; it performs no check that the edit fits the reference.
"""
struct Variation{S<:BioSequence,T<:BioSymbol}
    ref::S
    edit::Edit{S,T}

    # Unchecked constructor: the `Unsafe` argument only selects this method.
    Variation{S,T}(ref::S, e::Edit{S,T}, ::Unsafe) where {S<:BioSequence,T<:BioSymbol} =
        new{S,T}(ref, e)
end
|
|
||||||
|
|
||||||
"""
    Variation{S,T}(ref::S, e::Edit{S,T})

Build a `Variation` from `ref` and `e`, returning it only if `is_valid` accepts it.

# Throws
- `ArgumentError`: when `is_valid` returns `false` for the constructed value.
"""
function Variation{S,T}(ref::S, e::Edit{S,T}) where {S<:BioSequence,T<:BioSymbol}
    candidate = Variation{S,T}(ref, e, Unsafe())
    is_valid(candidate) || throw(ArgumentError("Invalid variant"))
    return candidate
end
|
|
||||||
|
|
||||||
"""
    Variation(ref::S, edit::Edit{S,T})

Convenience constructor: take the type parameters from the arguments and forward to
`Variation{S,T}(ref, edit)`.
"""
function Variation(ref::S, edit::Edit{S,T}) where {S,T}
    return Variation{S,T}(ref, edit)
end
|
|
||||||
|
|
||||||
"""
    Variation(ref::S, edit::AbstractString) where {S<:BioSequence}

Parse `edit` as an `Edit{S,eltype(ref)}` and build the `Variation` of `ref` that
carries it.
"""
function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
    T = eltype(ref)
    return Variation{S,T}(ref, parse(Edit{S,T}, edit))
end
|
|
||||||
|
|
||||||
"""
    Variant(ref::S, vars::Vector{Variation{S,T}})

Build a `Variant{S,T}` on `ref` from the edits of `vars`.

Only the edit of each element of `vars` is used; the reference stored in each
`Variation` is not compared with `ref` in this method.
"""
function Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
    return Variant{S,T}(ref, edit.(vars))
end
|
|
||||||
|
|
||||||
"""
    reference(v::Variation)

Return the reference sequence stored in `v`.
"""
function reference(v::Variation)
    return v.ref
end

"""
    edit(v::Variation)

Return the `Edit` stored in `v`.
"""
function edit(v::Variation)
    return v.edit
end

"""
    mutation(v::Variation)

Return `mutation(edit(v))`, the mutation of the edit carried by `v`.
"""
function mutation(v::Variation)
    return mutation(edit(v))
end

# The positions of a variation are those of the edit it carries.
function BioGenerics.leftposition(v::Variation)
    return leftposition(edit(v))
end

function BioGenerics.rightposition(v::Variation)
    return rightposition(edit(v))
end
|
|
||||||
# Two variations are equal when both their references and their edits are equal.
function Base.:(==)(x::Variation, y::Variation)
    return x.ref == y.ref && x.edit == y.edit
end

# Hash the same fields `==` compares, mixed with the `Variation` type itself.
function Base.hash(x::Variation, h::UInt)
    return hash(Variation, hash((x.ref, x.edit), h))
end
|
|
||||||
|
|
||||||
"""
    is_valid(v::Variation)

Return `true` if the edit of `v` can be placed on its reference, `false` otherwise.

An empty reference is never valid. Otherwise the edit position must lie in:
- `eachindex(v.ref)` for a `Substitution`;
- `0:(lastindex(v.ref) + 1)` for an `Insertion`;
- `1:(lastindex(v.ref) - length(op) + 1)` for a `Deletion`.

Any other kind of edit is reported as invalid.
"""
function is_valid(v::Variation)
    isempty(v.ref) && return false
    op = v.edit.x
    pos = v.edit.pos
    if op isa Substitution
        return pos in eachindex(v.ref)
    elseif op isa Insertion
        return pos in 0:(lastindex(v.ref) + 1)
    elseif op isa Deletion
        return pos in 1:(lastindex(v.ref) - length(op) + 1)
    end
    # Always return a `Bool`: falling off the chain used to yield `nothing`, which
    # makes `is_valid(v) ? a : b` fail with a `TypeError` instead of rejecting `v`.
    return false
end
|
|
||||||
|
|
||||||
"""
    Base.show(io::IO, x::Variation)

Print a compact form of `x` to `io`:
- substitution: the reference symbol at the position, the position, the new symbol;
- deletion: `Δ` followed by the first and last deleted positions joined by `-`;
- insertion: the position followed by the inserted sequence.
"""
function Base.show(io::IO, x::Variation)
    change = x.edit.x
    at = x.edit.pos
    if change isa Substitution
        print(io, x.ref[at], at, change.x)
    elseif change isa Deletion
        last_deleted = at + change.len - 1
        print(io, 'Δ', at, '-', last_deleted)
    elseif change isa Insertion
        print(io, at, change.seq)
    else
        # Fallback for any other edit kind: position followed by its `x` field.
        print(io, at, change.x)
    end
end
|
|
||||||
|
|
||||||
"""
    Base.in(v::Variation, var::Variant)

Return `true` if the edit of `v` equals one of the edits of `var`.

Raises an error with the message "References must be equal" when the references of
`v` and `var` differ.
"""
function Base.in(v::Variation, var::Variant)
    v.ref != var.ref && error("References must be equal")
    return any(candidate -> v.edit == candidate, var.edits)
end
|
|
||||||
|
|
||||||
"""
    translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}

Carry `var`, whose edit position refers to the reference of `aln` (`aln.b`), over to
the aligned sequence of `aln`.

# Returns
- A `Variation{S,T}` on the aligned sequence when the edit can be carried over.
- `nothing` for a substitution that does not land on a match/mismatch operation or
  that would substitute the symbol already present, and for a deletion whose span
  maps to an empty range of the aligned sequence.
- `Inapplicable()` for an insertion that cannot be placed: at position 0 when the
  first alignment column holds a gap, where an insertion already exists at the
  position, or where the position does not map onto a symbol.
"""
function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
    kind = var.edit.x
    pos = var.edit.pos
    # NOTE(review): the `PairwiseAlignment` type in BioAlignments appears to keep its
    # aligned sequence under `aln.a` — confirm that `aln.seq` resolves as intended.
    seq, ref = aln.seq, aln.b

    # Special case: insertions may have a pos of 0, which cannot be mapped to
    # the seq using ref2seq
    if iszero(pos)
        (s, r), _ = iterate(aln)
        (isgap(s) | isgap(r)) && return Inapplicable()
        # `kind` is already the insertion to carry over; pass it on unchanged
        # instead of wrapping it in a second `Insertion`.
        return Variation{S,T}(seq, Edit{S,T}(kind, 0))
    end

    (seqpos, op) = BA.ref2seq(aln, pos)
    on_symbol = op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)

    if kind isa Substitution
        # If it's a substitution, return nothing if it maps to a deleted
        # position, or substitutes to same base.
        on_symbol || return nothing
        seq[seqpos] == kind.x && return nothing
        return Variation{S,T}(seq, Edit{S,T}(kind, seqpos), Unsafe())
    elseif kind isa Deletion
        # If it's a deletion, return nothing if the deleted part is already missing
        # from the new reference.
        (stop, _) = BA.ref2seq(aln, pos + length(kind) - 1)
        # Parenthesised on purpose: `+` binds tighter than `==`, so without the
        # parentheses this compared `seqpos + op` with `OP_DELETE`.
        # Presumably `ref2seq` reports the preceding sequence position for a deleted
        # reference position, hence the shift by one — confirm against BioAlignments.
        start = seqpos + (op == BA.OP_DELETE)
        # An empty range means nothing of the deletion is left in `seq`.
        start > stop && return nothing
        return Variation{S,T}(seq, Edit{S,T}(Deletion(stop - start + 1), start), Unsafe())
    end

    # Remaining case: an insertion.
    # It can map to a deletion. In that case, it becomes really tricky to talk
    # about the "same" insertion.
    on_symbol || return Inapplicable()
    # This happens if there is already an insertion at the position
    if pos != lastindex(ref) && first(BA.ref2seq(aln, pos + 1)) != seqpos + 1
        return Inapplicable()
    end
    # It maps directly to a symbol, just insert
    return Variation{S,T}(seq, Edit{S,T}(kind, seqpos), Unsafe())
end
|
|
||||||
|
|
||||||
"""
    variations(v::Variant{S,T}) where {S,T}

Return a `Vector{Variation{S,T}}` with one element per edit of `v`, in the order of
`edits(v)`, each built with `Variation{S,T}(reference(v), edit)`.
"""
function variations(v::Variant{S,T}) where {S,T}
    return Variation{S,T}[Variation{S,T}(reference(v), e) for e in edits(v)]
end
|
|
||||||
|
|
||||||
"""
    refbases(v::Variation)

Return `_refbases` applied to the mutation, reference and left position of `v`.

Presumably these are the reference-side bases of the variation — confirm against
`_refbases`.
"""
refbases(v::Variation) = _refbases(mutation(v), reference(v), leftposition(v))
|
|
||||||
|
|
||||||
"""
    altbases(v::Variation)

Return `_altbases` applied to the mutation, reference and left position of `v`.

Presumably these are the alternate-side bases of the variation — confirm against
`_altbases`.
"""
altbases(v::Variation) = _altbases(mutation(v), reference(v), leftposition(v))
|
|
||||||
|
|
||||||
export Insertion,
|
export Insertion,
|
||||||
Deletion,
|
Deletion,
|
||||||
|
|
134
src/Variation.jl
Normal file
134
src/Variation.jl
Normal file
|
@ -0,0 +1,134 @@
|
||||||
|
"""
    Variation{S<:BioSequence,T<:BioSymbol}

A single `Edit{S,T}` together with the reference sequence `ref` it is expressed
against.

The only inner constructor takes an `Unsafe` marker as its third argument and stores
`ref` and `e` as given; it performs no check that the edit fits the reference.
"""
struct Variation{S<:BioSequence,T<:BioSymbol}
    ref::S
    edit::Edit{S,T}

    # Unchecked constructor: the `Unsafe` argument only selects this method.
    Variation{S,T}(ref::S, e::Edit{S,T}, ::Unsafe) where {S<:BioSequence,T<:BioSymbol} =
        new{S,T}(ref, e)
end
|
||||||
|
|
||||||
|
"""
    Variation{S,T}(ref::S, e::Edit{S,T})

Build a `Variation` from `ref` and `e`, returning it only if `is_valid` accepts it.

# Throws
- `ArgumentError`: when `is_valid` returns `false` for the constructed value.
"""
function Variation{S,T}(ref::S, e::Edit{S,T}) where {S<:BioSequence,T<:BioSymbol}
    candidate = Variation{S,T}(ref, e, Unsafe())
    is_valid(candidate) || throw(ArgumentError("Invalid variant"))
    return candidate
end
|
||||||
|
|
||||||
|
"""
    Variation(ref::S, edit::Edit{S,T})

Convenience constructor: take the type parameters from the arguments and forward to
`Variation{S,T}(ref, edit)`.
"""
function Variation(ref::S, edit::Edit{S,T}) where {S,T}
    return Variation{S,T}(ref, edit)
end
|
||||||
|
|
||||||
|
"""
    Variation(ref::S, edit::AbstractString) where {S<:BioSequence}

Parse `edit` as an `Edit{S,eltype(ref)}` and build the `Variation` of `ref` that
carries it.
"""
function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
    T = eltype(ref)
    return Variation{S,T}(ref, parse(Edit{S,T}, edit))
end
|
||||||
|
|
||||||
|
"""
    Variant(ref::S, vars::Vector{Variation{S,T}})

Build a `Variant{S,T}` on `ref` from the edits of `vars`.

Only the edit of each element of `vars` is used; the reference stored in each
`Variation` is not compared with `ref` in this method.
"""
function Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
    return Variant{S,T}(ref, edit.(vars))
end
|
||||||
|
|
||||||
|
"""
    reference(v::Variation)

Return the reference sequence stored in `v`.
"""
function reference(v::Variation)
    return v.ref
end

"""
    edit(v::Variation)

Return the `Edit` stored in `v`.
"""
function edit(v::Variation)
    return v.edit
end

"""
    mutation(v::Variation)

Return `mutation(edit(v))`, the mutation of the edit carried by `v`.
"""
function mutation(v::Variation)
    return mutation(edit(v))
end

# The positions of a variation are those of the edit it carries.
function BioGenerics.leftposition(v::Variation)
    return leftposition(edit(v))
end

function BioGenerics.rightposition(v::Variation)
    return rightposition(edit(v))
end
|
||||||
|
# Two variations are equal when both their references and their edits are equal.
function Base.:(==)(x::Variation, y::Variation)
    return x.ref == y.ref && x.edit == y.edit
end

# Hash the same fields `==` compares, mixed with the `Variation` type itself.
function Base.hash(x::Variation, h::UInt)
    return hash(Variation, hash((x.ref, x.edit), h))
end
|
||||||
|
|
||||||
|
"""
    is_valid(v::Variation)

Return `true` if the edit of `v` can be placed on its reference, `false` otherwise.

An empty reference is never valid. Otherwise the edit position must lie in:
- `eachindex(v.ref)` for a `Substitution`;
- `0:(lastindex(v.ref) + 1)` for an `Insertion`;
- `1:(lastindex(v.ref) - length(op) + 1)` for a `Deletion`.

Any other kind of edit is reported as invalid.
"""
function is_valid(v::Variation)
    isempty(v.ref) && return false
    op = v.edit.x
    pos = v.edit.pos
    if op isa Substitution
        return pos in eachindex(v.ref)
    elseif op isa Insertion
        return pos in 0:(lastindex(v.ref) + 1)
    elseif op isa Deletion
        return pos in 1:(lastindex(v.ref) - length(op) + 1)
    end
    # Always return a `Bool`: falling off the chain used to yield `nothing`, which
    # makes `is_valid(v) ? a : b` fail with a `TypeError` instead of rejecting `v`.
    return false
end
|
||||||
|
|
||||||
|
"""
    Base.show(io::IO, x::Variation)

Print a compact form of `x` to `io`:
- substitution: the reference symbol at the position, the position, the new symbol;
- deletion: `Δ` followed by the first and last deleted positions joined by `-`;
- insertion: the position followed by the inserted sequence.
"""
function Base.show(io::IO, x::Variation)
    change = x.edit.x
    at = x.edit.pos
    if change isa Substitution
        print(io, x.ref[at], at, change.x)
    elseif change isa Deletion
        last_deleted = at + change.len - 1
        print(io, 'Δ', at, '-', last_deleted)
    elseif change isa Insertion
        print(io, at, change.seq)
    else
        # Fallback for any other edit kind: position followed by its `x` field.
        print(io, at, change.x)
    end
end
|
||||||
|
|
||||||
|
"""
    Base.in(v::Variation, var::Variant)

Return `true` if the edit of `v` equals one of the edits of `var`.

Raises an error with the message "References must be equal" when the references of
`v` and `var` differ.
"""
function Base.in(v::Variation, var::Variant)
    v.ref != var.ref && error("References must be equal")
    return any(candidate -> v.edit == candidate, var.edits)
end
|
||||||
|
|
||||||
|
"""
    translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}

Carry `var`, whose edit position refers to the reference of `aln` (`aln.b`), over to
the aligned sequence of `aln`.

# Returns
- A `Variation{S,T}` on the aligned sequence when the edit can be carried over.
- `nothing` for a substitution that does not land on a match/mismatch operation or
  that would substitute the symbol already present, and for a deletion whose span
  maps to an empty range of the aligned sequence.
- `Inapplicable()` for an insertion that cannot be placed: at position 0 when the
  first alignment column holds a gap, where an insertion already exists at the
  position, or where the position does not map onto a symbol.
"""
function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
    kind = var.edit.x
    pos = var.edit.pos
    # NOTE(review): the `PairwiseAlignment` type in BioAlignments appears to keep its
    # aligned sequence under `aln.a` — confirm that `aln.seq` resolves as intended.
    seq, ref = aln.seq, aln.b

    # Special case: insertions may have a pos of 0, which cannot be mapped to
    # the seq using ref2seq
    if iszero(pos)
        (s, r), _ = iterate(aln)
        (isgap(s) | isgap(r)) && return Inapplicable()
        # `kind` is already the insertion to carry over; pass it on unchanged
        # instead of wrapping it in a second `Insertion`.
        return Variation{S,T}(seq, Edit{S,T}(kind, 0))
    end

    (seqpos, op) = BA.ref2seq(aln, pos)
    on_symbol = op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)

    if kind isa Substitution
        # If it's a substitution, return nothing if it maps to a deleted
        # position, or substitutes to same base.
        on_symbol || return nothing
        seq[seqpos] == kind.x && return nothing
        return Variation{S,T}(seq, Edit{S,T}(kind, seqpos), Unsafe())
    elseif kind isa Deletion
        # If it's a deletion, return nothing if the deleted part is already missing
        # from the new reference.
        (stop, _) = BA.ref2seq(aln, pos + length(kind) - 1)
        # Parenthesised on purpose: `+` binds tighter than `==`, so without the
        # parentheses this compared `seqpos + op` with `OP_DELETE`.
        # Presumably `ref2seq` reports the preceding sequence position for a deleted
        # reference position, hence the shift by one — confirm against BioAlignments.
        start = seqpos + (op == BA.OP_DELETE)
        # An empty range means nothing of the deletion is left in `seq`.
        start > stop && return nothing
        return Variation{S,T}(seq, Edit{S,T}(Deletion(stop - start + 1), start), Unsafe())
    end

    # Remaining case: an insertion.
    # It can map to a deletion. In that case, it becomes really tricky to talk
    # about the "same" insertion.
    on_symbol || return Inapplicable()
    # This happens if there is already an insertion at the position
    if pos != lastindex(ref) && first(BA.ref2seq(aln, pos + 1)) != seqpos + 1
        return Inapplicable()
    end
    # It maps directly to a symbol, just insert
    return Variation{S,T}(seq, Edit{S,T}(kind, seqpos), Unsafe())
end
|
||||||
|
|
||||||
|
"""
    variations(v::Variant{S,T}) where {S,T}

Return a `Vector{Variation{S,T}}` with one element per edit of `v`, in the order of
`edits(v)`, each built with `Variation{S,T}(reference(v), edit)`.
"""
function variations(v::Variant{S,T}) where {S,T}
    return Variation{S,T}[Variation{S,T}(reference(v), e) for e in edits(v)]
end
|
||||||
|
|
||||||
|
"""
    refbases(v::Variation)

Return `_refbases` applied to the mutation, reference and left position of `v`.

Presumably these are the reference-side bases of the variation — confirm against
`_refbases`.
"""
refbases(v::Variation) = _refbases(mutation(v), reference(v), leftposition(v))
|
||||||
|
|
||||||
|
"""
    altbases(v::Variation)

Return `_altbases` applied to the mutation, reference and left position of `v`.

Presumably these are the alternate-side bases of the variation — confirm against
`_altbases`.
"""
altbases(v::Variation) = _altbases(mutation(v), reference(v), leftposition(v))
|
Loading…
Reference in a new issue