mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-22 05:19:55 +00:00
Move Variation-related code to Variation.jl
This commit is contained in:
parent
37965ac7eb
commit
8c2dd271f3
2 changed files with 135 additions and 133 deletions
|
@ -33,139 +33,7 @@ struct Inapplicable end
|
|||
|
||||
include("Edit.jl")
|
||||
include("Variant.jl")
|
||||
|
||||
struct Variation{S <: BioSequence, T <: BioSymbol}
|
||||
ref::S
|
||||
edit::Edit{S, T}
|
||||
|
||||
function Variation{S, T}(ref::S, e::Edit{S, T}, ::Unsafe) where {S <: BioSequence, T <: BioSymbol}
|
||||
new(ref, e)
|
||||
end
|
||||
end
|
||||
|
||||
function Variation{S, T}(ref::S, e::Edit{S, T}) where {S <: BioSequence, T <: BioSymbol}
|
||||
v = Variation{S, T}(ref, e, Unsafe())
|
||||
is_valid(v) ? v : throw(ArgumentError("Invalid variant"))
|
||||
end
|
||||
|
||||
Variation(ref::S, edit::Edit{S, T}) where {S, T} = Variation{S, T}(ref, edit)
|
||||
|
||||
function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
|
||||
T = eltype(ref)
|
||||
|
||||
e = parse(Edit{S,T}, edit)
|
||||
return Variation{S,T}(ref, e)
|
||||
end
|
||||
|
||||
function Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence, T<:BioSymbol}
|
||||
edits = edit.(vars)
|
||||
return Variant{S, T}(ref, edits)
|
||||
end
|
||||
|
||||
reference(v::Variation) = v.ref
|
||||
edit(v::Variation) = v.edit
|
||||
mutation(v::Variation) = mutation(edit(v))
|
||||
BioGenerics.leftposition(v::Variation) = leftposition(edit(v))
|
||||
BioGenerics.rightposition(v::Variation) = rightposition(edit(v))
|
||||
Base.:(==)(x::Variation, y::Variation) = x.ref == y.ref && x.edit == y.edit
|
||||
Base.hash(x::Variation, h::UInt) = hash(Variation, hash((x.ref, x.edit), h))
|
||||
|
||||
function is_valid(v::Variation)
|
||||
isempty(v.ref) && return false
|
||||
op = v.edit.x
|
||||
pos = v.edit.pos
|
||||
if op isa Substitution
|
||||
return pos in eachindex(v.ref)
|
||||
elseif op isa Insertion
|
||||
return pos in 0:lastindex(v.ref)+1
|
||||
elseif op isa Deletion
|
||||
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
||||
end
|
||||
end
|
||||
|
||||
function Base.show(io::IO, x::Variation)
|
||||
content = x.edit.x
|
||||
pos = x.edit.pos
|
||||
if content isa Substitution
|
||||
print(io, x.ref[pos], pos, content.x)
|
||||
elseif content isa Deletion
|
||||
print(io, 'Δ', pos, '-', pos + content.len - 1)
|
||||
elseif content isa Insertion
|
||||
print(io, pos, content.seq)
|
||||
else
|
||||
print(io, pos, content.x)
|
||||
end
|
||||
end
|
||||
|
||||
function Base.in(v::Variation, var::Variant)
|
||||
if v.ref != var.ref
|
||||
error("References must be equal")
|
||||
end
|
||||
any(v.edit == edit for edit in var.edits)
|
||||
end
|
||||
|
||||
function translate(var::Variation{S, T}, aln::PairwiseAlignment{S, S}) where {S, T}
|
||||
kind = var.edit.x
|
||||
pos = var.edit.pos
|
||||
seq, ref = aln.seq, aln.b
|
||||
|
||||
# Special case: Insertions may have a pos of 0, which cannot be mapped to
|
||||
# the seq using ref2seq
|
||||
if iszero(pos)
|
||||
(s, r), _ = iterate(aln)
|
||||
(isgap(s) | isgap(r)) && return Inapplicable()
|
||||
return Variation{S, T}(seq, Edit{S, T}(Insertion(var.edit.x), 0))
|
||||
end
|
||||
|
||||
(seqpos, op) = BA.ref2seq(aln, pos)
|
||||
if kind isa Substitution
|
||||
# If it's a substitution, return nothing if it maps to a deleted
|
||||
# position, or substitutes to same base.
|
||||
op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH) || return nothing
|
||||
seq[seqpos] == kind.x && return nothing
|
||||
edit = Edit{S, T}(kind, seqpos)
|
||||
return Variation{S, T}(seq, edit, Unsafe())
|
||||
elseif kind isa Deletion
|
||||
# If it's a deletion, return nothing if the deleted part is already missing
|
||||
# from the new reference.
|
||||
(stop, op2) = BA.ref2seq(aln, pos + length(kind) - 1)
|
||||
start = seqpos + op == BA.OP_DELETE
|
||||
start < stop && return nothing
|
||||
edit = Edit{S, T}(Deletion(stop - start + 1), start)
|
||||
return Variation{S, T}(seq, edit, Unsafe())
|
||||
else
|
||||
# If it maps directly to a symbol, just insert
|
||||
if op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)
|
||||
# This happens if there is already an insertion at the position
|
||||
if pos != lastindex(ref) && first(ref2seq(aln, pos+1)) != seqpos + 1
|
||||
return Inapplicable()
|
||||
else
|
||||
edit = Edit{S, T}(Insertion(var.edit.x), seqpos)
|
||||
return Variation{S, T}(seq, edit, Unsafe())
|
||||
end
|
||||
# Alternatively, it can map to a deletion. In that case, it become really
|
||||
# tricky to talk about the "same" insertion.
|
||||
else
|
||||
return Inapplicable()
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function variations(v::Variant{S,T}) where {S,T}
|
||||
vs = Vector{Variation{S,T}}(undef, length(edits(v)))
|
||||
for (i, e) in enumerate(edits(v))
|
||||
vs[i] = Variation{S,T}(reference(v), e)
|
||||
end
|
||||
return vs
|
||||
end
|
||||
|
||||
function refbases(v::Variation)
|
||||
return _refbases(mutation(v), reference(v), leftposition(v))
|
||||
end
|
||||
|
||||
function altbases(v::Variation)
|
||||
return _altbases(mutation(v), reference(v), leftposition(v))
|
||||
end
|
||||
include("Variation.jl")
|
||||
|
||||
export Insertion,
|
||||
Deletion,
|
||||
|
|
134
src/Variation.jl
Normal file
134
src/Variation.jl
Normal file
|
@ -0,0 +1,134 @@
|
|||
struct Variation{S<:BioSequence,T<:BioSymbol}
|
||||
ref::S
|
||||
edit::Edit{S,T}
|
||||
|
||||
function Variation{S,T}(
|
||||
ref::S, e::Edit{S,T}, ::Unsafe
|
||||
) where {S<:BioSequence,T<:BioSymbol}
|
||||
return new(ref, e)
|
||||
end
|
||||
end
|
||||
|
||||
function Variation{S,T}(ref::S, e::Edit{S,T}) where {S<:BioSequence,T<:BioSymbol}
|
||||
v = Variation{S,T}(ref, e, Unsafe())
|
||||
return is_valid(v) ? v : throw(ArgumentError("Invalid variant"))
|
||||
end
|
||||
|
||||
Variation(ref::S, edit::Edit{S,T}) where {S,T} = Variation{S,T}(ref, edit)
|
||||
|
||||
function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
|
||||
T = eltype(ref)
|
||||
|
||||
e = parse(Edit{S,T}, edit)
|
||||
return Variation{S,T}(ref, e)
|
||||
end
|
||||
|
||||
function Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
||||
edits = edit.(vars)
|
||||
return Variant{S,T}(ref, edits)
|
||||
end
|
||||
|
||||
reference(v::Variation) = v.ref
|
||||
edit(v::Variation) = v.edit
|
||||
mutation(v::Variation) = mutation(edit(v))
|
||||
BioGenerics.leftposition(v::Variation) = leftposition(edit(v))
|
||||
BioGenerics.rightposition(v::Variation) = rightposition(edit(v))
|
||||
Base.:(==)(x::Variation, y::Variation) = x.ref == y.ref && x.edit == y.edit
|
||||
Base.hash(x::Variation, h::UInt) = hash(Variation, hash((x.ref, x.edit), h))
|
||||
|
||||
function is_valid(v::Variation)
|
||||
isempty(v.ref) && return false
|
||||
op = v.edit.x
|
||||
pos = v.edit.pos
|
||||
if op isa Substitution
|
||||
return pos in eachindex(v.ref)
|
||||
elseif op isa Insertion
|
||||
return pos in 0:(lastindex(v.ref) + 1)
|
||||
elseif op isa Deletion
|
||||
return pos in 1:(lastindex(v.ref) - length(op) + 1)
|
||||
end
|
||||
end
|
||||
|
||||
function Base.show(io::IO, x::Variation)
|
||||
content = x.edit.x
|
||||
pos = x.edit.pos
|
||||
if content isa Substitution
|
||||
print(io, x.ref[pos], pos, content.x)
|
||||
elseif content isa Deletion
|
||||
print(io, 'Δ', pos, '-', pos + content.len - 1)
|
||||
elseif content isa Insertion
|
||||
print(io, pos, content.seq)
|
||||
else
|
||||
print(io, pos, content.x)
|
||||
end
|
||||
end
|
||||
|
||||
function Base.in(v::Variation, var::Variant)
|
||||
if v.ref != var.ref
|
||||
error("References must be equal")
|
||||
end
|
||||
return any(v.edit == edit for edit in var.edits)
|
||||
end
|
||||
|
||||
function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
||||
kind = var.edit.x
|
||||
pos = var.edit.pos
|
||||
seq, ref = aln.seq, aln.b
|
||||
|
||||
# Special case: Insertions may have a pos of 0, which cannot be mapped to
|
||||
# the seq using ref2seq
|
||||
if iszero(pos)
|
||||
(s, r), _ = iterate(aln)
|
||||
(isgap(s) | isgap(r)) && return Inapplicable()
|
||||
return Variation{S,T}(seq, Edit{S,T}(Insertion(var.edit.x), 0))
|
||||
end
|
||||
|
||||
(seqpos, op) = BA.ref2seq(aln, pos)
|
||||
if kind isa Substitution
|
||||
# If it's a substitution, return nothing if it maps to a deleted
|
||||
# position, or substitutes to same base.
|
||||
op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH) || return nothing
|
||||
seq[seqpos] == kind.x && return nothing
|
||||
edit = Edit{S,T}(kind, seqpos)
|
||||
return Variation{S,T}(seq, edit, Unsafe())
|
||||
elseif kind isa Deletion
|
||||
# If it's a deletion, return nothing if the deleted part is already missing
|
||||
# from the new reference.
|
||||
(stop, op2) = BA.ref2seq(aln, pos + length(kind) - 1)
|
||||
start = seqpos + op == BA.OP_DELETE
|
||||
start < stop && return nothing
|
||||
edit = Edit{S,T}(Deletion(stop - start + 1), start)
|
||||
return Variation{S,T}(seq, edit, Unsafe())
|
||||
else
|
||||
# If it maps directly to a symbol, just insert
|
||||
if op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)
|
||||
# This happens if there is already an insertion at the position
|
||||
if pos != lastindex(ref) && first(ref2seq(aln, pos + 1)) != seqpos + 1
|
||||
return Inapplicable()
|
||||
else
|
||||
edit = Edit{S,T}(Insertion(var.edit.x), seqpos)
|
||||
return Variation{S,T}(seq, edit, Unsafe())
|
||||
end
|
||||
# Alternatively, it can map to a deletion. In that case, it become really
|
||||
# tricky to talk about the "same" insertion.
|
||||
else
|
||||
return Inapplicable()
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function variations(v::Variant{S,T}) where {S,T}
|
||||
vs = Vector{Variation{S,T}}(undef, length(edits(v)))
|
||||
for (i, e) in enumerate(edits(v))
|
||||
vs[i] = Variation{S,T}(reference(v), e)
|
||||
end
|
||||
return vs
|
||||
end
|
||||
|
||||
function refbases(v::Variation)
|
||||
return _refbases(mutation(v), reference(v), leftposition(v))
|
||||
end
|
||||
|
||||
function altbases(v::Variation)
|
||||
return _altbases(mutation(v), reference(v), leftposition(v))
|
||||
end
|
Loading…
Reference in a new issue