Add parsing

This commit is contained in:
Jakob Nybo Nissen 2021-03-02 17:55:17 +01:00
parent 354f18e0e4
commit 642658f592

View file

@ -42,6 +42,12 @@ outside this struct.
""" """
Substitution Substitution
"""
    parse(::Type{Substitution{S, T}}, s::AbstractString)

Parse a substitution written as one uppercase letter, a decimal position and
one uppercase letter (for example `"A23C"`). Surrounding whitespace is ignored.

The leading letter is only checked by the pattern and is not stored; the
trailing letter is converted to `T`. The result is a `Diff` built from the
position (parsed as `UInt`) and the `Substitution{S, T}`, not a bare
`Substitution`.

Throws an `ArgumentError` if `s` does not have this form.
"""
function Base.parse(::Type{Substitution{S, T}}, s::AbstractString) where {S, T}
    mat = match(r"^[A-Z]([0-9]+)([A-Z])$", strip(s))
    # `match` returns `nothing` when the pattern does not apply; report that as
    # a parse error instead of failing later when indexing into `nothing`.
    if mat === nothing
        throw(ArgumentError(string("Cannot parse Substitution from ", repr(s))))
    end
    symbol = T(first(mat[2]))
    return Diff(parse(UInt, mat[1]), Substitution{S, T}(symbol))
end
Base.eltype(::Type{<:Substitution{S, T}}) where {S, T} = T Base.eltype(::Type{<:Substitution{S, T}}) where {S, T} = T
""" """
@ -52,6 +58,14 @@ outside this struct
""" """
Deletion Deletion
"""
    parse(::Type{T}, s::AbstractString) where {T <: Deletion}

Parse a deletion written as `Δ<start>-<stop>` with decimal positions (for
example `"Δ3-5"`). Surrounding whitespace is ignored.

The result is a `Diff` built from `start` and a `T` constructed from the
inclusive range length `stop - start + 1`.

Throws an `ArgumentError` if `s` does not have this form, and an
`ErrorException` if `start` is greater than `stop`.
"""
function Base.parse(::Type{T}, s::AbstractString) where {T <: Deletion}
    mat = match(r"^Δ([0-9]+)-([0-9]+)$", strip(s))
    # `match` returns `nothing` when the pattern does not apply; report that as
    # a parse error instead of failing later when indexing into `nothing`.
    if mat === nothing
        throw(ArgumentError(string("Cannot parse Deletion from ", repr(s))))
    end
    start = parse(UInt, mat[1])
    stop = parse(UInt, mat[2])
    # This check must precede the subtraction: both values are `UInt`, so
    # `stop - start` would wrap around instead of going negative.
    start <= stop || error("Indel cannot have negative range")
    return Diff(start, T(stop - start + 1))
end
""" """
Insertion Insertion
@ -60,6 +74,12 @@ is stored outside the struct.
""" """
Insertion Insertion
"""
    parse(::Type{<:Insertion{S}}, s::AbstractString) where S

Parse an insertion written as a decimal position followed by one or more
uppercase letters (for example `"21ATG"`). Surrounding whitespace is ignored.

The letters are converted to a sequence with `S(...)`. The result is a `Diff`
built from the position (parsed as `UInt`) and `Insertion(seq)`.

Throws an `ArgumentError` if `s` does not have this form.
"""
function Base.parse(::Type{<:Insertion{S}}, s::AbstractString) where S
    mat = match(r"^([0-9]+)([A-Z]+)$", strip(s))
    # `match` returns `nothing` when the pattern does not apply; report that as
    # a parse error instead of failing later when indexing into `nothing`.
    if mat === nothing
        throw(ArgumentError(string("Cannot parse Insertion from ", repr(s))))
    end
    seq = S(mat[2])
    return Diff(parse(UInt, mat[1]), Insertion(seq))
end
function Insertion(s::BioSequence) function Insertion(s::BioSequence)
length(s) == 0 && throw(ArgumentError("Insertions cannot be length 0")) length(s) == 0 && throw(ArgumentError("Insertions cannot be length 0"))
Insertion{typeof(s), eltype(s)}(s) Insertion{typeof(s), eltype(s)}(s)
@ -77,6 +97,17 @@ struct Diff{S <: BioSequence, T <: BioSymbol}
edit::Edit{S, T} edit::Edit{S, T}
end end
"""
    parse(::Type{Diff{S, T}}, s::AbstractString)

Parse `s` as a `Diff`, choosing the edit type from the first character after
surrounding whitespace is removed:

- `'Δ'` is parsed as `Deletion{S, T}`,
- a numeric character is parsed as `Insertion{S, T}`,
- anything else is parsed as `Substitution{S, T}`.

Throws an `ArgumentError` if `s` is empty or contains only whitespace.
"""
function Base.parse(::Type{Diff{S, T}}, s::AbstractString) where {S, T}
    # The specific parsers all strip their input, so the dispatch character
    # must also be taken from the stripped text; otherwise leading whitespace
    # would send every input down the substitution branch.
    stripped = strip(s)
    if isempty(stripped)
        throw(ArgumentError("Cannot parse Diff from an empty string"))
    end
    beginning = first(stripped)
    if beginning == 'Δ'
        return parse(Deletion{S, T}, stripped)
    elseif isnumeric(beginning)
        return parse(Insertion{S, T}, stripped)
    else
        return parse(Substitution{S, T}, stripped)
    end
end
""" """
Variant{S <: BioSequence, T <: BioSymbol} Variant{S <: BioSequence, T <: BioSymbol}