diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl
index d16ee4a..7dd505d 100644
--- a/src/SequenceVariation.jl
+++ b/src/SequenceVariation.jl
@@ -42,6 +42,24 @@ outside this struct.
 """
 Substitution
 
+"""
+    parse(::Type{Substitution{S, T}}, s::AbstractString)
+
+Parse a substitution written as `<letter><digits><letter>`, e.g. `"A16T"`, after
+stripping surrounding whitespace. The leading letter is matched but not used; the
+digits are parsed as a `UInt` and the trailing letter is converted to `T`. Returns
+`Diff(number, Substitution{S, T}(symbol))`.
+
+Throws an `ArgumentError` if `s` does not have this form.
+"""
+function Base.parse(::Type{Substitution{S, T}}, s::AbstractString) where {S, T}
+    mat = match(r"^[A-Z]([0-9]+)([A-Z])$", strip(s))
+    # `match` returns `nothing` on failure; report that instead of failing on `mat[2]`.
+    mat === nothing && throw(ArgumentError("Failed to parse Substitution from $(repr(s))"))
+    symbol = T(first(mat[2]))
+    Diff(parse(UInt, mat[1]), Substitution{S, T}(symbol))
+end
+
 Base.eltype(::Type{<:Substitution{S, T}}) where {S, T} = T
 
 """
@@ -52,6 +70,26 @@ outside this struct
 """
 Deletion
 
+"""
+    parse(::Type{T}, s::AbstractString) where {T <: Deletion}
+
+Parse a deletion written as `Δ<start>-<stop>`, e.g. `"Δ3-5"`, after stripping
+surrounding whitespace. Both numbers are parsed as `UInt`. Returns
+`Diff(start, T(stop - start + 1))`, i.e. the range is treated as inclusive.
+
+Throws an `ArgumentError` if `s` does not have this form, and raises via `error`
+if `start > stop`.
+"""
+function Base.parse(::Type{T}, s::AbstractString) where {T <: Deletion}
+    mat = match(r"^Δ([0-9]+)-([0-9]+)$", strip(s))
+    # `match` returns `nothing` on failure; report that instead of failing on `mat[1]`.
+    mat === nothing && throw(ArgumentError("Failed to parse Deletion from $(repr(s))"))
+    start = parse(UInt, mat[1])
+    stop = parse(UInt, mat[2])
+    start ≤ stop || error("Indel cannot have negative range")
+    Diff(start, T(stop - start + 1))
+end
+
 """
     Insertion
 
@@ -60,6 +98,23 @@ is stored outside the struct.
 """
 Insertion
 
+"""
+    parse(::Type{<:Insertion{S}}, s::AbstractString) where S
+
+Parse an insertion written as `<digits><letters>`, e.g. `"11TAG"`, after stripping
+surrounding whitespace. The digits are parsed as a `UInt` and the letters are passed
+to the constructor of `S`. Returns `Diff(number, Insertion(seq))`.
+
+Throws an `ArgumentError` if `s` does not have this form.
+"""
+function Base.parse(::Type{<:Insertion{S}}, s::AbstractString) where S
+    mat = match(r"^([0-9]+)([A-Z]+)$", strip(s))
+    # `match` returns `nothing` on failure; report that instead of failing on `mat[2]`.
+    mat === nothing && throw(ArgumentError("Failed to parse Insertion from $(repr(s))"))
+    seq = S(mat[2])
+    Diff(parse(UInt, mat[1]), Insertion(seq))
+end
+
 function Insertion(s::BioSequence)
     length(s) == 0 && throw(ArgumentError("Insertions cannot be length 0"))
     Insertion{typeof(s), eltype(s)}(s)
@@ -77,6 +132,30 @@ struct Diff{S <: BioSequence, T <: BioSymbol}
     edit::Edit{S, T}
 end
 
+"""
+    parse(::Type{Diff{S, T}}, s::AbstractString)
+
+Parse `s` as a deletion, insertion or substitution, chosen by its first
+non-whitespace character: `Δ` selects `Deletion{S, T}`, a numeric character
+selects `Insertion{S, T}`, and anything else selects `Substitution{S, T}`.
+
+Throws an `ArgumentError` if `s` is empty or contains only whitespace.
+"""
+function Base.parse(::Type{Diff{S, T}}, s::AbstractString) where {S, T}
+    # Strip before dispatching so leading whitespace cannot select the wrong parser;
+    # the edit-specific parsers strip their input as well.
+    stripped = strip(s)
+    isempty(stripped) && throw(ArgumentError("Cannot parse Diff from an empty string"))
+    beginning = first(stripped)
+    if beginning == 'Δ'
+        parse(Deletion{S, T}, stripped)
+    elseif isnumeric(beginning)
+        parse(Insertion{S, T}, stripped)
+    else
+        parse(Substitution{S, T}, stripped)
+    end
+end
+
 """
     Variant{S <: BioSequence, T <: BioSymbol}
 