Merge pull request #9 from MillironX/feature/variations-parse

This commit is contained in:
Thomas A. Christensen II 2022-06-30 16:22:20 -05:00 committed by GitHub
commit 5f7d5e59a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 9 deletions

View file

@ -23,9 +23,9 @@ version = "2.0.0"
[[BioGenerics]]
deps = ["TranscodingStreams"]
git-tree-sha1 = "57deb413ca9f4c8bc7d4c6e98ebe217ff728c737"
git-tree-sha1 = "6d3f3b474b3df2e83dc67ad12ec63aee4eb5241b"
uuid = "47718e42-2ac5-11e9-14af-e5595289c2ea"
version = "0.1.0"
version = "0.1.1"
[[BioSequences]]
deps = ["BioGenerics", "BioSymbols", "Combinatorics", "IndexableBitVectors", "Printf", "Random", "StableRNGs", "Twiddle"]
@ -50,6 +50,10 @@ git-tree-sha1 = "8cd7b7d1c7f6fcbe7e8743a58adf57788ec7f787"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.18.0"
[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[DataStructures]]
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "0347f23484a96d56e7096eb1f55c6975be34b11a"
@ -69,9 +73,12 @@ deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
[[Downloads]]
deps = ["ArgTools", "LibCURL", "NetworkOptions"]
deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[FileWatching]]
uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
[[IndexableBitVectors]]
deps = ["Random", "Test"]
git-tree-sha1 = "b7f5e42dc867b8a8654a5f899064632dac05bc82"
@ -108,7 +115,7 @@ uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[LinearAlgebra]]
deps = ["Libdl"]
deps = ["Libdl", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[Logging]]
@ -131,13 +138,17 @@ uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
[[OrderedCollections]]
git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b"
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
version = "1.3.1"
[[Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[Printf]]
@ -153,7 +164,7 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[Random]]
deps = ["Serialization"]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[SHA]]
@ -217,6 +228,14 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[libblastrampoline_jll]]
deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
[[nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"

View file

@ -5,5 +5,14 @@ version = "0.1.0"
[deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
[extras]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
test = ["Test", "BioSequences", "BioAlignments"]

View file

@ -20,9 +20,10 @@ TODO now:
* Add tests
"""
using BioSymbols: BioSymbol
using BioAlignments: BioAlignments, PairwiseAlignment
using BioGenerics: BioGenerics, leftposition
using BioSequences: BioSequences, BioSequence, NucleotideSeq, AminoAcidSeq, LongSequence, isgap
using BioSymbols: BioSymbol
const BA = BioAlignments
const BS = BioSequences
@ -133,6 +134,9 @@ function Base.parse(::Type{<:Edit{Se, Sy}}, s::Union{String, SubString{String}})
end
end
"""
    mutation(e::Edit)

Return the value stored in the `x` field of the edit `e`.
"""
function mutation(e::Edit)
    return e.x
end
"""
    BioGenerics.leftposition(e::Edit)

Extend `BioGenerics.leftposition` to `Edit`: return the `pos` field of `e`.
"""
function BioGenerics.leftposition(e::Edit)
    return e.pos
end
#=
@noinline throw_parse_error(T, p::Integer) = error("Failed to parse $T at byte $p")
@ -341,6 +345,18 @@ end
# Convenience outer constructor: take the type parameters `S` and `T` from the
# arguments and forward `ref` and `edit` to the `Variation{S, T}` constructor.
function Variation(ref::S, edit::Edit{S, T}) where {S, T}
    return Variation{S, T}(ref, edit)
end
"""
    Variation(ref::BioSequence, edit::AbstractString)

Build a `Variation` of `ref` from the textual description `edit`.

The string is parsed as an `Edit{S,T}`, where `S` is the type of `ref` and `T` is
`eltype(ref)`, and the parsed edit is stored together with `ref`.
"""
function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
    symboltype = eltype(ref)
    parsed = parse(Edit{S,symboltype}, edit)
    return Variation{S,symboltype}(ref, parsed)
end
"""
    reference(v::Variation)

Return the reference sequence stored in `v`.
"""
function reference(v::Variation)
    # The reference sequence lives in the `ref` field (the same field `is_valid`
    # reads via `v.ref`); reading `v.reference` would fail on field lookup.
    return v.ref
end
"""
    edit(v::Variation)

Return the value stored in the `edit` field of `v`.
"""
function edit(v::Variation)
    return v.edit
end
"""
    mutation(v::Variation)

Return the mutation of the edit held by `v`, i.e. `mutation(edit(v))`.
"""
function mutation(v::Variation)
    inner = edit(v)
    return mutation(inner)
end
"""
    BioGenerics.leftposition(v::Variation)

Extend `BioGenerics.leftposition` to `Variation` by delegating to the edit held
by `v`, i.e. `leftposition(edit(v))`.
"""
function BioGenerics.leftposition(v::Variation)
    inner = edit(v)
    return leftposition(inner)
end
function is_valid(v::Variation)
isempty(v.ref) && return false
op = v.edit.x
@ -426,6 +442,8 @@ export Insertion,
Deletion,
Substitution,
Variant,
Variation
Variation,
reference,
mutation
end # module

View file

@ -23,9 +23,10 @@ TODO now:
* Add tests
"""
using BioSequences
using BioAlignments
using BioSequences
using SequenceVariation
using Test
const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL, gap_open=-25, gap_extend=-2)
@ -44,3 +45,15 @@ var = Variant(align(seq1, seq2))
aln02 = PairwiseAlignment(read02, refseq)
@test Variant(aln01).edits == Variant(aln02).edits
end
@testset "VariationParsing" begin
    refseq = dna"ACAACTTTATCT"

    # Each textual edit must parse into a Variation whose mutation has the
    # paired type.
    expectations = (
        "A4T" => Substitution,
        "Δ4-5" => Deletion,
        "4TT" => Insertion,
    )

    for (text, expected) in expectations
        parsed = Variation(refseq, text)
        @test mutation(parsed) isa expected
    end
end