From 76d5b6059e922dc80f68ee0aca7fa384242576b4 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 14:48:39 -0500 Subject: [PATCH 1/6] Add Variation constructor based on edit parsing --- src/SequenceVariation.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index b8a630a..902e63f 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -341,6 +341,13 @@ end Variation(ref::S, edit::Edit{S, T}) where {S, T} = Variation{S, T}(ref, edit) +function Variation(ref::S, edit::AbstractString) where {S<:BioSequence} + T = eltype(ref) + + e = parse(Edit{S,T}, edit) + return Variation{S,T}(ref, e) +end + function is_valid(v::Variation) isempty(v.ref) && return false op = v.edit.x From 9967ff4df26f2639a7815536f62dbe2b2aaac0de Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 15:36:58 -0500 Subject: [PATCH 2/6] Add test dependencies to project file --- Project.toml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Project.toml b/Project.toml index 0cf160b..79e6fb8 100644 --- a/Project.toml +++ b/Project.toml @@ -7,3 +7,11 @@ version = "0.1.0" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" +BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" + +[targets] +test = ["Test", "BioSequences", "BioAlignments"] From 101f4016353ea4e47a7e36d85695e6adfbcfe05b Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:15:04 -0500 Subject: [PATCH 3/6] Add BioGenerics dependency --- Manifest.toml | 31 +++++++++++++++++++++++++------ Project.toml | 3 ++- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 2870860..269e2b2 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -23,9 +23,9 @@ version = "2.0.0" [[BioGenerics]] deps = ["TranscodingStreams"] -git-tree-sha1 = "57deb413ca9f4c8bc7d4c6e98ebe217ff728c737" +git-tree-sha1 = "6d3f3b474b3df2e83dc67ad12ec63aee4eb5241b" uuid = "47718e42-2ac5-11e9-14af-e5595289c2ea" -version = "0.1.0" +version = "0.1.1" [[BioSequences]] deps = ["BioGenerics", "BioSymbols", "Combinatorics", "IndexableBitVectors", "Printf", "Random", "StableRNGs", "Twiddle"] @@ -50,6 +50,10 @@ git-tree-sha1 = "8cd7b7d1c7f6fcbe7e8743a58adf57788ec7f787" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "3.18.0" +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" + [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "0347f23484a96d56e7096eb1f55c6975be34b11a" @@ -69,9 +73,12 @@ deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +[[FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + [[IndexableBitVectors]] deps = ["Random", "Test"] git-tree-sha1 = "b7f5e42dc867b8a8654a5f899064632dac05bc82" @@ -108,7 +115,7 @@ uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[LinearAlgebra]] -deps = ["Libdl"] +deps = ["Libdl", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[Logging]] @@ -131,13 +138,17 @@ uuid = "14a3606d-f60d-562e-9121-12d972cd8159" 
[[NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +[[OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" + [[OrderedCollections]] git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.3.1" [[Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] @@ -153,7 +164,7 @@ deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] -deps = ["Serialization"] +deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[SHA]] @@ -217,6 +228,14 @@ uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +[[libblastrampoline_jll]] +deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" + [[nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/Project.toml b/Project.toml index 79e6fb8..49b3659 100644 --- a/Project.toml +++ b/Project.toml @@ -5,13 +5,14 @@ version = "0.1.0" [deps] BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" +BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" [extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +Test = 
"8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test", "BioSequences", "BioAlignments"] From 1e6f6ea0756ad73d04da5c018e1939ff2af3cdb6 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:17:16 -0500 Subject: [PATCH 4/6] Add accessor functions for Edit properties --- src/SequenceVariation.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 902e63f..e8399a1 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -133,6 +133,9 @@ function Base.parse(::Type{<:Edit{Se, Sy}}, s::Union{String, SubString{String}}) end end +mutation(e::Edit) = e.x +BioGenerics.leftposition(e::Edit) = e.pos + #= @noinline throw_parse_error(T, p::Integer) = error("Failed to parse $T at byte $p") From 2d05b233a9222c925aadcd2b1b2155d5612904ee Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:20:39 -0500 Subject: [PATCH 5/6] Add accessor functions for Variation properties --- src/SequenceVariation.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index e8399a1..a5b256c 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -20,9 +20,10 @@ TODO now: * Add tests """ -using BioSymbols: BioSymbol using BioAlignments: BioAlignments, PairwiseAlignment +using BioGenerics: BioGenerics, leftposition using BioSequences: BioSequences, BioSequence, NucleotideSeq, AminoAcidSeq, LongSequence, isgap +using BioSymbols: BioSymbol const BA = BioAlignments const BS = BioSequences @@ -351,6 +352,11 @@ function Variation(ref::S, edit::AbstractString) where {S<:BioSequence} return Variation{S,T}(ref, e) end +reference(v::Variation) = v.ref +edit(v::Variation) = v.edit +mutation(v::Variation) = mutation(edit(v)) +BioGenerics.leftposition(v::Variation) =
leftposition(edit(v)) + function is_valid(v::Variation) isempty(v.ref) && return false op = v.edit.x @@ -436,6 +442,8 @@ export Insertion, Deletion, Substitution, Variant, - Variation + Variation, + reference, + mutation end # module From 6bc1528d18ac2bbda9f48c2ebffe2cb51201e9a0 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:24:55 -0500 Subject: [PATCH 6/6] Add tests for Variation parsing --- test/runtests.jl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 78e4d0b..280f18a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -23,9 +23,10 @@ TODO now: * Add tests """ -using BioSequences using BioAlignments +using BioSequences using SequenceVariation +using Test const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL, gap_open=-25, gap_extend=-2) @@ -44,3 +45,15 @@ var = Variant(align(seq1, seq2)) aln02 = PairwiseAlignment(read02, refseq) @test Variant(aln01).edits == Variant(aln02).edits end + +@testset "VariationParsing" begin + refseq = dna"ACAACTTTATCT" + + sub = Variation(refseq, "A4T") + del = Variation(refseq, "Δ4-5") + ins = Variation(refseq, "4TT") + + @test mutation(sub) isa Substitution + @test mutation(del) isa Deletion + @test mutation(ins) isa Insertion +end