From a80ff869ce1243a2c832abebbfc26fcf6c97fc24 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:53:35 -0500 Subject: [PATCH 01/11] Fix reference(::Variation) `reference(::Variation)` used the wrong field name. Fix that so I can use it downstream. --- src/SequenceVariation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 06c8aa7..aeae059 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -367,7 +367,7 @@ function Variation(ref::S, edit::AbstractString) where {S<:BioSequence} return Variation{S,T}(ref, e) end -reference(v::Variation) = v.reference +reference(v::Variation) = v.ref edit(v::Variation) = v.edit mutation(v::Variation) = mutation(edit(v)) BioGenerics.leftposition(v::Variation) = leftposition(edit(v)) From 73c2c2764f520e8e2b963bfae6b6371fd1c2c0fe Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:50:20 -0500 Subject: [PATCH 02/11] Add private reference base getter for Substitution --- src/SequenceVariation.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index aeae059..b7d6fb8 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -464,6 +464,10 @@ function variations(v::Variant) return vs end +function _refbases(s::Substitution, reference::S, pos::UInt) where S <: BioSequence + return S([reference[pos]]) +end + export Insertion, Deletion, Substitution, From c1f47c7b223ff789091b170b4e8ce48e7d69bbb4 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:50:40 -0500 Subject: [PATCH 03/11] Add private alternate base getter for Substitution --- src/SequenceVariation.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index b7d6fb8..785384a 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -468,6 +468,10 @@ function _refbases(s::Substitution, reference::S, pos::UInt) where S <: BioSeque return S([reference[pos]]) end +function _altbases(s::Substitution, reference::S, pos::UInt) where S <: BioSequence + return S([s.x]) +end + export Insertion, Deletion, Substitution, From bc073eaf20b807cb4c4be8a36f6b5c5eda069bc0 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:51:13 -0500 Subject: [PATCH 04/11] Add private reference base getter for Deletion --- src/SequenceVariation.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 785384a..8ad2972 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -472,6 +472,14 @@ function _altbases(s::Substitution, reference::S, pos::UInt) where S <: BioSeque return S([s.x]) end +function _refbases(d::Deletion, reference::S, pos::UInt) where S <: BioSequence + if pos == 1 + return S(reference[UnitRange{Int}(pos, pos+length(d))]) + else + return S(reference[UnitRange{Int}(pos-1, pos+length(d)-1)]) + end +end + export Insertion, Deletion, Substitution, From 145e9e5f8853072c47b3cd004d0a3783810c9372 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:51:35 -0500 Subject: [PATCH 05/11] Add private alternate base getter for Deletion --- src/SequenceVariation.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 8ad2972..3bb4f73 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -480,6 +480,14 @@ function _refbases(d::Deletion, reference::S, pos::UInt) where S <: BioSequence end end +function _altbases(d::Deletion, reference::S, pos::UInt) where S <: BioSequence + if pos == 1 + return S([reference[pos+1]]) + else + return S([reference[pos-1]]) + end +end + export Insertion, Deletion, Substitution, From 1b7c7f86d78ef68020810595e273515e018b2273 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:51:58 -0500 Subject: [PATCH 06/11] Add private reference base getter for Insertion --- src/SequenceVariation.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 3bb4f73..6ae7b54 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -488,6 +488,10 @@ function _altbases(d::Deletion, reference::S, pos::UInt) where S <: BioSequence end end +function _refbases(i::Insertion, reference::S, pos::UInt) where S <: BioSequence + return S([reference[pos]]) +end + export Insertion, Deletion, Substitution, From ccbe6eed62ff2c96b115323d50d6faaa70655cf3 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:52:16 -0500 Subject: [PATCH 07/11] Add private alternate base getter for Insertion --- src/SequenceVariation.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 6ae7b54..7012c33 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -492,6 +492,14 @@ function _refbases(i::Insertion, reference::S, pos::UInt) where S <: BioSequence return S([reference[pos]]) end +function _altbases(i::Insertion, reference::S, pos::UInt) where S <: BioSequence + if pos == 1 + return S([i.seq..., reference[pos]]) + else + return S([reference[pos], i.seq...]) + end +end + export Insertion, Deletion, Substitution, From cab314ec0baaa8b076cea5b8cbcfd86ce0aa71b1 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:54:51 -0500 Subject: [PATCH 08/11] Add public reference base getter for Variation --- src/SequenceVariation.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index 7012c33..ca5b1f9 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -500,6 +500,10 @@ function _altbases(i::Insertion, reference::S, pos::UInt) where S <: BioSequence end end +function refbases(v::Variation) + return _refbases(mutation(v), reference(v), leftposition(v)) +end + export Insertion, Deletion, Substitution, @@ -507,6 +511,7 @@ export Insertion, Variation, reference, mutation, - variations + variations, + refbases end # module From b8e44c7b815f2021fddc022822737f02a46503e3 Mon Sep 17 00:00:00 2001 From: "Thomas A. 
Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:56:33 -0500 Subject: [PATCH 09/11] Add public alternate base getter for Variation --- src/SequenceVariation.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/SequenceVariation.jl b/src/SequenceVariation.jl index ca5b1f9..0237a15 100644 --- a/src/SequenceVariation.jl +++ b/src/SequenceVariation.jl @@ -504,6 +504,10 @@ function refbases(v::Variation) return _refbases(mutation(v), reference(v), leftposition(v)) end +function altbases(v::Variation) + return _altbases(mutation(v), reference(v), leftposition(v)) +end + export Insertion, Deletion, Substitution, @@ -512,6 +516,7 @@ export Insertion, reference, mutation, variations, - refbases + refbases, + altbases end # module From 97e1d193a64b06f298c990076dc83aeaf3b87e44 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 7 Jul 2022 10:38:15 -0500 Subject: [PATCH 10/11] Add VCF spec tests for reference and alternate bases --- test/runtests.jl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index cad0d7f..9982a55 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -69,3 +69,33 @@ end sub = Variation(refseq, "A4T") @test first(variations(var)) == sub end + +@testset "VariationBases" begin + # Test substitution bases + @test refbases(Variation(dna"ATCGA", "C3G")) == dna"C" + @test altbases(Variation(dna"ATCGA", "C3G")) == dna"G" + + # Test single deletion bases + @test refbases(Variation(dna"ATCGA", "Δ3-3")) == dna"TC" + @test altbases(Variation(dna"ATCGA", "Δ3-3")) == dna"T" + + # Test multiple deletion bases + @test refbases(Variation(dna"ATCGA", "Δ3-4")) == dna"TCG" + @test altbases(Variation(dna"ATCGA", "Δ3-4")) == dna"T" + + # Test first position deletion + @test refbases(Variation(dna"ATCGA", "Δ1-1")) == dna"AT" + @test altbases(Variation(dna"ATCGA", "Δ1-1")) == dna"T" + 
+ # Test single insertion bases + @test refbases(Variation(dna"ATCGA", "3A")) == dna"C" + @test altbases(Variation(dna"ATCGA", "3A")) == dna"CA" + + # Test multiple insertion bases + @test refbases(Variation(dna"ATCGA", "3TAG")) == dna"C" + @test altbases(Variation(dna"ATCGA", "3TAG")) == dna"CTAG" + + # Test first position insertion + @test refbases(Variation(dna"ATCGA", "1C")) == dna"A" + @test altbases(Variation(dna"ATCGA", "1C")) == dna"CA" +end From 1cfe128070f55c582688abf11ce081cc7112ee52 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Wed, 13 Jul 2022 16:52:11 -0500 Subject: [PATCH 11/11] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61da4e8..802a674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,5 +14,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `Variation` type to store and compare individual mutations - `reconstruct!` function to build mutated sequences from `Variant`s - `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment` +- Methods to get reference and alternate bases from a `Variation` [unreleased]: https://github.com/BioJulia/SequenceVariation.jl