Compare commits

...

12 commits

Author SHA1 Message Date
17f4cd6f39 Update CHANGELOG 2022-12-17 22:02:15 +00:00
128a5445ad Switch alignment validation to after edit construction 2022-12-17 22:02:15 +00:00
f9058c5cb3 Update ranges to allow end position insertions 2022-12-17 22:02:15 +00:00
5be4dce200 Add check for soft clips when construction Variants from alignment 2022-12-17 22:02:15 +00:00
d8435be115 Add Variation-level validation for Variants
Testing indicates that some Variants (particularly those with insertions
and/or clips at the send of the query sequence) will validate fine as a
Variant, but will fail validation as a Variation. This problem is
particularly annoying when constructing Variants in the REPL, as the
creation will complete successfully, but the show of the Variant will
error out. To fix this, leverage the existing validation of Variation to
check each Edit within a Variant upon construction.
2022-12-17 22:02:15 +00:00
91c3acc85e Add equality function for Variant 2022-12-17 22:02:15 +00:00
f9e76d60d6 Add test for ending substitutions being invalid 2022-12-17 22:02:15 +00:00
3b61ccefb5 Add test for ending insertion 2022-12-17 22:02:15 +00:00
abff6692d4 Add test for combined clips 2022-12-17 22:02:15 +00:00
dd00231840 Add test for soft clips 2022-12-17 22:02:15 +00:00
b45081a56e Update CHANGELOG for v0.1.3 2022-11-22 18:07:53 +00:00
00495d4e14 Bump version number in Project.toml 2022-11-22 18:07:53 +00:00
4 changed files with 47 additions and 12 deletions

View file

@ -7,13 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [0.1.3] - 2022-11-22
## Changed
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
### Fixed
- Soft clips at end of alignment causing invalid `Variant`s ([#25](https://github.com/BioJulia/SequenceVariation.jl/issues/25)/[#26](https://github.com/BioJulia/SequenceVariation.jl/pull/26))
## [0.1.2] - 2022-10-04
- ## Changed
## Changed
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
- BioAlignments: 2 -> 2,3
@ -37,7 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
- Methods to get reference and alternate bases from a `Variation`
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...HEAD
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.3...HEAD
[0.1.3]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...v0.1.3
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0

View file

@ -1,7 +1,7 @@
name = "SequenceVariation"
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
version = "0.1.2"
version = "0.1.3"
[deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"

View file

@ -20,7 +20,7 @@ TODO now:
* Add tests
"""
using BioAlignments: BioAlignments, PairwiseAlignment
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP
using BioGenerics: BioGenerics, leftposition, rightposition
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
using BioSymbols: BioSymbol
@ -233,6 +233,10 @@ function is_valid(v::Variant)
for edit in v.edits
pos = edit.pos
op = edit.x
# Sanity check: for this to be a valid variant, it must be comprised of valid
# variations
is_valid(Variation(v.ref, edit)) || return false
# For substitutions we simply do not allow another modification of the same base
if op isa Substitution
pos in valid_positions || return false
@ -242,7 +246,7 @@ function is_valid(v::Variant)
# for next op. However, we cannot have two insertions at the same position, because
# then the order of them is ambiguous
elseif op isa Insertion
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)) || return false
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)+1) || return false
last_was_insert = true
# Deletions obviously invalidate the reference bases that are deleted.
elseif op isa Deletion
@ -259,7 +263,6 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
ref = aln.b
E = eltype(typeof(ref))
edits = Edit{T, E}[]
result = Variant(ref, edits)
refpos = first(aln.a.aln.anchors).refpos
seqpos = first(aln.a.aln.anchors).seqpos
markpos = 0
@ -296,18 +299,24 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
end
end
# Check for clips at the end of the alignment
last_anchors = aln.a.aln.anchors[end-1:end]
# Final indel, if applicable
if !iszero(n_gaps)
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
elseif !iszero(n_ins)
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
if !any(anchor -> anchor.op == OP_SOFT_CLIP, last_anchors)
if !iszero(n_gaps)
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
elseif !iszero(n_ins)
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
end
end
return result
return Variant(ref, edits)
end
edits(v::Variant) = v.edits
reference(v::Variant) = v.ref
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
function lendiff(edit::Edit)
x = edit.x
@ -389,7 +398,7 @@ function is_valid(v::Variation)
if op isa Substitution
return pos in eachindex(v.ref)
elseif op isa Insertion
return pos in 0:lastindex(v.ref)
return pos in 0:lastindex(v.ref)+1
elseif op isa Deletion
return pos in 1:(lastindex(v.ref)-length(op) + 1)
end

View file

@ -106,3 +106,22 @@ end
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
end
@testset "SoftclipVariant" begin
refseq = dna"GATTACA"
mutseq = dna"GATTACAAAA"
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq), eltype(refseq)}[])
# Test for ending soft clip
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)) == refvar
# Test for ending soft+hard clip
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)) == refvar
# Test that ending insertions are still valid
@test length(Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)).edits) == 1
# Test that out-of-bounds bases are still caught
@test_throws BoundsError Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq))
end