mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-24 06:19:54 +00:00
Compare commits
12 commits
3ed27f0c4e
...
17f4cd6f39
Author | SHA1 | Date | |
---|---|---|---|
17f4cd6f39 | |||
128a5445ad | |||
f9058c5cb3 | |||
5be4dce200 | |||
d8435be115 | |||
91c3acc85e | |||
f9e76d60d6 | |||
3b61ccefb5 | |||
abff6692d4 | |||
dd00231840 | |||
b45081a56e | |||
00495d4e14 |
4 changed files with 47 additions and 12 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -7,13 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.1.3] - 2022-11-22
|
||||
|
||||
### Changed
|
||||
|
||||
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Soft clips at end of alignment causing invalid `Variant`s ([#25](https://github.com/BioJulia/SequenceVariation.jl/issues/25)/[#26](https://github.com/BioJulia/SequenceVariation.jl/pull/26))
|
||||
|
||||
## [0.1.2] - 2022-10-04
|
||||
|
||||
- ## Changed
|
||||
### Changed
|
||||
|
||||
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
|
||||
- BioAlignments: 2 -> 2,3
|
||||
|
@ -37,7 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
|
||||
- Methods to get reference and alternate bases from a `Variation`
|
||||
|
||||
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...HEAD
|
||||
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.3...HEAD
|
||||
[0.1.3]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...v0.1.3
|
||||
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
|
||||
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
|
||||
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
name = "SequenceVariation"
|
||||
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
||||
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||
version = "0.1.2"
|
||||
version = "0.1.3"
|
||||
|
||||
[deps]
|
||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||
|
|
|
@ -20,7 +20,7 @@ TODO now:
|
|||
* Add tests
|
||||
"""
|
||||
|
||||
using BioAlignments: BioAlignments, PairwiseAlignment
|
||||
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP
|
||||
using BioGenerics: BioGenerics, leftposition, rightposition
|
||||
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
|
||||
using BioSymbols: BioSymbol
|
||||
|
@ -233,6 +233,10 @@ function is_valid(v::Variant)
|
|||
for edit in v.edits
|
||||
pos = edit.pos
|
||||
op = edit.x
|
||||
# Sanity check: for this to be a valid variant, it must be comprised of valid
|
||||
# variations
|
||||
is_valid(Variation(v.ref, edit)) || return false
|
||||
|
||||
# For substitutions we simply do not allow another modification of the same base
|
||||
if op isa Substitution
|
||||
pos in valid_positions || return false
|
||||
|
@ -242,7 +246,7 @@ function is_valid(v::Variant)
|
|||
# for next op. However, we cannot have two insertions at the same position, because
|
||||
# then the order of them is ambiguous
|
||||
elseif op isa Insertion
|
||||
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)) || return false
|
||||
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)+1) || return false
|
||||
last_was_insert = true
|
||||
# Deletions obviously invalidate the reference bases that are deleted.
|
||||
elseif op isa Deletion
|
||||
|
@ -259,7 +263,6 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
|||
ref = aln.b
|
||||
E = eltype(typeof(ref))
|
||||
edits = Edit{T, E}[]
|
||||
result = Variant(ref, edits)
|
||||
refpos = first(aln.a.aln.anchors).refpos
|
||||
seqpos = first(aln.a.aln.anchors).seqpos
|
||||
markpos = 0
|
||||
|
@ -296,18 +299,24 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
|||
end
|
||||
end
|
||||
|
||||
# Check for clips at the end of the alignment
|
||||
last_anchors = aln.a.aln.anchors[end-1:end]
|
||||
|
||||
# Final indel, if applicable
|
||||
if !iszero(n_gaps)
|
||||
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
||||
elseif !iszero(n_ins)
|
||||
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
||||
if !any(anchor -> anchor.op == OP_SOFT_CLIP, last_anchors)
|
||||
if !iszero(n_gaps)
|
||||
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
||||
elseif !iszero(n_ins)
|
||||
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
||||
end
|
||||
end
|
||||
|
||||
return result
|
||||
return Variant(ref, edits)
|
||||
end
|
||||
|
||||
# Accessor: return the `edits` field stored in the variant `v`.
edits(v::Variant) = v.edits
|
||||
# Accessor: return the `ref` field (the reference the edits apply to) of the variant `v`.
reference(v::Variant) = v.ref
|
||||
# Two `Variant`s are equal when both their references and their edits compare equal.
# NOTE(review): no matching `Base.hash(::Variant, ::UInt)` method is visible in this
# excerpt — confirm one exists so that `==` and `hash` stay consistent.
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
|
||||
|
||||
function lendiff(edit::Edit)
|
||||
x = edit.x
|
||||
|
@ -389,7 +398,7 @@ function is_valid(v::Variation)
|
|||
if op isa Substitution
|
||||
return pos in eachindex(v.ref)
|
||||
elseif op isa Insertion
|
||||
return pos in 0:lastindex(v.ref)
|
||||
return pos in 0:lastindex(v.ref)+1
|
||||
elseif op isa Deletion
|
||||
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
||||
end
|
||||
|
|
|
@ -106,3 +106,22 @@ end
|
|||
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
|
||||
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
||||
end
|
||||
|
||||
# Tests for building a `Variant` from a `PairwiseAlignment` whose last alignment
# operation is a soft clip, a soft+hard clip, an insertion, or a run of mismatches.
@testset "SoftclipVariant" begin
|
||||
refseq = dna"GATTACA"
|
||||
mutseq = dna"GATTACAAAA"
|
||||
|
||||
# Expected result for the clipped cases: a variant of `refseq` with an empty edit list
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq), eltype(refseq)}[])
|
||||
|
||||
# Test for ending soft clip: the three clipped bases must not produce any edit
|
||||
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)) == refvar
|
||||
|
||||
# Test for ending soft+hard clip: likewise expected to yield no edits
|
||||
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)) == refvar
|
||||
|
||||
# Test that ending insertions are still valid: the trailing insertion yields exactly one edit
|
||||
@test length(Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)).edits) == 1
|
||||
|
||||
# Test that out-of-bounds bases are still caught: three mismatches after the
# seven matched reference bases are expected to throw a `BoundsError`
|
||||
@test_throws BoundsError Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq))
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue