mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-24 06:19:54 +00:00
Compare commits
12 commits
3ed27f0c4e
...
17f4cd6f39
Author | SHA1 | Date | |
---|---|---|---|
17f4cd6f39 | |||
128a5445ad | |||
f9058c5cb3 | |||
5be4dce200 | |||
d8435be115 | |||
91c3acc85e | |||
f9e76d60d6 | |||
3b61ccefb5 | |||
abff6692d4 | |||
dd00231840 | |||
b45081a56e | |||
00495d4e14 |
4 changed files with 47 additions and 12 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -7,13 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.1.3] - 2022-11-22
|
||||||
|
|
||||||
## Changed
|
## Changed
|
||||||
|
|
||||||
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
|
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Soft clips at end of alignment causing invalid `Variant`s ([#25](https://github.com/BioJulia/SequenceVariation.jl/issues/25)/[#26](https://github.com/BioJulia/SequenceVariation.jl/pull/26))
|
||||||
|
|
||||||
## [0.1.2] - 2022-10-04
|
## [0.1.2] - 2022-10-04
|
||||||
|
|
||||||
- ## Changed
|
## Changed
|
||||||
|
|
||||||
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
|
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
|
||||||
- BioAlignments: 2 -> 2,3
|
- BioAlignments: 2 -> 2,3
|
||||||
|
@ -37,7 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
|
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
|
||||||
- Methods to get reference and alternate bases from a `Variation`
|
- Methods to get reference and alternate bases from a `Variation`
|
||||||
|
|
||||||
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...HEAD
|
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.3...HEAD
|
||||||
|
[0.1.3]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...v0.1.3
|
||||||
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
|
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
|
||||||
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
|
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
|
||||||
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0
|
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
name = "SequenceVariation"
|
name = "SequenceVariation"
|
||||||
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
||||||
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||||
version = "0.1.2"
|
version = "0.1.3"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
|
|
|
@ -20,7 +20,7 @@ TODO now:
|
||||||
* Add tests
|
* Add tests
|
||||||
"""
|
"""
|
||||||
|
|
||||||
using BioAlignments: BioAlignments, PairwiseAlignment
|
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP
|
||||||
using BioGenerics: BioGenerics, leftposition, rightposition
|
using BioGenerics: BioGenerics, leftposition, rightposition
|
||||||
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
|
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
|
||||||
using BioSymbols: BioSymbol
|
using BioSymbols: BioSymbol
|
||||||
|
@ -233,6 +233,10 @@ function is_valid(v::Variant)
|
||||||
for edit in v.edits
|
for edit in v.edits
|
||||||
pos = edit.pos
|
pos = edit.pos
|
||||||
op = edit.x
|
op = edit.x
|
||||||
|
# Sanity check: for this to be a valid variant, it must be comprised of valid
|
||||||
|
# variations
|
||||||
|
is_valid(Variation(v.ref, edit)) || return false
|
||||||
|
|
||||||
# For substitutions we simply do not allow another modification of the same base
|
# For substitutions we simply do not allow another modification of the same base
|
||||||
if op isa Substitution
|
if op isa Substitution
|
||||||
pos in valid_positions || return false
|
pos in valid_positions || return false
|
||||||
|
@ -242,7 +246,7 @@ function is_valid(v::Variant)
|
||||||
# for next op. However, we cannot have two insertions at the same position, because
|
# for next op. However, we cannot have two insertions at the same position, because
|
||||||
# then the order of them is ambiguous
|
# then the order of them is ambiguous
|
||||||
elseif op isa Insertion
|
elseif op isa Insertion
|
||||||
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)) || return false
|
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)+1) || return false
|
||||||
last_was_insert = true
|
last_was_insert = true
|
||||||
# Deletions obviously invalidate the reference bases that are deleted.
|
# Deletions obviously invalidate the reference bases that are deleted.
|
||||||
elseif op isa Deletion
|
elseif op isa Deletion
|
||||||
|
@ -259,7 +263,6 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
||||||
ref = aln.b
|
ref = aln.b
|
||||||
E = eltype(typeof(ref))
|
E = eltype(typeof(ref))
|
||||||
edits = Edit{T, E}[]
|
edits = Edit{T, E}[]
|
||||||
result = Variant(ref, edits)
|
|
||||||
refpos = first(aln.a.aln.anchors).refpos
|
refpos = first(aln.a.aln.anchors).refpos
|
||||||
seqpos = first(aln.a.aln.anchors).seqpos
|
seqpos = first(aln.a.aln.anchors).seqpos
|
||||||
markpos = 0
|
markpos = 0
|
||||||
|
@ -296,18 +299,24 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Check for clips at the end of the alignment
|
||||||
|
last_anchors = aln.a.aln.anchors[end-1:end]
|
||||||
|
|
||||||
# Final indel, if applicable
|
# Final indel, if applicable
|
||||||
|
if !any(anchor -> anchor.op == OP_SOFT_CLIP, last_anchors)
|
||||||
if !iszero(n_gaps)
|
if !iszero(n_gaps)
|
||||||
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
||||||
elseif !iszero(n_ins)
|
elseif !iszero(n_ins)
|
||||||
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
return result
|
return Variant(ref, edits)
|
||||||
end
|
end
|
||||||
|
|
||||||
edits(v::Variant) = v.edits
|
edits(v::Variant) = v.edits
|
||||||
reference(v::Variant) = v.ref
|
reference(v::Variant) = v.ref
|
||||||
|
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
|
||||||
|
|
||||||
function lendiff(edit::Edit)
|
function lendiff(edit::Edit)
|
||||||
x = edit.x
|
x = edit.x
|
||||||
|
@ -389,7 +398,7 @@ function is_valid(v::Variation)
|
||||||
if op isa Substitution
|
if op isa Substitution
|
||||||
return pos in eachindex(v.ref)
|
return pos in eachindex(v.ref)
|
||||||
elseif op isa Insertion
|
elseif op isa Insertion
|
||||||
return pos in 0:lastindex(v.ref)
|
return pos in 0:lastindex(v.ref)+1
|
||||||
elseif op isa Deletion
|
elseif op isa Deletion
|
||||||
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
||||||
end
|
end
|
||||||
|
|
|
@ -106,3 +106,22 @@ end
|
||||||
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
|
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
|
||||||
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@testset "SoftclipVariant" begin
|
||||||
|
refseq = dna"GATTACA"
|
||||||
|
mutseq = dna"GATTACAAAA"
|
||||||
|
|
||||||
|
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq), eltype(refseq)}[])
|
||||||
|
|
||||||
|
# Test for ending soft clip
|
||||||
|
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)) == refvar
|
||||||
|
|
||||||
|
# Test for ending soft+hard clip
|
||||||
|
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)) == refvar
|
||||||
|
|
||||||
|
# Test that ending insertions are still valid
|
||||||
|
@test length(Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)).edits) == 1
|
||||||
|
|
||||||
|
# Test that out-of-bounds bases are still caught
|
||||||
|
@test_throws BoundsError Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq))
|
||||||
|
end
|
||||||
|
|
Loading…
Reference in a new issue