mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-24 06:19:54 +00:00
Compare commits
No commits in common. "17f4cd6f395610f30ee0856f55a473b830378455" and "3ed27f0c4e53c2662da4ddd1af224fed733de44c" have entirely different histories.
17f4cd6f39
...
3ed27f0c4e
4 changed files with 12 additions and 47 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
@ -7,19 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
## [0.1.3] - 2022-11-22
|
|
||||||
|
|
||||||
## Changed
|
## Changed
|
||||||
|
|
||||||
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
|
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Soft clips at end of alignment causing invalid `Variant`s ([#25](https://github.com/BioJulia/SequenceVariation.jl/issues/25)/[#26](https://github.com/BioJulia/SequenceVariation.jl/pull/26))
|
|
||||||
|
|
||||||
## [0.1.2] - 2022-10-04
|
## [0.1.2] - 2022-10-04
|
||||||
|
|
||||||
## Changed
|
- ## Changed
|
||||||
|
|
||||||
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
|
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
|
||||||
- BioAlignments: 2 -> 2,3
|
- BioAlignments: 2 -> 2,3
|
||||||
|
@ -43,8 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
|
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
|
||||||
- Methods to get reference and alternate bases from a `Variation`
|
- Methods to get reference and alternate bases from a `Variation`
|
||||||
|
|
||||||
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.3...HEAD
|
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...HEAD
|
||||||
[0.1.3]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...v0.1.3
|
|
||||||
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
|
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
|
||||||
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
|
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
|
||||||
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0
|
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
name = "SequenceVariation"
|
name = "SequenceVariation"
|
||||||
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
|
||||||
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||||
version = "0.1.3"
|
version = "0.1.2"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
|
|
|
@ -20,7 +20,7 @@ TODO now:
|
||||||
* Add tests
|
* Add tests
|
||||||
"""
|
"""
|
||||||
|
|
||||||
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP
|
using BioAlignments: BioAlignments, PairwiseAlignment
|
||||||
using BioGenerics: BioGenerics, leftposition, rightposition
|
using BioGenerics: BioGenerics, leftposition, rightposition
|
||||||
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
|
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
|
||||||
using BioSymbols: BioSymbol
|
using BioSymbols: BioSymbol
|
||||||
|
@ -233,10 +233,6 @@ function is_valid(v::Variant)
|
||||||
for edit in v.edits
|
for edit in v.edits
|
||||||
pos = edit.pos
|
pos = edit.pos
|
||||||
op = edit.x
|
op = edit.x
|
||||||
# Sanity check: for this to be a valid variant, it must be comprised of valid
|
|
||||||
# variations
|
|
||||||
is_valid(Variation(v.ref, edit)) || return false
|
|
||||||
|
|
||||||
# For substitutions we simply do not allow another modification of the same base
|
# For substitutions we simply do not allow another modification of the same base
|
||||||
if op isa Substitution
|
if op isa Substitution
|
||||||
pos in valid_positions || return false
|
pos in valid_positions || return false
|
||||||
|
@ -246,7 +242,7 @@ function is_valid(v::Variant)
|
||||||
# for next op. However, we cannot have two insertions at the same position, because
|
# for next op. However, we cannot have two insertions at the same position, because
|
||||||
# then the order of them is ambiguous
|
# then the order of them is ambiguous
|
||||||
elseif op isa Insertion
|
elseif op isa Insertion
|
||||||
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)+1) || return false
|
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)) || return false
|
||||||
last_was_insert = true
|
last_was_insert = true
|
||||||
# Deletions obviously invalidate the reference bases that are deleted.
|
# Deletions obviously invalidate the reference bases that are deleted.
|
||||||
elseif op isa Deletion
|
elseif op isa Deletion
|
||||||
|
@ -263,6 +259,7 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
||||||
ref = aln.b
|
ref = aln.b
|
||||||
E = eltype(typeof(ref))
|
E = eltype(typeof(ref))
|
||||||
edits = Edit{T, E}[]
|
edits = Edit{T, E}[]
|
||||||
|
result = Variant(ref, edits)
|
||||||
refpos = first(aln.a.aln.anchors).refpos
|
refpos = first(aln.a.aln.anchors).refpos
|
||||||
seqpos = first(aln.a.aln.anchors).seqpos
|
seqpos = first(aln.a.aln.anchors).seqpos
|
||||||
markpos = 0
|
markpos = 0
|
||||||
|
@ -299,24 +296,18 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Check for clips at the end of the alignment
|
|
||||||
last_anchors = aln.a.aln.anchors[end-1:end]
|
|
||||||
|
|
||||||
# Final indel, if applicable
|
# Final indel, if applicable
|
||||||
if !any(anchor -> anchor.op == OP_SOFT_CLIP, last_anchors)
|
|
||||||
if !iszero(n_gaps)
|
if !iszero(n_gaps)
|
||||||
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
|
||||||
elseif !iszero(n_ins)
|
elseif !iszero(n_ins)
|
||||||
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
return Variant(ref, edits)
|
return result
|
||||||
end
|
end
|
||||||
|
|
||||||
edits(v::Variant) = v.edits
|
edits(v::Variant) = v.edits
|
||||||
reference(v::Variant) = v.ref
|
reference(v::Variant) = v.ref
|
||||||
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
|
|
||||||
|
|
||||||
function lendiff(edit::Edit)
|
function lendiff(edit::Edit)
|
||||||
x = edit.x
|
x = edit.x
|
||||||
|
@ -398,7 +389,7 @@ function is_valid(v::Variation)
|
||||||
if op isa Substitution
|
if op isa Substitution
|
||||||
return pos in eachindex(v.ref)
|
return pos in eachindex(v.ref)
|
||||||
elseif op isa Insertion
|
elseif op isa Insertion
|
||||||
return pos in 0:lastindex(v.ref)+1
|
return pos in 0:lastindex(v.ref)
|
||||||
elseif op isa Deletion
|
elseif op isa Deletion
|
||||||
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
return pos in 1:(lastindex(v.ref)-length(op) + 1)
|
||||||
end
|
end
|
||||||
|
|
|
@ -106,22 +106,3 @@ end
|
||||||
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
|
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
|
||||||
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "SoftclipVariant" begin
|
|
||||||
refseq = dna"GATTACA"
|
|
||||||
mutseq = dna"GATTACAAAA"
|
|
||||||
|
|
||||||
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq), eltype(refseq)}[])
|
|
||||||
|
|
||||||
# Test for ending soft clip
|
|
||||||
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)) == refvar
|
|
||||||
|
|
||||||
# Test for ending soft+hard clip
|
|
||||||
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)) == refvar
|
|
||||||
|
|
||||||
# Test that ending insertions are still valid
|
|
||||||
@test length(Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)).edits) == 1
|
|
||||||
|
|
||||||
# Test that out-of-bounds bases are still caught
|
|
||||||
@test_throws BoundsError Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq))
|
|
||||||
end
|
|
||||||
|
|
Loading…
Reference in a new issue