Compare commits

..

No commits in common. "17f4cd6f395610f30ee0856f55a473b830378455" and "3ed27f0c4e53c2662da4ddd1af224fed733de44c" have entirely different histories.

4 changed files with 12 additions and 47 deletions

View file

@ -7,19 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [0.1.3] - 2022-11-22
## Changed ## Changed
- Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23)) - Variations getter now returns type-parameterized vector ([#23](https://github.com/BioJulia/SequenceVariation.jl/pull/23))
### Fixed
- Soft clips at end of alignment causing invalid `Variant`s ([#25](https://github.com/BioJulia/SequenceVariation.jl/issues/25)/[#26](https://github.com/BioJulia/SequenceVariation.jl/pull/26))
## [0.1.2] - 2022-10-04 ## [0.1.2] - 2022-10-04
## Changed - ## Changed
- Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21)) - Updated dependency compats ([#21](https://github.com/BioJulia/SequenceVariation.jl/pull/21))
- BioAlignments: 2 -> 2,3 - BioAlignments: 2 -> 2,3
@ -43,8 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment` - `Variant` constructor to automatically detect mutations from a `BioAlignments.PairwiseAlignment`
- Methods to get reference and alternate bases from a `Variation` - Methods to get reference and alternate bases from a `Variation`
[unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.3...HEAD [unreleased]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...HEAD
[0.1.3]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.2...v0.1.3
[0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2 [0.1.2]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.1...v0.1.2
[0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1 [0.1.1]: https://github.com/BioJulia/SequenceVariation.jl/compare/v0.1.0...v0.1.1
[0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0 [0.1.0]: https://github.com/BioJulia/SequenceVariation.jl/releases/tag/v0.1.0

View file

@ -1,7 +1,7 @@
name = "SequenceVariation" name = "SequenceVariation"
uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8" uuid = "eef6e190-9969-4f06-a38f-35a110a8fdc8"
authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"] authors = ["Jakob Nybo Nissen <jakobnybonissen@gmail.com>", "Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
version = "0.1.3" version = "0.1.2"
[deps] [deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"

View file

@ -20,7 +20,7 @@ TODO now:
* Add tests * Add tests
""" """
using BioAlignments: BioAlignments, PairwiseAlignment, OP_SOFT_CLIP using BioAlignments: BioAlignments, PairwiseAlignment
using BioGenerics: BioGenerics, leftposition, rightposition using BioGenerics: BioGenerics, leftposition, rightposition
using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isgap
using BioSymbols: BioSymbol using BioSymbols: BioSymbol
@ -233,10 +233,6 @@ function is_valid(v::Variant)
for edit in v.edits for edit in v.edits
pos = edit.pos pos = edit.pos
op = edit.x op = edit.x
# Sanity check: for this to be a valid variant, it must be comprised of valid
# variations
is_valid(Variation(v.ref, edit)) || return false
# For substitutions we simply do not allow another modification of the same base # For substitutions we simply do not allow another modification of the same base
if op isa Substitution if op isa Substitution
pos in valid_positions || return false pos in valid_positions || return false
@ -246,7 +242,7 @@ function is_valid(v::Variant)
# for next op. However, we cannot have two insertions at the same position, because # for next op. However, we cannot have two insertions at the same position, because
# then the order of them is ambiguous # then the order of them is ambiguous
elseif op isa Insertion elseif op isa Insertion
pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)+1) || return false pos in (first(valid_positions)-1+last_was_insert:last(valid_positions)) || return false
last_was_insert = true last_was_insert = true
# Deletions obviously invalidate the reference bases that are deleted. # Deletions obviously invalidate the reference bases that are deleted.
elseif op isa Deletion elseif op isa Deletion
@ -263,6 +259,7 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
ref = aln.b ref = aln.b
E = eltype(typeof(ref)) E = eltype(typeof(ref))
edits = Edit{T, E}[] edits = Edit{T, E}[]
result = Variant(ref, edits)
refpos = first(aln.a.aln.anchors).refpos refpos = first(aln.a.aln.anchors).refpos
seqpos = first(aln.a.aln.anchors).seqpos seqpos = first(aln.a.aln.anchors).seqpos
markpos = 0 markpos = 0
@ -299,24 +296,18 @@ function Variant(aln::PairwiseAlignment{T, T}) where {T <: LongSequence{<:Union{
end end
end end
# Check for clips at the end of the alignment
last_anchors = aln.a.aln.anchors[end-1:end]
# Final indel, if applicable # Final indel, if applicable
if !any(anchor -> anchor.op == OP_SOFT_CLIP, last_anchors)
if !iszero(n_gaps) if !iszero(n_gaps)
push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos))) push!(edits, Edit{T, E}(Deletion(UInt(n_gaps)), UInt(markpos)))
elseif !iszero(n_ins) elseif !iszero(n_ins)
push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos))) push!(edits, Edit{T, E}(Insertion(T(insertion_buffer)), UInt(markpos)))
end end
end
return Variant(ref, edits) return result
end end
edits(v::Variant) = v.edits edits(v::Variant) = v.edits
reference(v::Variant) = v.ref reference(v::Variant) = v.ref
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
function lendiff(edit::Edit) function lendiff(edit::Edit)
x = edit.x x = edit.x
@ -398,7 +389,7 @@ function is_valid(v::Variation)
if op isa Substitution if op isa Substitution
return pos in eachindex(v.ref) return pos in eachindex(v.ref)
elseif op isa Insertion elseif op isa Insertion
return pos in 0:lastindex(v.ref)+1 return pos in 0:lastindex(v.ref)
elseif op isa Deletion elseif op isa Deletion
return pos in 1:(lastindex(v.ref)-length(op) + 1) return pos in 1:(lastindex(v.ref)-length(op) + 1)
end end

View file

@ -106,22 +106,3 @@ end
@test refbases(Variation(dna"ATCGA", "1C")) == dna"A" @test refbases(Variation(dna"ATCGA", "1C")) == dna"A"
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA" @test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
end end
@testset "SoftclipVariant" begin
refseq = dna"GATTACA"
mutseq = dna"GATTACAAAA"
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq), eltype(refseq)}[])
# Test for ending soft clip
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)) == refvar
# Test for ending soft+hard clip
@test Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)) == refvar
# Test that ending insertions are still valid
@test length(Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)).edits) == 1
# Test that out-of-bounds bases are still caught
@test_throws BoundsError Variant(PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq))
end