mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-24 06:19:54 +00:00
Compare commits
6 commits
3ef4a92ba2
...
8a71715cd9
Author | SHA1 | Date | |
---|---|---|---|
8a71715cd9 | |||
4e0d88039d | |||
1384783fcc | |||
46c84b06bb | |||
5839de3ce1 | |||
44aa41e628 |
10 changed files with 95 additions and 91 deletions
|
@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
- Code now follows [Blue style](https://github.com/invenia/BlueStyle) ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
- Code now follows [Blue style](https://github.com/invenia/BlueStyle) ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
||||||
- :bomb: [BREAKING] Public and private API defined based on Blue style guidelines ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
- :bomb: [BREAKING] Public and private API defined based on Blue style guidelines ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
||||||
|
- :bomb: [BREAKING] Renamed type `Variant` to `Haplotype` ([#20](https://github.com/BioJulia/SequenceVariation.jl/issues/20)/[#29](https://github.com/BioJulia/SequenceVariation.jl/pull/29))
|
||||||
|
|
||||||
### Removed
|
### Removed
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ makedocs(;
|
||||||
modules=[SequenceVariation],
|
modules=[SequenceVariation],
|
||||||
pages=[
|
pages=[
|
||||||
"Home" => "index.md",
|
"Home" => "index.md",
|
||||||
"Working with variants" => "variants.md",
|
"Working with haplotypes" => "haplotypes.md",
|
||||||
"Working with variations" => "variations.md",
|
"Working with variations" => "variations.md",
|
||||||
"Comparing variations" => "compare.md",
|
"Comparing variations" => "compare.md",
|
||||||
"API Reference" => "api.md",
|
"API Reference" => "api.md",
|
||||||
|
|
|
@ -18,8 +18,8 @@ Insertion
|
||||||
## Variants
|
## Variants
|
||||||
|
|
||||||
```@docs
|
```@docs
|
||||||
Variant
|
Haplotype
|
||||||
reference(::Variant)
|
reference(::Haplotype)
|
||||||
variations
|
variations
|
||||||
reconstruct!
|
reconstruct!
|
||||||
```
|
```
|
||||||
|
@ -49,7 +49,7 @@ _lendiff
|
||||||
|
|
||||||
```@docs
|
```@docs
|
||||||
_edits
|
_edits
|
||||||
_is_valid(::Variant)
|
_is_valid(::Haplotype)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Variations
|
### Variations
|
||||||
|
|
|
@ -4,9 +4,9 @@ CurrentModule = SequenceVariation
|
||||||
|
|
||||||
# Comparing variations in sequences
|
# Comparing variations in sequences
|
||||||
|
|
||||||
## Checking for variations in a known variant
|
## Checking for variations in a known haplotype
|
||||||
|
|
||||||
Looking for a known [`Variation`](@ref) within a [`Variant`](@ref) is
|
Looking for a known [`Variation`](@ref) within a [`Haplotype`](@ref) is
|
||||||
efficiently accomplished using the `in` operator.
|
efficiently accomplished using the `in` operator.
|
||||||
|
|
||||||
```@setup call_variants
|
```@setup call_variants
|
||||||
|
@ -21,27 +21,27 @@ bos_ovis_alignment =
|
||||||
bos_human_alignment =
|
bos_human_alignment =
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
||||||
|
|
||||||
bos_ovis_variant = Variant(bos_ovis_alignment)
|
bos_ovis_haplotype = Haplotype(bos_ovis_alignment)
|
||||||
bos_human_variant = Variant(bos_human_alignment)
|
bos_human_haplotype = Haplotype(bos_human_alignment)
|
||||||
```
|
```
|
||||||
|
|
||||||
```@example call_variants
|
```@example call_variants
|
||||||
println("\tOvis aires\tHomo sapiens")
|
println("\tOvis aires\tHomo sapiens")
|
||||||
for v in vcat(variations(bos_ovis_variant), variations(bos_human_variant))
|
for v in vcat(variations(bos_ovis_haplotype), variations(bos_human_haplotype))
|
||||||
is_sheep = v in bos_ovis_variant
|
is_sheep = v in bos_ovis_haplotype
|
||||||
is_human = v in bos_human_variant
|
is_human = v in bos_human_haplotype
|
||||||
println("$v\t$is_sheep\t\t$is_human")
|
println("$v\t$is_sheep\t\t$is_human")
|
||||||
end
|
end
|
||||||
```
|
```
|
||||||
|
|
||||||
## Constructing new variants based on other variations
|
## Constructing new haplotypes based on other variations
|
||||||
|
|
||||||
New variants can be constructed using variations. This might be useful to pool
|
New haplotypes can be constructed using variations. This might be useful to pool
|
||||||
variations found on different reads or to filter variations from a variant
|
variations found on different reads or to filter variations from a haplotype
|
||||||
that aren't validated by another variant.
|
that aren't validated by another haplotype.
|
||||||
|
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
sheeple = vcat(variations(bos_ovis_variant), variations(bos_human_variant));
|
sheeple = vcat(variations(bos_ovis_haplotype), variations(bos_human_haplotype));
|
||||||
Variant(bovine, sheeple)
|
Haplotype(bovine, sheeple)
|
||||||
reconstruct!(bovine, ans)
|
reconstruct!(bovine, ans)
|
||||||
```
|
```
|
||||||
|
|
|
@ -2,13 +2,14 @@
|
||||||
CurrentModule = SequenceVariation
|
CurrentModule = SequenceVariation
|
||||||
```
|
```
|
||||||
|
|
||||||
# Working with variants
|
# Working with haplotypes
|
||||||
|
|
||||||
## Calling variants
|
## Calling variants
|
||||||
|
|
||||||
The first step in working with sequence variation is to identify (call)
|
The first step in working with sequence variation is to identify (call)
|
||||||
variations. SequenceVariation can directly call variants using the
|
variations between two sequences. SequenceVariation can directly call variants
|
||||||
`Variant(::PairwiseAlignment)` constructor of the [`Variant`](@ref) type.
|
using the `Haplotype(::PairwiseAlignment)` constructor of the
|
||||||
|
[`Haplotype`](@ref) type.
|
||||||
|
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
using SequenceVariation, BioAlignments, BioSequences
|
using SequenceVariation, BioAlignments, BioSequences
|
||||||
|
@ -22,19 +23,19 @@ bos_ovis_alignment =
|
||||||
bos_human_alignment =
|
bos_human_alignment =
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
||||||
|
|
||||||
bos_ovis_variant = Variant(bos_ovis_alignment)
|
bos_ovis_haplotype = Haplotype(bos_ovis_alignment)
|
||||||
bos_human_variant = Variant(bos_human_alignment)
|
bos_human_haplotype = Haplotype(bos_human_alignment)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Sequence reconstruction
|
## Sequence reconstruction
|
||||||
|
|
||||||
If the alternate sequence of a variant is no longer available (as is often the
|
If the alternate sequence of a haplotype is no longer available (as is often the
|
||||||
case when calling variants from alignment files), then the sequence can be
|
case when calling variants from alignment files), then the sequence can be
|
||||||
retrieved using the [`reconstruct!`](@ref) function.
|
retrieved using the [`reconstruct!`](@ref) function.
|
||||||
|
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
human2 = copy(bovine);
|
human2 = copy(bovine);
|
||||||
reconstruct!(human2, bos_human_variant)
|
reconstruct!(human2, bos_human_haplotype)
|
||||||
human2 == bovine
|
human2 == bovine
|
||||||
human2 == human
|
human2 == human
|
||||||
```
|
```
|
|
@ -27,7 +27,7 @@ mutation(Variation(bovine_ins, "25ACA"))
|
||||||
|
|
||||||
## Extraction
|
## Extraction
|
||||||
|
|
||||||
Sequence variations may also be extracted wholesale from a [`Variant`](@ref)
|
Sequence variations may also be extracted wholesale from a [`Haplotype`](@ref)
|
||||||
using the [`variations`](@ref) function.
|
using the [`variations`](@ref) function.
|
||||||
|
|
||||||
```@setup call_variants
|
```@setup call_variants
|
||||||
|
@ -42,13 +42,13 @@ bos_ovis_alignment =
|
||||||
bos_human_alignment =
|
bos_human_alignment =
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine);
|
||||||
|
|
||||||
bos_ovis_variant = Variant(bos_ovis_alignment)
|
bos_ovis_haplotype = Haplotype(bos_ovis_alignment)
|
||||||
bos_human_variant = Variant(bos_human_alignment)
|
bos_human_haplotype = Haplotype(bos_human_alignment)
|
||||||
```
|
```
|
||||||
|
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
variations(bos_ovis_variant)
|
variations(bos_ovis_haplotype)
|
||||||
variations(bos_human_variant)
|
variations(bos_human_haplotype)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Reference switching
|
## Reference switching
|
||||||
|
@ -59,8 +59,8 @@ alignment between the new and old references using the [`translate`](@ref).
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
ovis_human_alignment =
|
ovis_human_alignment =
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
|
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
|
||||||
human_variation = first(variations(bos_ovis_variant))
|
human_variation = first(variations(bos_ovis_haplotype))
|
||||||
reference(ans) == bovine
|
reference(ans) == bovine
|
||||||
SequenceVariation.translate(human_variation, ovis_human_alignment)
|
SequenceVariation.translate(human_variation, ovis_human_haplotype)
|
||||||
reference(ans) == bovine
|
reference(ans) == bovine
|
||||||
```
|
```
|
||||||
|
|
|
@ -1,41 +1,43 @@
|
||||||
"""
|
"""
|
||||||
Variant{S<:BioSequence,T<:BioSymbol}
|
Haplotype{S<:BioSequence,T<:BioSymbol}
|
||||||
|
|
||||||
A set of variations within a given sequence that are all found together. Depending on the
|
A set of variations within a given sequence that are all found together. Depending on the
|
||||||
field, it might also be referred to as a "genotype," "haplotype," or "strain."
|
field, it might also be referred to as a "genotype" or "strain."
|
||||||
|
|
||||||
# Constructors
|
# Constructors
|
||||||
|
|
||||||
Variant(ref::S, edits::Vector{Edit{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
Haplotype(ref::S, edits::Vector{Edit{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
||||||
Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
Haplotype(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
||||||
Variant(
|
Haplotype(
|
||||||
aln::PairwiseAlignment{T,T}
|
aln::PairwiseAlignment{T,T}
|
||||||
) where {T<:LongSequence{<:Union{BS.AminoAcidAlphabet,BS.NucleicAcidAlphabet}}}
|
) where {T<:LongSequence{<:Union{BS.AminoAcidAlphabet,BS.NucleicAcidAlphabet}}}
|
||||||
|
|
||||||
When constructing a `Variant` from a vector of [`Edit`](@ref)s or [`Variation`](@ref)s, the
|
When constructing a `Haplotype` from a vector of [`Edit`](@ref)s or [`Variation`](@ref)s,
|
||||||
edits are applied sequentially from first to last position, therefore the vector must always
|
the edits are applied sequentially from first to last position, therefore the vector must
|
||||||
be sorted by position. These edits are sorted automatically if constructing from an
|
always be sorted by position. These edits are sorted automatically if constructing from an
|
||||||
alignment.
|
alignment.
|
||||||
"""
|
"""
|
||||||
struct Variant{S<:BioSequence,T<:BioSymbol}
|
struct Haplotype{S<:BioSequence,T<:BioSymbol}
|
||||||
ref::S
|
ref::S
|
||||||
edits::Vector{Edit{S,T}}
|
edits::Vector{Edit{S,T}}
|
||||||
|
|
||||||
Variant{S,T}(ref::S, edits::Vector{Edit{S,T}}, ::Unsafe) where {S,T} = new(ref, edits)
|
Haplotype{S,T}(ref::S, edits::Vector{Edit{S,T}}, ::Unsafe) where {S,T} = new(ref, edits)
|
||||||
end
|
end
|
||||||
|
|
||||||
function Variant{S,T}(ref::S, edits::Vector{Edit{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
function Haplotype{S,T}(
|
||||||
|
ref::S, edits::Vector{Edit{S,T}}
|
||||||
|
) where {S<:BioSequence,T<:BioSymbol}
|
||||||
sort!(edits; by=x -> x.pos)
|
sort!(edits; by=x -> x.pos)
|
||||||
result = Variant{S,T}(ref, edits, Unsafe())
|
result = Haplotype{S,T}(ref, edits, Unsafe())
|
||||||
_is_valid(result) || error("TODO") # report what kind of error message?
|
_is_valid(result) || error("TODO") # report what kind of error message?
|
||||||
return result
|
return result
|
||||||
end
|
end
|
||||||
|
|
||||||
function Variant(ref::S, edits::Vector{Edit{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
function Haplotype(ref::S, edits::Vector{Edit{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
||||||
return Variant{S,T}(ref, edits)
|
return Haplotype{S,T}(ref, edits)
|
||||||
end
|
end
|
||||||
|
|
||||||
function Base.show(io::IO, x::Variant)
|
function Base.show(io::IO, x::Haplotype)
|
||||||
n = length(x.edits)
|
n = length(x.edits)
|
||||||
print(io, summary(x), " with $n edit$(n > 1 ? "s" : ""):")
|
print(io, summary(x), " with $n edit$(n > 1 ? "s" : ""):")
|
||||||
for i in x.edits
|
for i in x.edits
|
||||||
|
@ -46,21 +48,21 @@ function Base.show(io::IO, x::Variant)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
is_valid(v::Variant)
|
is_valid(h::Haplotype)
|
||||||
|
|
||||||
Validate `v`. `v` is invalid if any of its operations are out of bounds, or the same
|
Validate `h`. `h` is invalid if any of its operations are out of bounds, or the same
|
||||||
position is affected by multiple edits.
|
position is affected by multiple edits.
|
||||||
"""
|
"""
|
||||||
function _is_valid(v::Variant)
|
function _is_valid(h::Haplotype)
|
||||||
isempty(v.ref) && return false
|
isempty(h.ref) && return false
|
||||||
valid_positions = 1:length(v.ref)
|
valid_positions = 1:length(h.ref)
|
||||||
last_was_insert = false
|
last_was_insert = false
|
||||||
for edit in v.edits
|
for edit in h.edits
|
||||||
pos = edit.pos
|
pos = edit.pos
|
||||||
op = edit.x
|
op = edit.x
|
||||||
# Sanity check: for this to be a valid variant, it must be comprised of valid
|
# Sanity check: for this to be a valid variant, it must be comprised of valid
|
||||||
# variations
|
# variations
|
||||||
_is_valid(Variation(v.ref, edit)) || return false
|
_is_valid(Variation(h.ref, edit)) || return false
|
||||||
|
|
||||||
# For substitutions we simply do not allow another modification of the same base
|
# For substitutions we simply do not allow another modification of the same base
|
||||||
if op isa Substitution
|
if op isa Substitution
|
||||||
|
@ -87,7 +89,7 @@ function _is_valid(v::Variant)
|
||||||
return true
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
function Variant(
|
function Haplotype(
|
||||||
aln::PairwiseAlignment{T,T}
|
aln::PairwiseAlignment{T,T}
|
||||||
) where {T<:LongSequence{<:Union{BS.AminoAcidAlphabet,BS.NucleicAcidAlphabet}}}
|
) where {T<:LongSequence{<:Union{BS.AminoAcidAlphabet,BS.NucleicAcidAlphabet}}}
|
||||||
ref = aln.b
|
ref = aln.b
|
||||||
|
@ -141,30 +143,30 @@ function Variant(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return Variant(ref, edits)
|
return Haplotype(ref, edits)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
_edits(v::Variant)
|
_edits(h::Haplotype)
|
||||||
|
|
||||||
Gets the [`Edit`](@ref)s that comprise `v`
|
Gets the [`Edit`](@ref)s that comprise `h`
|
||||||
"""
|
"""
|
||||||
_edits(v::Variant) = v.edits
|
_edits(h::Haplotype) = h.edits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
reference(v::Variant)
|
reference(h::Haplotype)
|
||||||
|
|
||||||
Gets the reference sequence of `v`.
|
Gets the reference sequence of `h`.
|
||||||
"""
|
"""
|
||||||
reference(v::Variant) = v.ref
|
reference(h::Haplotype) = h.ref
|
||||||
Base.:(==)(x::Variant, y::Variant) = x.ref == y.ref && x.edits == y.edits
|
Base.:(==)(x::Haplotype, y::Haplotype) = x.ref == y.ref && x.edits == y.edits
|
||||||
|
|
||||||
"""
|
"""
|
||||||
reconstruct!(seq::S, x::Variant{S}) where {S}
|
reconstruct!(seq::S, x::Haplotype{S}) where {S}
|
||||||
|
|
||||||
Apply the edits in `x` to `seq` and return the mutated sequence
|
Apply the edits in `x` to `seq` and return the mutated sequence
|
||||||
"""
|
"""
|
||||||
function reconstruct!(seq::S, x::Variant{S}) where {S}
|
function reconstruct!(seq::S, x::Haplotype{S}) where {S}
|
||||||
len = length(x.ref) + sum(edit -> _lendiff(edit), _edits(x))
|
len = length(x.ref) + sum(edit -> _lendiff(edit), _edits(x))
|
||||||
resize!(seq, len % UInt)
|
resize!(seq, len % UInt)
|
||||||
refpos = seqpos = 1
|
refpos = seqpos = 1
|
|
@ -2,10 +2,10 @@ module SequenceVariation
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Needs to be able to:
|
Needs to be able to:
|
||||||
* Given a sequence and a reference, create a `Variant` that unambiguously represents
|
* Given a sequence and a reference, create a `Haplotype` that unambiguously represents
|
||||||
the sequence
|
the sequence
|
||||||
|
|
||||||
* Given a `Variant` and a new reference, translate the variant to the new reference.
|
* Given a `Haplotype` and a new reference, translate the variant to the new reference.
|
||||||
|
|
||||||
* Given a mutation and a reference and a sequence, determine if the sequence has that
|
* Given a mutation and a reference and a sequence, determine if the sequence has that
|
||||||
mutation
|
mutation
|
||||||
|
@ -26,9 +26,9 @@ using BioSequences: BioSequences, BioSequence, NucleotideSeq, LongSequence, isga
|
||||||
using BioSymbols: BioSymbol
|
using BioSymbols: BioSymbol
|
||||||
|
|
||||||
export Deletion
|
export Deletion
|
||||||
|
export Haplotype
|
||||||
export Insertion
|
export Insertion
|
||||||
export Substitution
|
export Substitution
|
||||||
export Variant
|
|
||||||
export Variation
|
export Variation
|
||||||
export altbases
|
export altbases
|
||||||
export mutation
|
export mutation
|
||||||
|
@ -45,7 +45,7 @@ struct Unsafe end
|
||||||
struct Inapplicable end
|
struct Inapplicable end
|
||||||
|
|
||||||
include("Edit.jl")
|
include("Edit.jl")
|
||||||
include("Variant.jl")
|
include("Haplotype.jl")
|
||||||
include("Variation.jl")
|
include("Variation.jl")
|
||||||
|
|
||||||
end # module
|
end # module
|
||||||
|
|
|
@ -12,7 +12,7 @@ and built-in validation.
|
||||||
Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
|
Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
|
||||||
|
|
||||||
Generally speaking, the `Edit` constructor should be avoided to ensure corectness: use of
|
Generally speaking, the `Edit` constructor should be avoided to ensure corectness: use of
|
||||||
[`variations(::Variant)`](@ref) is encouraged, instead.
|
[`variations(::Haplotype)`](@ref) is encouraged, instead.
|
||||||
|
|
||||||
Constructing a `Variation` from an `AbstractString` will parse the from `edit` using the
|
Constructing a `Variation` from an `AbstractString` will parse the from `edit` using the
|
||||||
following syntax:
|
following syntax:
|
||||||
|
@ -45,9 +45,9 @@ function Variation(ref::S, edit::AbstractString) where {S<:BioSequence}
|
||||||
return Variation{S,T}(ref, e)
|
return Variation{S,T}(ref, e)
|
||||||
end
|
end
|
||||||
|
|
||||||
function Variant(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
function Haplotype(ref::S, vars::Vector{Variation{S,T}}) where {S<:BioSequence,T<:BioSymbol}
|
||||||
edits = _edit.(vars)
|
edits = _edit.(vars)
|
||||||
return Variant{S,T}(ref, edits)
|
return Haplotype{S,T}(ref, edits)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -108,7 +108,7 @@ function Base.show(io::IO, x::Variation)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
function Base.in(v::Variation, var::Variant)
|
function Base.in(v::Variation, var::Haplotype)
|
||||||
if v.ref != var.ref
|
if v.ref != var.ref
|
||||||
error("References must be equal")
|
error("References must be equal")
|
||||||
end
|
end
|
||||||
|
@ -171,14 +171,14 @@ function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
variations(v::Variant{S,T}) where {S,T}
|
variations(h::Haplotype{S,T}) where {S,T}
|
||||||
|
|
||||||
Converts the [`Edit`](@ref)s of `v` into a vector of [`Variation`](@ref)s.
|
Converts the [`Edit`](@ref)s of `h` into a vector of [`Variation`](@ref)s.
|
||||||
"""
|
"""
|
||||||
function variations(v::Variant{S,T}) where {S,T}
|
function variations(h::Haplotype{S,T}) where {S,T}
|
||||||
vs = Vector{Variation{S,T}}(undef, length(_edits(v)))
|
vs = Vector{Variation{S,T}}(undef, length(_edits(h)))
|
||||||
for (i, e) in enumerate(_edits(v))
|
for (i, e) in enumerate(_edits(h))
|
||||||
vs[i] = Variation{S,T}(reference(v), e)
|
vs[i] = Variation{S,T}(reference(h), e)
|
||||||
end
|
end
|
||||||
return vs
|
return vs
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
"""
|
"""
|
||||||
Needs to be able to:
|
Needs to be able to:
|
||||||
* Given a sequence and a reference, create a `Variant` that unambiguously represents
|
* Given a sequence and a reference, create a `Haplotype` that unambiguously represents
|
||||||
the sequence
|
the sequence
|
||||||
|
|
||||||
* Given a `Variant` and a new reference, translate the variant to the new reference.
|
* Given a `Haplotype` and a new reference, translate the variant to the new reference.
|
||||||
|
|
||||||
* Given a mutation and a reference and a sequence, determine if the sequence has that
|
* Given a mutation and a reference and a sequence, determine if the sequence has that
|
||||||
mutation
|
mutation
|
||||||
|
@ -34,12 +34,12 @@ const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_
|
||||||
align(a::BioSequence, b::BioSequence) = pairalign(GlobalAlignment(), a, b, DNA_MODEL).aln
|
align(a::BioSequence, b::BioSequence) = pairalign(GlobalAlignment(), a, b, DNA_MODEL).aln
|
||||||
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
|
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
|
||||||
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")
|
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")
|
||||||
var = Variant(align(seq1, seq2))
|
var = Haplotype(align(seq1, seq2))
|
||||||
|
|
||||||
@testset "VariantRoundtrip" begin
|
@testset "HaplotypeRoundtrip" begin
|
||||||
for v in variations(var)
|
for v in variations(var)
|
||||||
@test v in var
|
@test v in var
|
||||||
@test v in Variant(seq2, [v])
|
@test v in Haplotype(seq2, [v])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ end
|
||||||
read02 = AlignedSequence(mutseq[3:12], Alignment("10M", 1, 3))
|
read02 = AlignedSequence(mutseq[3:12], Alignment("10M", 1, 3))
|
||||||
aln01 = PairwiseAlignment(read01, refseq)
|
aln01 = PairwiseAlignment(read01, refseq)
|
||||||
aln02 = PairwiseAlignment(read02, refseq)
|
aln02 = PairwiseAlignment(read02, refseq)
|
||||||
@test Variant(aln01).edits == Variant(aln02).edits
|
@test Haplotype(aln01).edits == Haplotype(aln02).edits
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "VariationParsing" begin
|
@testset "VariationParsing" begin
|
||||||
|
@ -72,7 +72,7 @@ end
|
||||||
|
|
||||||
read = AlignedSequence(mutseq[1:10], Alignment("10M", 1, 1))
|
read = AlignedSequence(mutseq[1:10], Alignment("10M", 1, 1))
|
||||||
aln = PairwiseAlignment(read, refseq)
|
aln = PairwiseAlignment(read, refseq)
|
||||||
var = Variant(aln)
|
var = Haplotype(aln)
|
||||||
|
|
||||||
sub = Variation(refseq, "A4T")
|
sub = Variation(refseq, "A4T")
|
||||||
@test first(variations(var)) == sub
|
@test first(variations(var)) == sub
|
||||||
|
@ -108,31 +108,31 @@ end
|
||||||
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
@test altbases(Variation(dna"ATCGA", "1C")) == dna"CA"
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "SoftclipVariant" begin
|
@testset "SoftclipHaplotype" begin
|
||||||
refseq = dna"GATTACA"
|
refseq = dna"GATTACA"
|
||||||
mutseq = dna"GATTACAAAA"
|
mutseq = dna"GATTACAAAA"
|
||||||
|
|
||||||
refvar = Variant(refseq, SequenceVariation.Edit{typeof(refseq),eltype(refseq)}[])
|
refvar = Haplotype(refseq, SequenceVariation.Edit{typeof(refseq),eltype(refseq)}[])
|
||||||
|
|
||||||
# Test for ending soft clip
|
# Test for ending soft clip
|
||||||
@test Variant(
|
@test Haplotype(
|
||||||
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)
|
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S", 1, 1)), refseq)
|
||||||
) == refvar
|
) == refvar
|
||||||
|
|
||||||
# Test for ending soft+hard clip
|
# Test for ending soft+hard clip
|
||||||
@test Variant(
|
@test Haplotype(
|
||||||
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)
|
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3S2H", 1, 1)), refseq)
|
||||||
) == refvar
|
) == refvar
|
||||||
|
|
||||||
# Test that ending insertions are still valid
|
# Test that ending insertions are still valid
|
||||||
@test length(
|
@test length(
|
||||||
Variant(
|
Haplotype(
|
||||||
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)
|
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3I", 1, 1)), refseq)
|
||||||
).edits,
|
).edits,
|
||||||
) == 1
|
) == 1
|
||||||
|
|
||||||
# Test that out-of-bounds bases are still caught
|
# Test that out-of-bounds bases are still caught
|
||||||
@test_throws BoundsError Variant(
|
@test_throws BoundsError Haplotype(
|
||||||
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq)
|
PairwiseAlignment(AlignedSequence(mutseq, Alignment("7=3X", 1, 1)), refseq)
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue