@ -1,41 +1,43 @@
"""
Variant { S <: BioSequence , T <: BioSymbol }
Haplotype { S <: BioSequence , T <: BioSymbol }
A set of variations within a given sequence that are all found together . Depending on the
field , it might also be referred to as a " genotype ," " haplotype, " or " strain. "
field , it might also be referred to as a " genotype " or " strain. "
# Constructors
Variant ( ref :: S , edits :: Vector { Edit { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
Variant ( ref :: S , vars :: Vector { Variation { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
Variant (
Haplotype ( ref :: S , edits :: Vector { Edit { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
Haplotype ( ref :: S , vars :: Vector { Variation { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
Haplotype (
aln :: PairwiseAlignment { T , T }
) where { T <: LongSequence { <: Union { BS . AminoAcidAlphabet , BS . NucleicAcidAlphabet } } }
When constructing a ` Variant ` from a vector of [ ` Edit ` ] ( @ref ) s or [ ` Variation ` ] ( @ref ) s , the
edits are applied sequentially from first to last position , therefore the vector must always
be sorted by position . These edits are sorted automatically if constructing from an
When constructing a ` Haplotype ` from a vector of [ ` Edit ` ] ( @ref ) s or [ ` Variation ` ] ( @ref ) s ,
the edits are applied sequentially from first to last position , therefore the vector must
always be sorted by position . These edits are sorted automatically if constructing from an
alignment .
"""
struct Variant { S <: BioSequence , T <: BioSymbol }
struct Haplotype { S <: BioSequence , T <: BioSymbol }
# Reference sequence that the stored edits are expressed against.
ref :: S
# Edits relative to `ref`; the checked outer constructor sorts this vector by `pos`.
edits :: Vector { Edit { S , T } }
# Unchecked inner constructor: the trailing, unnamed `Unsafe` argument selects this
# method, which stores `ref` and `edits` exactly as given (no sorting, no validation).
Variant { S , T } ( ref :: S , edits :: Vector { Edit { S , T } } , :: Unsafe ) where { S , T } = new ( ref , edits )
Haplotype { S , T } ( ref :: S , edits :: Vector { Edit { S , T } } , :: Unsafe ) where { S , T } = new ( ref , edits )
end
# Checked constructor.
#
# Sorts `edits` in place by position (the caller's vector is mutated), wraps `ref` and
# the sorted edits through the unchecked `Unsafe` inner constructor, and validates the
# result with `_is_valid`.
#
# Returns the validated object. Raises an `ErrorException` (via `error`) when
# `_is_valid` rejects the combination of `ref` and `edits`.
function Variant { S , T } ( ref :: S , edits :: Vector { Edit { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
function Haplotype { S , T } (
ref :: S , edits :: Vector { Edit { S , T } }
) where { S <: BioSequence , T <: BioSymbol }
sort! ( edits ; by = x -> x . pos )
result = Variant { S , T } ( ref , edits , Unsafe ( ) )
result = Haplotype { S , T } ( ref , edits , Unsafe ( ) )
# Fail with an actionable message instead of a placeholder; the listed causes mirror
# the conditions documented for `_is_valid`.
_is_valid ( result ) || error ( "edits are not valid for the reference sequence (empty reference, out-of-bounds edit, or a position affected by more than one edit)" )
return result
end
# Convenience constructor: `S` and `T` are taken from the argument types, and the call
# is forwarded to the fully-parameterised `{S, T}(ref, edits)` constructor.
function Variant ( ref :: S , edits :: Vector { Edit { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
return Variant { S , T } ( ref , edits )
function Haplotype ( ref :: S , edits :: Vector { Edit { S , T } } ) where { S <: BioSequence , T <: BioSymbol }
return Haplotype { S , T } ( ref , edits )
end
function Base . show ( io :: IO , x :: Variant )
function Base . show ( io :: IO , x :: Haplotype )
n = length ( x . edits )
print ( io , summary ( x ) , " with $n edit $ ( n > 1 ? " s " : " " ) : " )
for i in x . edits
@ -46,21 +48,21 @@ function Base.show(io::IO, x::Variant)
end
"""
is_valid ( v:: Variant )
is_valid ( h:: Haplotype )
Validate ` v` . ` v ` is invalid if any of its operations are out of bounds , or the same
Validate ` h` . ` h ` is invalid if any of its operations are out of bounds , or the same
position is affected by multiple edits .
"""
function _is_valid ( v:: Variant )
isempty ( v . ref ) && return false
valid_positions = 1 : length ( v . ref )
function _is_valid ( h:: Haplotype )
isempty ( h . ref ) && return false
valid_positions = 1 : length ( h . ref )
last_was_insert = false
for edit in v . edits
for edit in h . edits
pos = edit . pos
op = edit . x
# Sanity check: for this to be a valid variant, it must be comprised of valid
# variations
_is_valid ( Variation ( v . ref , edit ) ) || return false
_is_valid ( Variation ( h . ref , edit ) ) || return false
# For substitutions we simply do not allow another modification of the same base
if op isa Substitution
@ -87,7 +89,7 @@ function _is_valid(v::Variant)
return true
end
function Variant (
function Haplotype (
aln :: PairwiseAlignment { T , T }
) where { T <: LongSequence { <: Union { BS . AminoAcidAlphabet , BS . NucleicAcidAlphabet } } }
ref = aln . b
@ -141,30 +143,30 @@ function Variant(
end
end
return Variant ( ref , edits )
return Haplotype ( ref , edits )
end
"""
_edits ( v:: Variant )
_edits ( h:: Haplotype )
Gets the [ ` Edit ` ] ( @ref ) s that comprise ` v `
Gets the [ ` Edit ` ] ( @ref ) s that comprise ` h `
"""
_edits ( v:: Variant ) = v . edits
_edits ( h:: Haplotype ) = h . edits # returns the stored `edits` field itself, not a copy
"""
reference ( v:: Variant )
reference ( h:: Haplotype )
Gets the reference sequence of ` v ` .
Gets the reference sequence of ` h ` .
"""
reference ( v:: Variant ) = v . ref # plain field accessor for `ref`
# Equality: two values are equal when their reference sequences compare equal and
# their edit vectors compare equal.
# NOTE(review): no matching `Base.hash` method is visible in this chunk — confirm one
# is defined elsewhere so that `==` and `hash` stay consistent.
Base . : ( == ) ( x :: Variant, y :: Variant ) = x . ref == y . ref && x . edits == y . edits
reference ( h:: Haplotype ) = h . ref
Base . : ( == ) ( x :: Haplotype, y :: Haplotype ) = x . ref == y . ref && x . edits == y . edits
"""
reconstruct! ( seq :: S , x :: Variant { S } ) where { S }
reconstruct! ( seq :: S , x :: Haplotype { S } ) where { S }
Apply the edits in ` x ` to ` seq ` and return the mutated sequence
"""
function reconstruct! ( seq :: S , x :: Variant { S } ) where { S }
function reconstruct! ( seq :: S , x :: Haplotype { S } ) where { S }
len = length ( x . ref ) + sum ( edit -> _lendiff ( edit ) , _edits ( x ) )
resize! ( seq , len % UInt )
refpos = seqpos = 1