Compare commits

...

7 Commits

@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- `translate` functionality for `Haplotype`s ([#31](https://github.com/BioJulia/SequenceVariation.jl/pull/31))
## [0.2.0] - 2023-01-10
### Added
- Tutorial-type documentation ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
- `Base.isless` implementation for `Edit` and `Variation` ([#31](https://github.com/BioJulia/SequenceVariation.jl/pull/31))
- `Base.isless` implementation for `Edit` and `Variation` ([#32](https://github.com/BioJulia/SequenceVariation.jl/pull/32))
### Changed

@ -22,6 +22,7 @@ Haplotype
reference(::Haplotype)
variations
reconstruct
translate(::Haplotype{S,T}, ::PairwiseAlignment{S,S}) where {S,T}
```
## Variations
@ -30,7 +31,7 @@ reconstruct
Variation
reference(::Variation)
mutation
translate
translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T}
refbases
altbases
```

@ -38,3 +38,18 @@ human2 = reconstruct(bos_human_haplotype)
human2 == bovine
human2 == human
```
## Reference switching
All variations within a haplotype can be mapped to a new reference sequence
given an alignment between the new and old references using the
[`translate`](@ref translate(::Haplotype{S,T}, ::PairwiseAlignment{S,S}) where {S,T})
function. This could be useful if variants were called against a reference
sequence for the entire species, but need to be analyzed as variants of a
subtype later.
```@repl call_variants
ovis_human_alignment =
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
SequenceVariation.translate(bos_ovis_haplotype, ovis_human_alignment)
```

@ -54,13 +54,15 @@ variations(bos_human_haplotype)
## Reference switching
An individual variation can be mapped to a new reference sequence given an
alignment between the new and old references using the [`translate`](@ref).
alignment between the new and old references using the
[`translate`](@ref translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T})
function.
```@repl call_variants
ovis_human_alignment =
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
human_variation = first(variations(bos_ovis_haplotype))
reference(ans) == bovine
SequenceVariation.translate(human_variation, ovis_human_haplotype)
SequenceVariation.translate(human_variation, ovis_human_alignment)
reference(ans) == bovine
```

@ -198,3 +198,23 @@ function reconstruct(h::Haplotype)
end
return seq
end
"""
translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
Convert the variations in `hap` to a new reference sequence based upon `aln`. The alignment
rules follow the conventions of
[`translate(::Variation, PairwiseAlignment)`](@ref translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T}).
Indels at the beginning or end may not be preserved. Returns a new
[`Haplotype`](@ref)
"""
function translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
vars = variations(hap)
new_ref = BA.sequence(aln)
translated_vars = Variation{S,T}[]
for v in vars
new_v = translate(v, aln)
isnothing(new_v) || push!(translated_vars, new_v)
end
return Haplotype(new_ref, translated_vars)
end

@ -138,7 +138,7 @@ function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
if iszero(pos)
(s, r), _ = iterate(aln)
(isgap(s) | isgap(r)) && return Inapplicable()
return Variation{S,T}(seq, Edit{S,T}(Insertion(var.edit.x), 0))
return Variation{S,T}(seq, Edit{S,T}(Insertion(kind.seq), 0))
end
(seqpos, op) = BA.ref2seq(aln, pos)
@ -153,18 +153,19 @@ function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
# If it's a deletion, return nothing if the deleted part is already missing
# from the new reference.
(stop, op2) = BA.ref2seq(aln, pos + length(kind) - 1)
start = seqpos + op == BA.OP_DELETE
start < stop && return nothing
edit = Edit{S,T}(Deletion(stop - start + 1), start)
start = seqpos + (op == BA.OP_DELETE)
del_len = stop - start + 1
del_len > 0 || return nothing
edit = Edit{S,T}(Deletion(del_len), start)
return Variation{S,T}(seq, edit, Unsafe())
else
# If it maps directly to a symbol, just insert
if op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)
# This happens if there is already an insertion at the position
if pos != lastindex(ref) && first(ref2seq(aln, pos + 1)) != seqpos + 1
if pos != lastindex(ref) && first(BA.ref2seq(aln, pos + 1)) != seqpos + 1
return Inapplicable()
else
edit = Edit{S,T}(Insertion(var.edit.x), seqpos)
edit = Edit{S,T}(Insertion(mutation(var).seq), seqpos)
return Variation{S,T}(seq, edit, Unsafe())
end
# Alternatively, it can map to a deletion. In that case, it become really

@ -24,16 +24,25 @@ TODO now:
"""
using Aqua
using BioAlignments
using BioSequences
using BioAlignments:
AffineGapScoreModel,
GlobalAlignment,
EDNAFULL,
pairalign,
Alignment,
AlignedSequence,
PairwiseAlignment
using BioSequences: BioSequence, @dna_str, ungap!
using BioSymbols: DNA_A
using SequenceVariation
using Test
const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_extend=-2)
const DNA_MODEL = AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_extend=-2)
align(a::BioSequence, b::BioSequence) = pairalign(GlobalAlignment(), a, b, DNA_MODEL).aln
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")
seq3 = ungap!(dna"-GATGCGTGT-AGCAACACTTATCGC-")
var = Haplotype(align(seq1, seq2))
@testset "EditSorting" begin
@ -60,6 +69,16 @@ end
@test Variation(seq2, "A3T") < Variation(seq2, "T4A")
end
@testset "HaplotypeTranslation" begin
ref1 = seq2
ref2 = seq3
alt = seq1
@test all(
v -> v in Haplotype(align(alt, ref2)), variations(translate(var, align(ref2, ref1)))
)
end
@testset "VariationPosition" begin
refseq = dna"ACAACTTTATCT"
mutseq = dna"ACATCTTTATCT"
@ -95,6 +114,49 @@ end
@test first(variations(var)) == sub
end
@testset "VariationTranslation" begin
ref1 = seq2
ref2 = seq3
ref3 = copy(ref1)
ref3[1] = DNA_A
alt = seq1
ref2_on_ref1 = align(ref2, ref1)
alt_on_ref1 = align(alt, ref1)
ref1_on_alt = align(ref1, alt)
ref3_on_ref1 = align(ref3, ref1)
# Test insertion at position zero
@test translate(Variation(ref1, "0CAT"), ref2_on_ref1) ==
SequenceVariation.Inapplicable()
@test translate(Variation(ref1, "0CAT"), ref3_on_ref1) == Variation(ref3, "0CAT")
# Test substitution on deletion
@test isnothing(translate(Variation(ref1, "A17T"), ref1_on_alt))
# Test substitution to itself
@test isnothing(translate(Variation(ref1, "A23C"), ref2_on_ref1))
# Test simple substitution
@test translate(Variation(ref1, "T10A"), ref2_on_ref1) == Variation(ref2, "T9A")
# Test simple deletion
@test translate(Variation(ref1, "Δ17-18"), ref2_on_ref1) == Variation(ref2, "Δ16-17")
# Test deletion on deletion
@test isnothing(translate(Variation(ref1, "Δ17-17"), alt_on_ref1))
# Test simple insertion
@test translate(Variation(ref1, "6CAT"), ref2_on_ref1) == Variation(ref2, "5CAT")
# Test two insertions at the same position
@test translate(Variation(ref1, "10G"), alt_on_ref1) == SequenceVariation.Inapplicable()
# Test insertion on deletion
@test translate(Variation(ref1, "17CAT"), alt_on_ref1) ==
SequenceVariation.Inapplicable()
end
@testset "VariationBases" begin
# Test substition bases
@test refbases(Variation(dna"ATCGA", "C3G")) == dna"C"

Loading…
Cancel
Save