mirror of
https://github.com/MillironX/SequenceVariation.jl.git
synced 2024-11-24 14:19:55 +00:00
Compare commits
No commits in common. "4e6c781c602e4ad944802f37854122080b25c8df" and "f38f0edb00a1e6ef04b105ff7dd0e16379741939" have entirely different histories.
4e6c781c60
...
f38f0edb00
7 changed files with 13 additions and 118 deletions
|
@ -7,16 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- `translate` functionality for `Haplotype`s ([#31](https://github.com/BioJulia/SequenceVariation.jl/pull/31))
|
|
||||||
|
|
||||||
## [0.2.0] - 2023-01-10
|
## [0.2.0] - 2023-01-10
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
- Tutorial-type documentation ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
- Tutorial-type documentation ([#28](https://github.com/BioJulia/SequenceVariation.jl/pull/28))
|
||||||
- `Base.isless` implementation for `Edit` and `Variation` ([#32](https://github.com/BioJulia/SequenceVariation.jl/pull/32))
|
- `Base.isless` implementation for `Edit` and `Variation` ([#31](https://github.com/BioJulia/SequenceVariation.jl/pull/31))
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,6 @@ Haplotype
|
||||||
reference(::Haplotype)
|
reference(::Haplotype)
|
||||||
variations
|
variations
|
||||||
reconstruct
|
reconstruct
|
||||||
translate(::Haplotype{S,T}, ::PairwiseAlignment{S,S}) where {S,T}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Variations
|
## Variations
|
||||||
|
@ -31,7 +30,7 @@ translate(::Haplotype{S,T}, ::PairwiseAlignment{S,S}) where {S,T}
|
||||||
Variation
|
Variation
|
||||||
reference(::Variation)
|
reference(::Variation)
|
||||||
mutation
|
mutation
|
||||||
translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T}
|
translate
|
||||||
refbases
|
refbases
|
||||||
altbases
|
altbases
|
||||||
```
|
```
|
||||||
|
|
|
@ -38,18 +38,3 @@ human2 = reconstruct(bos_human_haplotype)
|
||||||
human2 == bovine
|
human2 == bovine
|
||||||
human2 == human
|
human2 == human
|
||||||
```
|
```
|
||||||
|
|
||||||
## Reference switching
|
|
||||||
|
|
||||||
All variations within a haplotype can be mapped to a new reference sequence
|
|
||||||
given an alignment between the new and old references using the
|
|
||||||
[`translate`](@ref translate(::Haplotype{S,T}, ::PairwiseAlignment{S,S}) where {S,T})
|
|
||||||
function. This could be useful if variants were called against a reference
|
|
||||||
sequence for the entire species, but need to be analyzed as variants of a
|
|
||||||
subtype later.
|
|
||||||
|
|
||||||
```@repl call_variants
|
|
||||||
ovis_human_alignment =
|
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
|
|
||||||
SequenceVariation.translate(bos_ovis_haplotype, ovis_human_alignment)
|
|
||||||
```
|
|
||||||
|
|
|
@ -54,15 +54,13 @@ variations(bos_human_haplotype)
|
||||||
## Reference switching
|
## Reference switching
|
||||||
|
|
||||||
An individual variation can be mapped to a new reference sequence given an
|
An individual variation can be mapped to a new reference sequence given an
|
||||||
alignment between the new and old references using the
|
alignment between the new and old references using the [`translate`](@ref).
|
||||||
[`translate`](@ref translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T})
|
|
||||||
function.
|
|
||||||
|
|
||||||
```@repl call_variants
|
```@repl call_variants
|
||||||
ovis_human_alignment =
|
ovis_human_alignment =
|
||||||
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
|
PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine)
|
||||||
human_variation = first(variations(bos_ovis_haplotype))
|
human_variation = first(variations(bos_ovis_haplotype))
|
||||||
reference(ans) == bovine
|
reference(ans) == bovine
|
||||||
SequenceVariation.translate(human_variation, ovis_human_alignment)
|
SequenceVariation.translate(human_variation, ovis_human_haplotype)
|
||||||
reference(ans) == bovine
|
reference(ans) == bovine
|
||||||
```
|
```
|
||||||
|
|
|
@ -198,23 +198,3 @@ function reconstruct(h::Haplotype)
|
||||||
end
|
end
|
||||||
return seq
|
return seq
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
|
||||||
translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
|
||||||
|
|
||||||
Convert the variations in `hap` to a new reference sequence based upon `aln`. The alignment
|
|
||||||
rules follow the conventions of
|
|
||||||
[`translate(::Variation, PairwiseAlignment)`](@ref translate(::Variation{S,T}, ::PairwiseAlignment{S,S}) where {S,T}).
|
|
||||||
Indels at the beginning or end may not be preserved. Returns a new
|
|
||||||
[`Haplotype`](@ref)
|
|
||||||
"""
|
|
||||||
function translate(hap::Haplotype{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
|
||||||
vars = variations(hap)
|
|
||||||
new_ref = BA.sequence(aln)
|
|
||||||
translated_vars = Variation{S,T}[]
|
|
||||||
for v in vars
|
|
||||||
new_v = translate(v, aln)
|
|
||||||
isnothing(new_v) || push!(translated_vars, new_v)
|
|
||||||
end
|
|
||||||
return Haplotype(new_ref, translated_vars)
|
|
||||||
end
|
|
||||||
|
|
|
@ -138,7 +138,7 @@ function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
||||||
if iszero(pos)
|
if iszero(pos)
|
||||||
(s, r), _ = iterate(aln)
|
(s, r), _ = iterate(aln)
|
||||||
(isgap(s) | isgap(r)) && return Inapplicable()
|
(isgap(s) | isgap(r)) && return Inapplicable()
|
||||||
return Variation{S,T}(seq, Edit{S,T}(Insertion(kind.seq), 0))
|
return Variation{S,T}(seq, Edit{S,T}(Insertion(var.edit.x), 0))
|
||||||
end
|
end
|
||||||
|
|
||||||
(seqpos, op) = BA.ref2seq(aln, pos)
|
(seqpos, op) = BA.ref2seq(aln, pos)
|
||||||
|
@ -153,19 +153,18 @@ function translate(var::Variation{S,T}, aln::PairwiseAlignment{S,S}) where {S,T}
|
||||||
# If it's a deletion, return nothing if the deleted part is already missing
|
# If it's a deletion, return nothing if the deleted part is already missing
|
||||||
# from the new reference.
|
# from the new reference.
|
||||||
(stop, op2) = BA.ref2seq(aln, pos + length(kind) - 1)
|
(stop, op2) = BA.ref2seq(aln, pos + length(kind) - 1)
|
||||||
start = seqpos + (op == BA.OP_DELETE)
|
start = seqpos + op == BA.OP_DELETE
|
||||||
del_len = stop - start + 1
|
start < stop && return nothing
|
||||||
del_len > 0 || return nothing
|
edit = Edit{S,T}(Deletion(stop - start + 1), start)
|
||||||
edit = Edit{S,T}(Deletion(del_len), start)
|
|
||||||
return Variation{S,T}(seq, edit, Unsafe())
|
return Variation{S,T}(seq, edit, Unsafe())
|
||||||
else
|
else
|
||||||
# If it maps directly to a symbol, just insert
|
# If it maps directly to a symbol, just insert
|
||||||
if op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)
|
if op in (BA.OP_MATCH, BA.OP_SEQ_MATCH, BA.OP_SEQ_MISMATCH)
|
||||||
# This happens if there is already an insertion at the position
|
# This happens if there is already an insertion at the position
|
||||||
if pos != lastindex(ref) && first(BA.ref2seq(aln, pos + 1)) != seqpos + 1
|
if pos != lastindex(ref) && first(ref2seq(aln, pos + 1)) != seqpos + 1
|
||||||
return Inapplicable()
|
return Inapplicable()
|
||||||
else
|
else
|
||||||
edit = Edit{S,T}(Insertion(mutation(var).seq), seqpos)
|
edit = Edit{S,T}(Insertion(var.edit.x), seqpos)
|
||||||
return Variation{S,T}(seq, edit, Unsafe())
|
return Variation{S,T}(seq, edit, Unsafe())
|
||||||
end
|
end
|
||||||
# Alternatively, it can map to a deletion. In that case, it become really
|
# Alternatively, it can map to a deletion. In that case, it become really
|
||||||
|
|
|
@ -24,25 +24,16 @@ TODO now:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
using Aqua
|
using Aqua
|
||||||
using BioAlignments:
|
using BioAlignments
|
||||||
AffineGapScoreModel,
|
using BioSequences
|
||||||
GlobalAlignment,
|
|
||||||
EDNAFULL,
|
|
||||||
pairalign,
|
|
||||||
Alignment,
|
|
||||||
AlignedSequence,
|
|
||||||
PairwiseAlignment
|
|
||||||
using BioSequences: BioSequence, @dna_str, ungap!
|
|
||||||
using BioSymbols: DNA_A
|
|
||||||
using SequenceVariation
|
using SequenceVariation
|
||||||
using Test
|
using Test
|
||||||
|
|
||||||
const DNA_MODEL = AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_extend=-2)
|
const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_extend=-2)
|
||||||
|
|
||||||
align(a::BioSequence, b::BioSequence) = pairalign(GlobalAlignment(), a, b, DNA_MODEL).aln
|
align(a::BioSequence, b::BioSequence) = pairalign(GlobalAlignment(), a, b, DNA_MODEL).aln
|
||||||
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
|
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
|
||||||
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")
|
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")
|
||||||
seq3 = ungap!(dna"-GATGCGTGT-AGCAACACTTATCGC-")
|
|
||||||
var = Haplotype(align(seq1, seq2))
|
var = Haplotype(align(seq1, seq2))
|
||||||
|
|
||||||
@testset "EditSorting" begin
|
@testset "EditSorting" begin
|
||||||
|
@ -69,16 +60,6 @@ end
|
||||||
@test Variation(seq2, "A3T") < Variation(seq2, "T4A")
|
@test Variation(seq2, "A3T") < Variation(seq2, "T4A")
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "HaplotypeTranslation" begin
|
|
||||||
ref1 = seq2
|
|
||||||
ref2 = seq3
|
|
||||||
alt = seq1
|
|
||||||
|
|
||||||
@test all(
|
|
||||||
v -> v in Haplotype(align(alt, ref2)), variations(translate(var, align(ref2, ref1)))
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
@testset "VariationPosition" begin
|
@testset "VariationPosition" begin
|
||||||
refseq = dna"ACAACTTTATCT"
|
refseq = dna"ACAACTTTATCT"
|
||||||
mutseq = dna"ACATCTTTATCT"
|
mutseq = dna"ACATCTTTATCT"
|
||||||
|
@ -114,49 +95,6 @@ end
|
||||||
@test first(variations(var)) == sub
|
@test first(variations(var)) == sub
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "VariationTranslation" begin
|
|
||||||
ref1 = seq2
|
|
||||||
ref2 = seq3
|
|
||||||
ref3 = copy(ref1)
|
|
||||||
ref3[1] = DNA_A
|
|
||||||
alt = seq1
|
|
||||||
|
|
||||||
ref2_on_ref1 = align(ref2, ref1)
|
|
||||||
alt_on_ref1 = align(alt, ref1)
|
|
||||||
ref1_on_alt = align(ref1, alt)
|
|
||||||
ref3_on_ref1 = align(ref3, ref1)
|
|
||||||
|
|
||||||
# Test insertion at position zero
|
|
||||||
@test translate(Variation(ref1, "0CAT"), ref2_on_ref1) ==
|
|
||||||
SequenceVariation.Inapplicable()
|
|
||||||
@test translate(Variation(ref1, "0CAT"), ref3_on_ref1) == Variation(ref3, "0CAT")
|
|
||||||
|
|
||||||
# Test substitution on deletion
|
|
||||||
@test isnothing(translate(Variation(ref1, "A17T"), ref1_on_alt))
|
|
||||||
|
|
||||||
# Test substitution to itself
|
|
||||||
@test isnothing(translate(Variation(ref1, "A23C"), ref2_on_ref1))
|
|
||||||
|
|
||||||
# Test simple substitution
|
|
||||||
@test translate(Variation(ref1, "T10A"), ref2_on_ref1) == Variation(ref2, "T9A")
|
|
||||||
|
|
||||||
# Test simple deletion
|
|
||||||
@test translate(Variation(ref1, "Δ17-18"), ref2_on_ref1) == Variation(ref2, "Δ16-17")
|
|
||||||
|
|
||||||
# Test deletion on deletion
|
|
||||||
@test isnothing(translate(Variation(ref1, "Δ17-17"), alt_on_ref1))
|
|
||||||
|
|
||||||
# Test simple insertion
|
|
||||||
@test translate(Variation(ref1, "6CAT"), ref2_on_ref1) == Variation(ref2, "5CAT")
|
|
||||||
|
|
||||||
# Test two insertions at the same position
|
|
||||||
@test translate(Variation(ref1, "10G"), alt_on_ref1) == SequenceVariation.Inapplicable()
|
|
||||||
|
|
||||||
# Test insertion on deletion
|
|
||||||
@test translate(Variation(ref1, "17CAT"), alt_on_ref1) ==
|
|
||||||
SequenceVariation.Inapplicable()
|
|
||||||
end
|
|
||||||
|
|
||||||
@testset "VariationBases" begin
|
@testset "VariationBases" begin
|
||||||
# Test substition bases
|
# Test substition bases
|
||||||
@test refbases(Variation(dna"ATCGA", "C3G")) == dna"C"
|
@test refbases(Variation(dna"ATCGA", "C3G")) == dna"C"
|
||||||
|
|
Loading…
Reference in a new issue