SequenceVariation.jl/test/runtests.jl

152 lines
4.7 KiB
Julia
Raw Normal View History

2022-01-26 13:59:19 +00:00
"""
Needs to be able to:
2023-01-04 18:45:25 +00:00
* Given a sequence and a reference, create a `Haplotype` that unambiguously represents
2022-01-26 13:59:19 +00:00
the sequence
2023-01-04 18:45:25 +00:00
* Given a `Haplotype` and a new reference, translate the haplotype to the new reference.
2022-01-26 13:59:19 +00:00
* Given a mutation and a reference and a sequence, determine if the sequence has that
mutation
TODO now:
* Create a string repr and parser for Edit, perhaps
* A243T for sub
* 119TAGGCTA for insertion
* TGAGCTA9 for deletion
* Create a parser + print/show for edit
* Play around with some NGS results relative to the picked reference.
* Is it easy to construct ref and variants? I.e. is API nice?
* Is it nice and easy to check if a mut is present?
*
* Implement "reference switching".
* Add tests
"""
2023-01-03 22:22:04 +00:00
using Aqua
2022-01-26 13:59:19 +00:00
using BioAlignments
2022-06-14 21:24:55 +00:00
using BioSequences
2022-01-26 13:59:19 +00:00
using SequenceVariation
2022-06-14 21:24:55 +00:00
using Test
2022-01-26 13:59:19 +00:00
2023-01-04 03:59:42 +00:00
# Affine-gap scoring scheme shared by every global DNA alignment in this test suite.
const DNA_MODEL = BioAlignments.AffineGapScoreModel(EDNAFULL; gap_open=-25, gap_extend=-2)

# Globally align `a` against `b` with `DNA_MODEL` and return the `aln` field of the
# `pairalign` result.
function align(a::BioSequence, b::BioSequence)
    result = pairalign(GlobalAlignment(), a, b, DNA_MODEL)
    return result.aln
end

# The two sequences are written as a gapped alignment for readability; `ungap!` is
# applied to each literal before it is used.
seq1 = ungap!(dna"--ATGCGTGTTAGCAAC--TTATCGCG")
seq2 = ungap!(dna"TGATGCGTGT-AGCAACACTTATAGCG")

# Haplotype derived from the global alignment of `seq1` and `seq2`; consumed by the
# "HaplotypeRoundtrip" testset.
var = Haplotype(align(seq1, seq2))
2023-01-04 18:45:25 +00:00
@testset "HaplotypeRoundtrip" begin
    observed = variations(var)

    # `seq1` and `seq2` differ, so at least one variation must be reported. Without
    # this check the loop below would pass vacuously if none were found.
    @test !isempty(observed)

    for v in observed
        # Every variation reported for the haplotype must test as a member of it ...
        @test v in var
        # ... and of a haplotype built from `seq2` and that single variation alone.
        @test v in Haplotype(seq2, [v])
    end
end
@testset "VariationPosition" begin
    reference = dna"ACAACTTTATCT"
    mutated = dna"ACATCTTTATCT"

    # Two 10-base windows of the mutated sequence, both covering the changed base at
    # position 4: one anchored at reference position 1, the other at position 3.
    head_window = AlignedSequence(mutated[1:10], Alignment("10M", 1, 1))
    tail_window = AlignedSequence(mutated[3:12], Alignment("10M", 1, 3))

    head_alignment = PairwiseAlignment(head_window, reference)
    tail_alignment = PairwiseAlignment(tail_window, reference)

    # The edits must not depend on where the read starts on the reference.
    head_edits = Haplotype(head_alignment).edits
    tail_edits = Haplotype(tail_alignment).edits
    @test head_edits == tail_edits
end
2022-06-14 21:24:55 +00:00
@testset "VariationParsing" begin
    refseq = dna"ACAACTTTATCT"

    # Parse one textual form per mutation kind up front, then check that each parsed
    # variation wraps the matching mutation type.
    notations = ("A4T", "Δ4-5", "4TT")
    expected_kinds = (Substitution, Deletion, Insertion)

    parsed = [Variation(refseq, notation) for notation in notations]
    for (variation, kind) in zip(parsed, expected_kinds)
        @test mutation(variation) isa kind
    end
end
2022-06-15 18:44:27 +00:00
@testset "VariationRetrieval" begin
    reference = dna"ACAACTTTATCT"
    mutated = dna"ACATCTTTATCT"

    # Align the first ten bases of the mutated sequence to the start of the reference.
    aligned_read = AlignedSequence(mutated[1:10], Alignment("10M", 1, 1))
    haplotype = Haplotype(PairwiseAlignment(aligned_read, reference))

    # The first variation retrieved from the haplotype must equal the one parsed from
    # its textual form.
    expected = Variation(reference, "A4T")
    @test first(variations(haplotype)) == expected
end
@testset "VariationBases" begin
    refseq = dna"ATCGA"

    # Each row: (variation notation, expected reference bases, expected alternate bases)
    cases = (
        ("C3G", dna"C", dna"G"),        # substitution
        ("Δ3-3", dna"TC", dna"T"),      # single-base deletion
        ("Δ3-4", dna"TCG", dna"T"),     # multi-base deletion
        ("Δ1-1", dna"AT", dna"T"),      # deletion at the first position
        ("3A", dna"C", dna"CA"),        # single-base insertion
        ("3TAG", dna"C", dna"CTAG"),    # multi-base insertion
        ("1C", dna"A", dna"CA"),        # insertion at the first position
    )

    for (notation, expected_ref, expected_alt) in cases
        variation = Variation(refseq, notation)
        @test refbases(variation) == expected_ref
        @test altbases(variation) == expected_alt
    end
end
2022-12-05 21:34:39 +00:00
2023-01-04 18:45:25 +00:00
@testset "SoftclipHaplotype" begin
    refseq = dna"GATTACA"
    mutseq = dna"GATTACAAAA"

    # Haplotype of the reference with an empty edit list: the expected result whenever
    # the three trailing bases of `mutseq` are clipped away.
    no_edits = SequenceVariation.Edit{typeof(refseq),eltype(refseq)}[]
    refvar = Haplotype(refseq, no_edits)

    # Build the haplotype of `mutseq` aligned to `refseq` from position 1 with the given
    # CIGAR string.
    function haplotype_with_cigar(cigar)
        aligned = AlignedSequence(mutseq, Alignment(cigar, 1, 1))
        return Haplotype(PairwiseAlignment(aligned, refseq))
    end

    # Test for ending soft clip
    @test haplotype_with_cigar("7=3S") == refvar

    # Test for ending soft+hard clip
    @test haplotype_with_cigar("7=3S2H") == refvar

    # Test that ending insertions are still valid
    @test length(haplotype_with_cigar("7=3I").edits) == 1

    # Test that out-of-bounds bases are still caught
    @test_throws BoundsError haplotype_with_cigar("7=3X")
end
2023-01-03 22:22:04 +00:00
@testset "Aqua" begin
    # Ambiguity detection is restricted to this package (no recursion into others).
    Aqua.test_ambiguities(SequenceVariation; recursive=false)

    # TODO: Refactor `Edit` so that the unbound-args check passes.
    # TODO: Until then the check ought to be recorded as a known failure (e.g. with
    #       `@test_broken`), but Aqua's syntax doesn't allow that, so it stays disabled:
    # Aqua.test_unbound_args(SequenceVariation)

    # Remaining quality checks, run in their original order with default options.
    remaining_checks = (
        Aqua.test_undefined_exports,
        Aqua.test_piracy,
        Aqua.test_project_extras,
        Aqua.test_stale_deps,
        Aqua.test_deps_compat,
        Aqua.test_project_toml_formatting,
    )
    for check in remaining_checks
        check(SequenceVariation)
    end
end