From 6510ee3fc030992f93562c4c072b57f2b2c621c2 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 3 Jan 2023 16:01:59 -0600 Subject: [PATCH] Add tutorial-type documentation --- docs/Project.toml | 10 +++++++ docs/make.jl | 7 +++++ docs/src/compare.md | 47 ++++++++++++++++++++++++++++++ docs/src/variants.md | 40 +++++++++++++++++++++++++ docs/src/variations.md | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 170 insertions(+) create mode 100644 docs/src/compare.md create mode 100644 docs/src/variants.md create mode 100644 docs/src/variations.md diff --git a/docs/Project.toml b/docs/Project.toml index b284c6f..3274402 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,16 @@ [deps] +BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" +BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SequenceVariation = "eef6e190-9969-4f06-a38f-35a110a8fdc8" [compat] +BioAlignments = "3" +BioSequences = "3" +BioSymbols = "5" Documenter = "0.27" +Revise = "3.4" +julia = "1.6" diff --git a/docs/make.jl b/docs/make.jl index e723972..73c5950 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,6 +1,10 @@ using Pkg using Documenter using SequenceVariation +using Revise + +# see https://github.com/tlienart/LiveServer.jl/issues/140#issuecomment-1271591251 +Revise.revise() makedocs(; checkdocs = :exports, @@ -10,6 +14,9 @@ makedocs(; modules = [SequenceVariation], pages = [ "Home" => "index.md", + "Working with variants" => "variants.md", + "Working with variations" => "variations.md", + "Comparing variations" => "compare.md", "API Reference" => "api.md", ], authors = replace(join(Pkg.TOML.parsefile("Project.toml")["authors"], ", "), r" <.*?>" => "" ) * ", The BioJulia Organisation, and other 
contributors." diff --git a/docs/src/compare.md b/docs/src/compare.md new file mode 100644 index 0000000..9ee3693 --- /dev/null +++ b/docs/src/compare.md @@ -0,0 +1,47 @@ +```@meta +CurrentModule = SequenceVariation +``` + +# Comparing variations in sequences + +## Checking for variations in a known variant + +Looking for a known [`Variation`](@ref) within a [`Variant`](@ref) is +efficiently accomplished using the `in` operator. + +```@setup call_variants +using SequenceVariation, BioAlignments, BioSequences + +bovine = dna"GACCGGCTGCATTCGAGGCTGCCAGCAAGCAG"; +ovine = dna"GACCGGCTGCATTCGAGGCTGTCAGCAAACAG"; +human = dna"GACAGGCTGCATCAGAAGAGGCCATCAAGCAG"; + +bos_ovis_alignment = + PairwiseAlignment(AlignedSequence(ovine, Alignment("32M", 1, 1)), bovine); +bos_human_alignment = + PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine); + +bos_ovis_variant = Variant(bos_ovis_alignment) +bos_human_variant = Variant(bos_human_alignment) +``` + +```@example call_variants +println("\tOvis aries\tHomo sapiens") +for v in vcat(variations(bos_ovis_variant), variations(bos_human_variant)) + is_sheep = v in bos_ovis_variant + is_human = v in bos_human_variant + println("$v\t$is_sheep\t\t$is_human") +end +``` + +## Constructing new variants based on other variations + +New variants can be constructed using variations. This might be useful to pool +variations found on different reads or to filter variations from a variant +that aren't validated by another variant. + +```@repl call_variants +sheeple = vcat(variations(bos_ovis_variant), variations(bos_human_variant)); +Variant(bovine, sheeple) +reconstruct!(bovine, ans) +``` diff --git a/docs/src/variants.md b/docs/src/variants.md new file mode 100644 index 0000000..c04abf0 --- /dev/null +++ b/docs/src/variants.md @@ -0,0 +1,40 @@ +```@meta +CurrentModule = SequenceVariation +``` + +# Working with variants + +## Calling variants + +The first step in working with sequence variation is to identify (call) +variations.
SequenceVariation can directly call variants using the +`Variant(::PairwiseAlignment)` constructor of the [`Variant`](@ref) type. + +```@repl call_variants +using SequenceVariation, BioAlignments, BioSequences + +bovine = dna"GACCGGCTGCATTCGAGGCTGCCAGCAAGCAG"; +ovine = dna"GACCGGCTGCATTCGAGGCTGTCAGCAAACAG"; +human = dna"GACAGGCTGCATCAGAAGAGGCCATCAAGCAG"; + +bos_ovis_alignment = + PairwiseAlignment(AlignedSequence(ovine, Alignment("32M", 1, 1)), bovine); +bos_human_alignment = + PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine); + +bos_ovis_variant = Variant(bos_ovis_alignment) +bos_human_variant = Variant(bos_human_alignment) +``` + +## Sequence reconstruction + +If the alternate sequence of a variant is no longer available (as is often the +case when calling variants from alignment files), then the sequence can be +retrieved using the [`reconstruct!`](@ref) function. + +```@repl call_variants +human2 = copy(bovine); +reconstruct!(human2, bos_human_variant) +human2 == bovine +human2 == human +``` diff --git a/docs/src/variations.md b/docs/src/variations.md new file mode 100644 index 0000000..229bae5 --- /dev/null +++ b/docs/src/variations.md @@ -0,0 +1,66 @@ +```@meta +CurrentModule = SequenceVariation +``` + +# Working with individual variations + +## Construction + +Individual [`Variation`](@ref)s can be made using a reference sequence and +string syntax: + +| Variation type | Syntax | Interpretation | Example | +|:--- |:--- |:--- |:--- | +| Substitutions | `<REFBASE><POS><ALTBASE>` | `<ALTBASE>` is substituted for `<REFBASE>` in position `<POS>` | `"G16C"` | +| Deletions | `Δ<STARTPOS>-<ENDPOS>` | All bases (inclusive) between `<STARTPOS>` and `<ENDPOS>` are deleted. It is valid to have `<STARTPOS>` equal `<ENDPOS>`: that is a deletion of one base.
| `"Δ1-2"` | +| Insertions | `<POS><ALTBASES>` | `<ALTBASES>` is inserted between positions `<POS>` and `<POS>+1` | `"11T"` | + +```@repl +using BioSequences: @dna_str +using SequenceVariation +bovine_ins = dna"GACCGGCTGCATTCGAGGCTGCCAGCAAGCAG" +Variation(bovine_ins, "C4A") +mutation(ans) +typeof(mutation(Variation(bovine_ins, "Δ13-14"))) +mutation(Variation(bovine_ins, "25ACA")) +``` + +## Extraction + +Sequence variations may also be extracted wholesale from a [`Variant`](@ref) +using the [`variations`](@ref) function. + +```@setup call_variants +using SequenceVariation, BioAlignments, BioSequences + +bovine = dna"GACCGGCTGCATTCGAGGCTGCCAGCAAGCAG"; +ovine = dna"GACCGGCTGCATTCGAGGCTGTCAGCAAACAG"; +human = dna"GACAGGCTGCATCAGAAGAGGCCATCAAGCAG"; + +bos_ovis_alignment = + PairwiseAlignment(AlignedSequence(ovine, Alignment("32M", 1, 1)), bovine); +bos_human_alignment = + PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), bovine); + +bos_ovis_variant = Variant(bos_ovis_alignment) +bos_human_variant = Variant(bos_human_alignment) +``` + +```@repl call_variants +variations(bos_ovis_variant) +variations(bos_human_variant) +``` + +## Reference switching + +An individual variation can be mapped to a new reference sequence given an +alignment between the new and old references using the [`translate`](@ref) function. + +```@repl call_variants +ovis_human_alignment = + PairwiseAlignment(AlignedSequence(human, Alignment("32M", 1, 1)), ovine) +human_variation = first(variations(bos_ovis_variant)) +reference(ans) == bovine +SequenceVariation.translate(human_variation, ovis_human_alignment) +reference(ans) == bovine +```