Minimal code adjustments for working separation

- Update to use GenomicFeatures v2.
- BioAlignments v2.
- BioSequences v2.
- Indexes v0.1.
2024-11-23 10:19:56 +00:00 · 2020-02-20 21:19:07 +11:00 · 2020-02-20 21:19:07 +11:00 · 1a3c986152
commit 1a3c986152
parent 7a56931b90
11 changed files with 76 additions and 31 deletions
--- a/Project.toml
+++ b/Project.toml
@@ -2,3 +2,32 @@ name = "XAM"
 uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
 authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
 version = "0.1.0"
+[deps]
+Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
+BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
+BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
+BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
+BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
+BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
+GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
+Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+[compat]
+Automa = "0.7, 0.8"
+BGZFStreams = "0.3"
+BioAlignments = "2"
+BioCore = "2"
+BioSequences = "2"
+BufferedStreams = "1"
+GenomicFeatures = "2"
+Indexes = "0.1"
+julia = "1.1"
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
+[targets]
+test = ["Test", "YAML"]
--- a/src/XAM.jl
+++ b/src/XAM.jl
@@ -1,10 +1,13 @@
 module XAM
 export
-    BAM,
+    SAM,
-    SAM
+    BAM
 include("sam/sam.jl")
 include("bam/bam.jl")
 using .SAM
 using .BAM
 end # module
--- a/src/bam/bai.jl
+++ b/src/bam/bai.jl
@@ -7,7 +7,7 @@
 # An index type for the BAM file format.
 struct BAI
    # BGZF file index
-    index::GenomicFeatures.Indexes.BGZFIndex
+    index::Indexes.BGZFIndex
    # number of unmapped reads
    n_no_coors::Union{Nothing, Int}
@@ -44,7 +44,7 @@ function read_bai(input::IO)
    # read contents
    n_refs = read(input, Int32)
-    index = GenomicFeatures.Indexes.read_bgzfindex(input, n_refs)
+    index = Indexes.read_bgzfindex(input, n_refs)
    if !eof(input)
        n_no_coors = read(input, UInt64)
    else
--- a/src/bam/bam.jl
+++ b/src/bam/bam.jl
@@ -3,11 +3,18 @@
 module BAM
 using BioCore
 using GenomicFeatures
 using XAM.SAM
 import BGZFStreams
-import BioAlignments: BioAlignments, SAM
+import BioAlignments
-import GenomicFeatures: GenomicFeatures, Interval
+import Indexes
 import BioSequences
-import BioCore: BioCore, isfilled
+import BioCore: isfilled, header
 import GenomicFeatures: eachoverlap
 include("bai.jl")
 include("auxdata.jl")
--- a/src/bam/overlap.jl
+++ b/src/bam/overlap.jl
@@ -36,7 +36,7 @@ mutable struct OverlapIteratorState
    refindex::Int
    # possibly overlapping chunks
-    chunks::Vector{GenomicFeatures.Indexes.Chunk}
+    chunks::Vector{Indexes.Chunk}
    # current chunk index
    chunkid::Int
@@ -51,7 +51,7 @@ function Base.iterate(iter::OverlapIterator)
        throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
    end
    @assert iter.reader.index !== nothing
-    chunks = GenomicFeatures.Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
+    chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
    if !isempty(chunks)
        seek(iter.reader, first(chunks).start)
    end
--- a/src/bam/reader.jl
+++ b/src/bam/reader.jl
@@ -66,10 +66,6 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
    return header
 end
-function BioCore.header(reader::Reader)
-    return header(reader)
-end
 function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
    seek(reader.stream, voffset)
 end
--- a/src/bam/record.jl
+++ b/src/bam/record.jl
@@ -477,11 +477,11 @@ function hastemplength(record::Record)
 end
 """
-    sequence(record::Record)::BioSequences.DNASequence
+    sequence(record::Record)::BioSequences.LongDNASeq
 Get the segment sequence of `record`.
 """
-function sequence(record::Record)::BioSequences.DNASequence
+function sequence(record::Record)::BioSequences.LongDNASeq
    checkfilled(record)
    seqlen = seqlength(record)
    data = Vector{UInt64}(undef, cld(seqlen, 16))
@@ -491,7 +491,7 @@ function sequence(record::Record)::BioSequences.DNASequence
        x = unsafe_load(src, i)
        data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
    end
-    return BioSequences.DNASequence(data, 1:seqlen, false)
+    return BioSequences.LongDNASeq(data, 1:seqlen, false)
 end
 function hassequence(record::Record)
--- a/src/sam/reader.jl
+++ b/src/sam/reader.jl
@@ -38,10 +38,6 @@ function header(reader::Reader)::Header
    return reader.header
 end
-function BioCore.header(reader::Reader)
-    return header(reader)
-end
 function Base.eltype(::Type{Reader})
    return Record
 end
--- a/src/sam/record.jl
+++ b/src/sam/record.jl
@@ -370,17 +370,17 @@ function hastemplength(record::Record)
 end
 """
-    sequence(record::Record)::BioSequences.DNASequence
+    sequence(record::Record)::BioSequences.LongDNASeq
 Get the segment sequence of `record`.
 """
-function sequence(record::Record)::BioSequences.DNASequence
+function sequence(record::Record)::BioSequences.LongDNASeq
    checkfilled(record)
    if ismissing(record, record.seq)
        missingerror(:sequence)
    end
    seqlen = length(record.seq)
-    ret = BioSequences.DNASequence(seqlen)
+    ret = BioSequences.LongDNASeq(seqlen)
    BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
    return ret
 end
--- a/src/sam/sam.jl
+++ b/src/sam/sam.jl
@@ -3,12 +3,14 @@
 module SAM
 using BioCore
 import Automa
 import Automa.RegExp: @re_str
 import BioAlignments
 import BioCore.Exceptions: missingerror
 import BioCore.RecordHelper: unsafe_parse_decimal
-import BioCore: BioCore, isfilled
+import BioCore: isfilled, header
 import BioSequences
 import BufferedStreams
 using Printf: @sprintf
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,13 +1,25 @@
 using Test
-using BioAlignments
+using GenomicFeatures
-using BioSymbols
+using XAM
 import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
 import BGZFStreams: BGZFStream
 import BioCore.Exceptions: MissingFieldException
 import BioCore.Testing.get_bio_fmt_specimens
 import BioSequences: @dna_str, @aa_str
 import GenomicFeatures
 import YAML
 import BioCore:
    header,
    isfilled,
    seqname,
    hasseqname,
    sequence,
    hassequence,
    leftposition,
    rightposition,
    hasleftposition,
    hasrightposition
 # Generate a random range within `range`.
 function randrange(range)
    x = rand(range)
@@ -68,12 +80,12 @@ end
        record = SAM.Record()
        @test !isfilled(record)
        @test !SAM.ismapped(record)
-        @test repr(record) == "BioAlignments.SAM.Record: <not filled>"
+        @test repr(record) == "XAM.SAM.Record: <not filled>"
        @test_throws ArgumentError SAM.flag(record)
        record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
        @test isfilled(record)
-        @test occursin(r"^BioAlignments.SAM.Record:\n", repr(record))
+        @test occursin(r"^XAM.SAM.Record:\n", repr(record))
        @test SAM.ismapped(record)
        @test SAM.isprimary(record)
        @test SAM.hastempname(record)
@@ -217,7 +229,7 @@ end
    @testset "Record" begin
        record = BAM.Record()
        @test !isfilled(record)
-        @test repr(record) == "BioAlignments.BAM.Record: <not filled>"
+        @test repr(record) == "XAM.BAM.Record: <not filled>"
        @test_throws ArgumentError BAM.flag(record)
    end
@@ -225,7 +237,7 @@ end
        reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
        @test isa(reader, BAM.Reader)
        @test eltype(reader) === BAM.Record
-        @test startswith(repr(reader), "BioAlignments.BAM.Reader{IOStream}:")
+        @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
        # header
        h = header(reader)