1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-11-23 10:19:56 +00:00

Minimal code adjustments for working separation

- Update to use GenomicFeatures v2.
- BioAlignments v2.
- BioSequences v2.
- Indexes v0.1.
This commit is contained in:
Ciarán O'Mara 2020-02-20 21:19:07 +11:00
parent 7a56931b90
commit 1a3c986152
11 changed files with 76 additions and 31 deletions

View file

@ -2,3 +2,32 @@ name = "XAM"
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"] authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
version = "0.1.0" version = "0.1.0"
[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[compat]
Automa = "0.7, 0.8"
BGZFStreams = "0.3"
BioAlignments = "2"
BioCore = "2"
BioSequences = "2"
BufferedStreams = "1"
GenomicFeatures = "2"
Indexes = "0.1"
julia = "1.1"
[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
[targets]
test = ["Test", "YAML"]

View file

@ -1,10 +1,13 @@
module XAM module XAM
export export
BAM, SAM,
SAM BAM
include("sam/sam.jl") include("sam/sam.jl")
include("bam/bam.jl") include("bam/bam.jl")
using .SAM
using .BAM
end # module end # module

View file

@ -7,7 +7,7 @@
# An index type for the BAM file format. # An index type for the BAM file format.
struct BAI struct BAI
# BGZF file index # BGZF file index
index::GenomicFeatures.Indexes.BGZFIndex index::Indexes.BGZFIndex
# number of unmapped reads # number of unmapped reads
n_no_coors::Union{Nothing, Int} n_no_coors::Union{Nothing, Int}
@ -44,7 +44,7 @@ function read_bai(input::IO)
# read contents # read contents
n_refs = read(input, Int32) n_refs = read(input, Int32)
index = GenomicFeatures.Indexes.read_bgzfindex(input, n_refs) index = Indexes.read_bgzfindex(input, n_refs)
if !eof(input) if !eof(input)
n_no_coors = read(input, UInt64) n_no_coors = read(input, UInt64)
else else

View file

@ -3,11 +3,18 @@
module BAM module BAM
using BioCore
using GenomicFeatures
using XAM.SAM
import BGZFStreams import BGZFStreams
import BioAlignments: BioAlignments, SAM import BioAlignments
import GenomicFeatures: GenomicFeatures, Interval import Indexes
import BioSequences import BioSequences
import BioCore: BioCore, isfilled import BioCore: isfilled, header
import GenomicFeatures: eachoverlap
include("bai.jl") include("bai.jl")
include("auxdata.jl") include("auxdata.jl")

View file

@ -36,7 +36,7 @@ mutable struct OverlapIteratorState
refindex::Int refindex::Int
# possibly overlapping chunks # possibly overlapping chunks
chunks::Vector{GenomicFeatures.Indexes.Chunk} chunks::Vector{Indexes.Chunk}
# current chunk index # current chunk index
chunkid::Int chunkid::Int
@ -51,7 +51,7 @@ function Base.iterate(iter::OverlapIterator)
throw(ArgumentError("sequence name $(iter.refname) is not found in the header")) throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
end end
@assert iter.reader.index !== nothing @assert iter.reader.index !== nothing
chunks = GenomicFeatures.Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval) chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
if !isempty(chunks) if !isempty(chunks)
seek(iter.reader, first(chunks).start) seek(iter.reader, first(chunks).start)
end end

View file

@ -66,10 +66,6 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
return header return header
end end
function BioCore.header(reader::Reader)
return header(reader)
end
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset) function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
seek(reader.stream, voffset) seek(reader.stream, voffset)
end end

View file

@ -477,11 +477,11 @@ function hastemplength(record::Record)
end end
""" """
sequence(record::Record)::BioSequences.DNASequence sequence(record::Record)::BioSequences.LongDNASeq
Get the segment sequence of `record`. Get the segment sequence of `record`.
""" """
function sequence(record::Record)::BioSequences.DNASequence function sequence(record::Record)::BioSequences.LongDNASeq
checkfilled(record) checkfilled(record)
seqlen = seqlength(record) seqlen = seqlength(record)
data = Vector{UInt64}(undef, cld(seqlen, 16)) data = Vector{UInt64}(undef, cld(seqlen, 16))
@ -491,7 +491,7 @@ function sequence(record::Record)::BioSequences.DNASequence
x = unsafe_load(src, i) x = unsafe_load(src, i)
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4 data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
end end
return BioSequences.DNASequence(data, 1:seqlen, false) return BioSequences.LongDNASeq(data, 1:seqlen, false)
end end
function hassequence(record::Record) function hassequence(record::Record)

View file

@ -38,10 +38,6 @@ function header(reader::Reader)::Header
return reader.header return reader.header
end end
function BioCore.header(reader::Reader)
return header(reader)
end
function Base.eltype(::Type{Reader}) function Base.eltype(::Type{Reader})
return Record return Record
end end

View file

@ -370,17 +370,17 @@ function hastemplength(record::Record)
end end
""" """
sequence(record::Record)::BioSequences.DNASequence sequence(record::Record)::BioSequences.LongDNASeq
Get the segment sequence of `record`. Get the segment sequence of `record`.
""" """
function sequence(record::Record)::BioSequences.DNASequence function sequence(record::Record)::BioSequences.LongDNASeq
checkfilled(record) checkfilled(record)
if ismissing(record, record.seq) if ismissing(record, record.seq)
missingerror(:sequence) missingerror(:sequence)
end end
seqlen = length(record.seq) seqlen = length(record.seq)
ret = BioSequences.DNASequence(seqlen) ret = BioSequences.LongDNASeq(seqlen)
BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen) BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
return ret return ret
end end

View file

@ -3,12 +3,14 @@
module SAM module SAM
using BioCore
import Automa import Automa
import Automa.RegExp: @re_str import Automa.RegExp: @re_str
import BioAlignments import BioAlignments
import BioCore.Exceptions: missingerror import BioCore.Exceptions: missingerror
import BioCore.RecordHelper: unsafe_parse_decimal import BioCore.RecordHelper: unsafe_parse_decimal
import BioCore: BioCore, isfilled import BioCore: isfilled, header
import BioSequences import BioSequences
import BufferedStreams import BufferedStreams
using Printf: @sprintf using Printf: @sprintf

View file

@ -1,13 +1,25 @@
using Test using Test
using BioAlignments using GenomicFeatures
using BioSymbols using XAM
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
import BGZFStreams: BGZFStream import BGZFStreams: BGZFStream
import BioCore.Exceptions: MissingFieldException import BioCore.Exceptions: MissingFieldException
import BioCore.Testing.get_bio_fmt_specimens import BioCore.Testing.get_bio_fmt_specimens
import BioSequences: @dna_str, @aa_str import BioSequences: @dna_str, @aa_str
import GenomicFeatures
import YAML import YAML
import BioCore:
header,
isfilled,
seqname,
hasseqname,
sequence,
hassequence,
leftposition,
rightposition,
hasleftposition,
hasrightposition
# Generate a random range within `range`. # Generate a random range within `range`.
function randrange(range) function randrange(range)
x = rand(range) x = rand(range)
@ -68,12 +80,12 @@ end
record = SAM.Record() record = SAM.Record()
@test !isfilled(record) @test !isfilled(record)
@test !SAM.ismapped(record) @test !SAM.ismapped(record)
@test repr(record) == "BioAlignments.SAM.Record: <not filled>" @test repr(record) == "XAM.SAM.Record: <not filled>"
@test_throws ArgumentError SAM.flag(record) @test_throws ArgumentError SAM.flag(record)
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*") record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
@test isfilled(record) @test isfilled(record)
@test occursin(r"^BioAlignments.SAM.Record:\n", repr(record)) @test occursin(r"^XAM.SAM.Record:\n", repr(record))
@test SAM.ismapped(record) @test SAM.ismapped(record)
@test SAM.isprimary(record) @test SAM.isprimary(record)
@test SAM.hastempname(record) @test SAM.hastempname(record)
@ -217,7 +229,7 @@ end
@testset "Record" begin @testset "Record" begin
record = BAM.Record() record = BAM.Record()
@test !isfilled(record) @test !isfilled(record)
@test repr(record) == "BioAlignments.BAM.Record: <not filled>" @test repr(record) == "XAM.BAM.Record: <not filled>"
@test_throws ArgumentError BAM.flag(record) @test_throws ArgumentError BAM.flag(record)
end end
@ -225,7 +237,7 @@ end
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam")) reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
@test isa(reader, BAM.Reader) @test isa(reader, BAM.Reader)
@test eltype(reader) === BAM.Record @test eltype(reader) === BAM.Record
@test startswith(repr(reader), "BioAlignments.BAM.Reader{IOStream}:") @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
# header # header
h = header(reader) h = header(reader)