1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-12-23 13:28:16 +00:00

Switch to CodecBGZF

This commit is contained in:
Jakob Nybo Nissen 2021-02-28 13:06:17 +01:00 committed by Ciarán O’Mara
parent 5bd793bc5f
commit bb5807795b
8 changed files with 24 additions and 19 deletions

View file

@ -5,10 +5,10 @@ version = "0.2.7"
[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@ -16,12 +16,10 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
[compat]
Automa = "0.7, 0.8"
BGZFStreams = "0.3"
BioAlignments = "2"
BioGenerics = "0.1"
BioSequences = "2.0.4"
GenomicFeatures = "2"
Indexes = "0.1"
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
julia = "1"

View file

@ -6,8 +6,8 @@ module BAM
using BioGenerics
using GenomicFeatures
using XAM.SAM
using CodecBGZF
import BGZFStreams
import BioAlignments
import Indexes
import BioSequences

View file

@ -62,7 +62,7 @@ end
function Base.iterate(iter::OverlapIterator, state)
while state.chunkid ≤ lastindex(state.chunks)
chunk = state.chunks[state.chunkid]
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
while VirtualOffset(iter.reader.stream) < chunk.stop
read!(iter.reader, state.record)
c = compare_intervals(state.record, (state.refindex, iter.interval))
if c == 0

View file

@ -11,9 +11,9 @@ Create a data reader of the BAM file format.
* `index=nothing`: filepath to a random access index (currently *bai* is supported)
"""
mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
stream::BGZFStreams.BGZFStream{T}
stream::BGZFDecompressorStream{T}
header::SAM.Header
start_offset::BGZFStreams.VirtualOffset
start_offset::VirtualOffset
refseqnames::Vector{String}
refseqlens::Vector{Int}
index::Union{Nothing, BAI}
@ -64,7 +64,7 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
return header
end
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
function Base.seek(reader::Reader, voffset::CodecBGZF.VirtualOffset)
seek(reader.stream, voffset)
end
@ -80,7 +80,7 @@ function Base.iterate(reader::Reader, nextone = Record())
end
# Initialize a BAM reader by reading the header section.
function init_bam_reader(input::BGZFStreams.BGZFStream)
function init_bam_reader(input::BGZFDecompressorStream)
# magic bytes
B = read(input, UInt8)
A = read(input, UInt8)
@ -108,9 +108,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
refseqlens[i] = seqlen
end
voffset = isa(input.io, Base.AbstractPipe) ?
BGZFStreams.VirtualOffset(0, 0) :
BGZFStreams.virtualoffset(input)
voffset = VirtualOffset(input)
return Reader(
input,
@ -122,7 +120,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
end
function init_bam_reader(input::IO)
return init_bam_reader(BGZFStreams.BGZFStream(input))
return init_bam_reader(BGZFDecompressorStream(input))
end
function _read!(reader::Reader, record)

View file

@ -2,7 +2,7 @@
# ==========
"""
BAM.Writer(output::BGZFStream, header::SAM.Header)
BAM.Writer(output::BGZFCompressorStream, header::SAM.Header)
Create a data writer of the BAM file format.
@ -11,10 +11,10 @@ Create a data writer of the BAM file format.
* `header`: SAM header object
"""
mutable struct Writer <: BioGenerics.IO.AbstractWriter
stream::BGZFStreams.BGZFStream
stream::BGZFCompressorStream
end
function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header)
function Writer(stream::BGZFCompressorStream, header::SAM.Header)
refseqnames = String[]
refseqlens = Int[]
for metainfo in findall(header, "SQ")

9
test/Project.toml Normal file
View file

@ -0,0 +1,9 @@
[deps]
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
XAM = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"

View file

@ -4,9 +4,9 @@ using BioGenerics
using FormatSpecimens
using GenomicFeatures
using XAM
using CodecBGZF
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
import BGZFStreams: BGZFStream
import BioGenerics.Exceptions: MissingFieldException
import BioSequences: @dna_str, @aa_str

View file

@ -44,7 +44,6 @@
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
@test isa(reader, BAM.Reader)
@test eltype(reader) === BAM.Record
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
# header
h = header(reader)
@ -199,7 +198,8 @@
header_original = header(reader)
writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))))
hdr = BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))
writer = BAM.Writer(BGZFCompressorStream(open(path, "w")), hdr)
records = BAM.Record[]
for record in reader