mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-12-23 13:28:16 +00:00
Switch to CodecBGZF
This commit is contained in:
parent
5bd793bc5f
commit
bb5807795b
8 changed files with 24 additions and 19 deletions
|
@ -5,10 +5,10 @@ version = "0.2.7"
|
|||
|
||||
[deps]
|
||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
|
||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
|
||||
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
|
||||
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
|
||||
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
@ -16,12 +16,10 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
|||
|
||||
[compat]
|
||||
Automa = "0.7, 0.8"
|
||||
BGZFStreams = "0.3"
|
||||
BioAlignments = "2"
|
||||
BioGenerics = "0.1"
|
||||
BioSequences = "2.0.4"
|
||||
GenomicFeatures = "2"
|
||||
Indexes = "0.1"
|
||||
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
||||
julia = "1"
|
||||
|
||||
|
|
|
@ -6,8 +6,8 @@ module BAM
|
|||
using BioGenerics
|
||||
using GenomicFeatures
|
||||
using XAM.SAM
|
||||
using CodecBGZF
|
||||
|
||||
import BGZFStreams
|
||||
import BioAlignments
|
||||
import Indexes
|
||||
import BioSequences
|
||||
|
|
|
@ -62,7 +62,7 @@ end
|
|||
function Base.iterate(iter::OverlapIterator, state)
|
||||
while state.chunkid ≤ lastindex(state.chunks)
|
||||
chunk = state.chunks[state.chunkid]
|
||||
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
||||
while VirtualOffset(iter.reader.stream) < chunk.stop
|
||||
read!(iter.reader, state.record)
|
||||
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
||||
if c == 0
|
||||
|
|
|
@ -11,9 +11,9 @@ Create a data reader of the BAM file format.
|
|||
* `index=nothing`: filepath to a random access index (currently *bai* is supported)
|
||||
"""
|
||||
mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
|
||||
stream::BGZFStreams.BGZFStream{T}
|
||||
stream::BGZFDecompressorStream{T}
|
||||
header::SAM.Header
|
||||
start_offset::BGZFStreams.VirtualOffset
|
||||
start_offset::VirtualOffset
|
||||
refseqnames::Vector{String}
|
||||
refseqlens::Vector{Int}
|
||||
index::Union{Nothing, BAI}
|
||||
|
@ -64,7 +64,7 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
|
|||
return header
|
||||
end
|
||||
|
||||
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
|
||||
function Base.seek(reader::Reader, voffset::CodecBGZF.VirtualOffset)
|
||||
seek(reader.stream, voffset)
|
||||
end
|
||||
|
||||
|
@ -80,7 +80,7 @@ function Base.iterate(reader::Reader, nextone = Record())
|
|||
end
|
||||
|
||||
# Initialize a BAM reader by reading the header section.
|
||||
function init_bam_reader(input::BGZFStreams.BGZFStream)
|
||||
function init_bam_reader(input::BGZFDecompressorStream)
|
||||
# magic bytes
|
||||
B = read(input, UInt8)
|
||||
A = read(input, UInt8)
|
||||
|
@ -108,9 +108,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
|
|||
refseqlens[i] = seqlen
|
||||
end
|
||||
|
||||
voffset = isa(input.io, Base.AbstractPipe) ?
|
||||
BGZFStreams.VirtualOffset(0, 0) :
|
||||
BGZFStreams.virtualoffset(input)
|
||||
voffset = VirtualOffset(input)
|
||||
|
||||
return Reader(
|
||||
input,
|
||||
|
@ -122,7 +120,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
|
|||
end
|
||||
|
||||
function init_bam_reader(input::IO)
|
||||
return init_bam_reader(BGZFStreams.BGZFStream(input))
|
||||
return init_bam_reader(BGZFDecompressorStream(input))
|
||||
end
|
||||
|
||||
function _read!(reader::Reader, record)
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# ==========
|
||||
|
||||
"""
|
||||
BAM.Writer(output::BGZFStream, header::SAM.Header)
|
||||
BAM.Writer(output::BGZFCompressorStream, header::SAM.Header)
|
||||
|
||||
Create a data writer of the BAM file format.
|
||||
|
||||
|
@ -11,10 +11,10 @@ Create a data writer of the BAM file format.
|
|||
* `header`: SAM header object
|
||||
"""
|
||||
mutable struct Writer <: BioGenerics.IO.AbstractWriter
|
||||
stream::BGZFStreams.BGZFStream
|
||||
stream::BGZFCompressorStream
|
||||
end
|
||||
|
||||
function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header)
|
||||
function Writer(stream::BGZFCompressorStream, header::SAM.Header)
|
||||
refseqnames = String[]
|
||||
refseqlens = Int[]
|
||||
for metainfo in findall(header, "SQ")
|
||||
|
|
9
test/Project.toml
Normal file
9
test/Project.toml
Normal file
|
@ -0,0 +1,9 @@
|
|||
[deps]
|
||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
|
||||
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
|
||||
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
|
||||
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
XAM = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
|
@ -4,9 +4,9 @@ using BioGenerics
|
|||
using FormatSpecimens
|
||||
using GenomicFeatures
|
||||
using XAM
|
||||
using CodecBGZF
|
||||
|
||||
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
|
||||
import BGZFStreams: BGZFStream
|
||||
import BioGenerics.Exceptions: MissingFieldException
|
||||
import BioSequences: @dna_str, @aa_str
|
||||
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
||||
@test isa(reader, BAM.Reader)
|
||||
@test eltype(reader) === BAM.Record
|
||||
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
|
||||
|
||||
# header
|
||||
h = header(reader)
|
||||
|
@ -199,7 +198,8 @@
|
|||
|
||||
header_original = header(reader)
|
||||
|
||||
writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))))
|
||||
hdr = BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))
|
||||
writer = BAM.Writer(BGZFCompressorStream(open(path, "w")), hdr)
|
||||
|
||||
records = BAM.Record[]
|
||||
for record in reader
|
||||
|
|
Loading…
Reference in a new issue