mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-12-23 21:28:18 +00:00
Switch to CodecBGZF
This commit is contained in:
parent
5bd793bc5f
commit
bb5807795b
8 changed files with 24 additions and 19 deletions
|
@ -5,10 +5,10 @@ version = "0.2.7"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||||
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
|
|
||||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
|
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
|
||||||
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||||
|
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
|
||||||
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||||
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
|
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
|
||||||
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||||
|
@ -16,12 +16,10 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
||||||
|
|
||||||
[compat]
|
[compat]
|
||||||
Automa = "0.7, 0.8"
|
Automa = "0.7, 0.8"
|
||||||
BGZFStreams = "0.3"
|
|
||||||
BioAlignments = "2"
|
BioAlignments = "2"
|
||||||
BioGenerics = "0.1"
|
BioGenerics = "0.1"
|
||||||
BioSequences = "2.0.4"
|
BioSequences = "2.0.4"
|
||||||
GenomicFeatures = "2"
|
GenomicFeatures = "2"
|
||||||
Indexes = "0.1"
|
|
||||||
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
||||||
julia = "1"
|
julia = "1"
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,8 @@ module BAM
|
||||||
using BioGenerics
|
using BioGenerics
|
||||||
using GenomicFeatures
|
using GenomicFeatures
|
||||||
using XAM.SAM
|
using XAM.SAM
|
||||||
|
using CodecBGZF
|
||||||
|
|
||||||
import BGZFStreams
|
|
||||||
import BioAlignments
|
import BioAlignments
|
||||||
import Indexes
|
import Indexes
|
||||||
import BioSequences
|
import BioSequences
|
||||||
|
|
|
@ -62,7 +62,7 @@ end
|
||||||
function Base.iterate(iter::OverlapIterator, state)
|
function Base.iterate(iter::OverlapIterator, state)
|
||||||
while state.chunkid ≤ lastindex(state.chunks)
|
while state.chunkid ≤ lastindex(state.chunks)
|
||||||
chunk = state.chunks[state.chunkid]
|
chunk = state.chunks[state.chunkid]
|
||||||
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
while VirtualOffset(iter.reader.stream) < chunk.stop
|
||||||
read!(iter.reader, state.record)
|
read!(iter.reader, state.record)
|
||||||
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
||||||
if c == 0
|
if c == 0
|
||||||
|
|
|
@ -11,9 +11,9 @@ Create a data reader of the BAM file format.
|
||||||
* `index=nothing`: filepath to a random access index (currently *bai* is supported)
|
* `index=nothing`: filepath to a random access index (currently *bai* is supported)
|
||||||
"""
|
"""
|
||||||
mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
|
mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
|
||||||
stream::BGZFStreams.BGZFStream{T}
|
stream::BGZFDecompressorStream{T}
|
||||||
header::SAM.Header
|
header::SAM.Header
|
||||||
start_offset::BGZFStreams.VirtualOffset
|
start_offset::VirtualOffset
|
||||||
refseqnames::Vector{String}
|
refseqnames::Vector{String}
|
||||||
refseqlens::Vector{Int}
|
refseqlens::Vector{Int}
|
||||||
index::Union{Nothing, BAI}
|
index::Union{Nothing, BAI}
|
||||||
|
@ -64,7 +64,7 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
|
||||||
return header
|
return header
|
||||||
end
|
end
|
||||||
|
|
||||||
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
|
function Base.seek(reader::Reader, voffset::CodecBGZF.VirtualOffset)
|
||||||
seek(reader.stream, voffset)
|
seek(reader.stream, voffset)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ function Base.iterate(reader::Reader, nextone = Record())
|
||||||
end
|
end
|
||||||
|
|
||||||
# Initialize a BAM reader by reading the header section.
|
# Initialize a BAM reader by reading the header section.
|
||||||
function init_bam_reader(input::BGZFStreams.BGZFStream)
|
function init_bam_reader(input::BGZFDecompressorStream)
|
||||||
# magic bytes
|
# magic bytes
|
||||||
B = read(input, UInt8)
|
B = read(input, UInt8)
|
||||||
A = read(input, UInt8)
|
A = read(input, UInt8)
|
||||||
|
@ -108,9 +108,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
|
||||||
refseqlens[i] = seqlen
|
refseqlens[i] = seqlen
|
||||||
end
|
end
|
||||||
|
|
||||||
voffset = isa(input.io, Base.AbstractPipe) ?
|
voffset = VirtualOffset(input)
|
||||||
BGZFStreams.VirtualOffset(0, 0) :
|
|
||||||
BGZFStreams.virtualoffset(input)
|
|
||||||
|
|
||||||
return Reader(
|
return Reader(
|
||||||
input,
|
input,
|
||||||
|
@ -122,7 +120,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
|
||||||
end
|
end
|
||||||
|
|
||||||
function init_bam_reader(input::IO)
|
function init_bam_reader(input::IO)
|
||||||
return init_bam_reader(BGZFStreams.BGZFStream(input))
|
return init_bam_reader(BGZFDecompressorStream(input))
|
||||||
end
|
end
|
||||||
|
|
||||||
function _read!(reader::Reader, record)
|
function _read!(reader::Reader, record)
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
# ==========
|
# ==========
|
||||||
|
|
||||||
"""
|
"""
|
||||||
BAM.Writer(output::BGZFStream, header::SAM.Header)
|
BAM.Writer(output::BGZFCompressorStream, header::SAM.Header)
|
||||||
|
|
||||||
Create a data writer of the BAM file format.
|
Create a data writer of the BAM file format.
|
||||||
|
|
||||||
|
@ -11,10 +11,10 @@ Create a data writer of the BAM file format.
|
||||||
* `header`: SAM header object
|
* `header`: SAM header object
|
||||||
"""
|
"""
|
||||||
mutable struct Writer <: BioGenerics.IO.AbstractWriter
|
mutable struct Writer <: BioGenerics.IO.AbstractWriter
|
||||||
stream::BGZFStreams.BGZFStream
|
stream::BGZFCompressorStream
|
||||||
end
|
end
|
||||||
|
|
||||||
function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header)
|
function Writer(stream::BGZFCompressorStream, header::SAM.Header)
|
||||||
refseqnames = String[]
|
refseqnames = String[]
|
||||||
refseqlens = Int[]
|
refseqlens = Int[]
|
||||||
for metainfo in findall(header, "SQ")
|
for metainfo in findall(header, "SQ")
|
||||||
|
|
9
test/Project.toml
Normal file
9
test/Project.toml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
[deps]
|
||||||
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
|
BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
|
||||||
|
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||||
|
CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6"
|
||||||
|
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
|
||||||
|
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||||
|
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||||
|
XAM = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
|
@ -4,9 +4,9 @@ using BioGenerics
|
||||||
using FormatSpecimens
|
using FormatSpecimens
|
||||||
using GenomicFeatures
|
using GenomicFeatures
|
||||||
using XAM
|
using XAM
|
||||||
|
using CodecBGZF
|
||||||
|
|
||||||
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
|
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
|
||||||
import BGZFStreams: BGZFStream
|
|
||||||
import BioGenerics.Exceptions: MissingFieldException
|
import BioGenerics.Exceptions: MissingFieldException
|
||||||
import BioSequences: @dna_str, @aa_str
|
import BioSequences: @dna_str, @aa_str
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,6 @@
|
||||||
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
||||||
@test isa(reader, BAM.Reader)
|
@test isa(reader, BAM.Reader)
|
||||||
@test eltype(reader) === BAM.Record
|
@test eltype(reader) === BAM.Record
|
||||||
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
|
|
||||||
|
|
||||||
# header
|
# header
|
||||||
h = header(reader)
|
h = header(reader)
|
||||||
|
@ -199,7 +198,8 @@
|
||||||
|
|
||||||
header_original = header(reader)
|
header_original = header(reader)
|
||||||
|
|
||||||
writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))))
|
hdr = BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))
|
||||||
|
writer = BAM.Writer(BGZFCompressorStream(open(path, "w")), hdr)
|
||||||
|
|
||||||
records = BAM.Record[]
|
records = BAM.Record[]
|
||||||
for record in reader
|
for record in reader
|
||||||
|
|
Loading…
Reference in a new issue