From bb5807795b75b927e3c717004e39c00355e393ec Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sun, 28 Feb 2021 13:06:17 +0100 Subject: [PATCH] Switch to CodecBGZF --- Project.toml | 4 +--- src/bam/bam.jl | 2 +- src/bam/overlap.jl | 2 +- src/bam/reader.jl | 14 ++++++-------- src/bam/writer.jl | 6 +++--- test/Project.toml | 9 +++++++++ test/runtests.jl | 2 +- test/test_bam.jl | 4 ++-- 8 files changed, 24 insertions(+), 19 deletions(-) create mode 100644 test/Project.toml diff --git a/Project.toml b/Project.toml index 0c21bf9..3587b18 100644 --- a/Project.toml +++ b/Project.toml @@ -5,10 +5,10 @@ version = "0.2.7" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" -BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6" GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" @@ -16,12 +16,10 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" [compat] Automa = "0.7, 0.8" -BGZFStreams = "0.3" BioAlignments = "2" BioGenerics = "0.1" BioSequences = "2.0.4" GenomicFeatures = "2" -Indexes = "0.1" TranscodingStreams = "0.6, 0.7, 0.8, 0.9" julia = "1" diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 6fd1174..991f01a 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,8 +6,8 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM +using CodecBGZF -import BGZFStreams import BioAlignments import Indexes import BioSequences diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index c6cf8ac..3f315b2 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -62,7 +62,7 @@ end function Base.iterate(iter::OverlapIterator, state) while state.chunkid ≤ lastindex(state.chunks) chunk = state.chunks[state.chunkid] - while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop + while VirtualOffset(iter.reader.stream) < chunk.stop read!(iter.reader, state.record) c = compare_intervals(state.record, (state.refindex, iter.interval)) if c == 0 diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 43e228e..bb1fac9 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -11,9 +11,9 @@ Create a data reader of the BAM file format. * `index=nothing`: filepath to a random access index (currently *bai* is supported) """ mutable struct Reader{T} <: BioGenerics.IO.AbstractReader - stream::BGZFStreams.BGZFStream{T} + stream::BGZFDecompressorStream{T} header::SAM.Header - start_offset::BGZFStreams.VirtualOffset + start_offset::VirtualOffset refseqnames::Vector{String} refseqlens::Vector{Int} index::Union{Nothing, BAI} @@ -64,7 +64,7 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header return header end -function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset) +function Base.seek(reader::Reader, voffset::CodecBGZF.VirtualOffset) seek(reader.stream, voffset) end @@ -80,7 +80,7 @@ function Base.iterate(reader::Reader, nextone = Record()) end # Initialize a BAM reader by reading the header section. -function init_bam_reader(input::BGZFStreams.BGZFStream) +function init_bam_reader(input::BGZFDecompressorStream) # magic bytes B = read(input, UInt8) A = read(input, UInt8) @@ -108,9 +108,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) refseqlens[i] = seqlen end - voffset = isa(input.io, Base.AbstractPipe) ? - BGZFStreams.VirtualOffset(0, 0) : - BGZFStreams.virtualoffset(input) + voffset = VirtualOffset(input) return Reader( input, @@ -122,7 +120,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) end function init_bam_reader(input::IO) - return init_bam_reader(BGZFStreams.BGZFStream(input)) + return init_bam_reader(BGZFDecompressorStream(input)) end function _read!(reader::Reader, record) diff --git a/src/bam/writer.jl b/src/bam/writer.jl index 2460b3f..eb4b68c 100644 --- a/src/bam/writer.jl +++ b/src/bam/writer.jl @@ -2,7 +2,7 @@ # ========== """ - BAM.Writer(output::BGZFStream, header::SAM.Header) + BAM.Writer(output::BGZFCompressorStream, header::SAM.Header) Create a data writer of the BAM file format. @@ -11,10 +11,10 @@ Create a data writer of the BAM file format. * `header`: SAM header object """ mutable struct Writer <: BioGenerics.IO.AbstractWriter - stream::BGZFStreams.BGZFStream + stream::BGZFCompressorStream end -function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header) +function Writer(stream::BGZFCompressorStream, header::SAM.Header) refseqnames = String[] refseqlens = Int[] for metainfo in findall(header, "SQ") diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..fbc3db0 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,9 @@ +[deps] +BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" +BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" +BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +CodecBGZF = "d9d91ef6-315d-495b-8131-db2ca24339d6" +FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd" +GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +XAM = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" diff --git a/test/runtests.jl b/test/runtests.jl index beede29..a17a352 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,9 +4,9 @@ using BioGenerics using FormatSpecimens using GenomicFeatures using XAM +using CodecBGZF import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE -import BGZFStreams: BGZFStream import BioGenerics.Exceptions: MissingFieldException import BioSequences: @dna_str, @aa_str diff --git a/test/test_bam.jl b/test/test_bam.jl index beb6d03..1780ee9 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -44,7 +44,6 @@ reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam")) @test isa(reader, BAM.Reader) @test eltype(reader) === BAM.Record - @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:") # header h = header(reader) @@ -199,7 +198,8 @@ header_original = header(reader) - writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))) + hdr = BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))) + writer = BAM.Writer(BGZFCompressorStream(open(path, "w")), hdr) records = BAM.Record[] for record in reader