From bb13a38cf70b3eab70eccde534098466b5a5b2ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 17 Apr 2020 15:49:24 +1000 Subject: [PATCH 1/5] Increment version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f45120b..19cc98b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "Ciarán O'Mara "] -version = "0.2.2" +version = "0.2.3" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" From 5fa8c5bd2eb31d56b2bec67a8634e596ff43ec38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 17 Apr 2020 15:51:11 +1000 Subject: [PATCH 2/5] Tests for in-place-reading pattern --- test/runtests.jl | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 30f6914..d727d08 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -196,6 +196,29 @@ end end end end + + @testset "In-Place-Reading Pattern" begin + + file_sam = joinpath(samdir, "ce#5b.sam") + + records = open(collect, SAM.Reader, file_sam) + + reader = open(SAM.Reader, file_sam) + record = SAM.Record() + i = 0 + while !eof(reader) + empty!(record) # Reset the record. + read!(reader, record) + + i = i + 1 + + @test records[i] == record + + end + + close(reader) + + end end @testset "BAM" begin @@ -414,6 +437,27 @@ end end end + @testset "In-Place-Reading Pattern" begin + + file_bam = joinpath(bamdir, "ce#5b.bam") + + records = open(collect, BAM.Reader, file_bam) + + reader = open(BAM.Reader, file_bam) + record = BAM.Record() + i = 0 + while !eof(reader) + empty!(record) # Reset the record. 
+ read!(reader, record) + + i = i + 1 + @test records[i] == record + end + + close(reader) + + end + @testset "Random access" begin filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam") reader = open(BAM.Reader, filepath, index=filepath * ".bai") From 300158dbef8b23e122f258bf22a08a331efd6892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 17 Apr 2020 15:55:41 +1000 Subject: [PATCH 3/5] Record equivalency functions --- src/bam/record.jl | 13 +++++++++++++ src/sam/record.jl | 17 +++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/bam/record.jl b/src/bam/record.jl index 13c5608..bf65078 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -45,6 +45,19 @@ function Base.convert(::Type{Record}, data::Vector{UInt8}) return record end +function Base.:(==)(a::Record, b::Record) + return a.block_size == b.block_size && + a.refid == b.refid && + a.pos == b.pos && + a.bin_mq_nl == b.bin_mq_nl && + a.flag_nc == b.flag_nc && + a.l_seq == b.l_seq && + a.next_refid == b.next_refid && + a.next_pos == b.next_pos && + a.tlen == b.tlen && + a.data[1:data_size(a)] == b.data[1:data_size(b)] +end + function Base.copy(record::Record) copy = Record() copy.block_size = record.block_size diff --git a/src/sam/record.jl b/src/sam/record.jl index a61e0df..f7f381c 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -74,6 +74,23 @@ function Base.convert(::Type{Record}, str::AbstractString) return Record(Vector{UInt8}(str)) end +function Base.:(==)(a::Record, b::Record) + return a.filled == b.filled && + a.qname == b.qname && + a.flag == b.flag && + a.rname == b.rname && + a.pos == b.pos && + a.mapq == b.mapq && + a.cigar == b.cigar && + a.rnext == b.rnext && + a.pnext == b.pnext && + a.tlen == b.tlen && + a.seq == b.seq && + a.qual == b.qual && + a.fields == b.fields && + a.data[a.filled] == b.data[b.filled] +end + function Base.show(io::IO, record::Record) print(io, summary(record), ':') if isfilled(record) 
From f00547177f8aac5168a5c8178012b1f51bd571e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 17 Apr 2020 15:52:40 +1000 Subject: [PATCH 4/5] Empty records during iteration --- src/bam/reader.jl | 7 +++---- src/bam/record.jl | 16 ++++++++++++++++ src/sam/reader.jl | 15 +++++++++++++-- src/sam/record.jl | 6 +++++- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 953f5ca..43e228e 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -72,12 +72,11 @@ function Base.seekstart(reader::Reader) seek(reader.stream, reader.start_offset) end -function Base.iterate(reader::Reader, rec=Record()) - if eof(reader) +function Base.iterate(reader::Reader, nextone = Record()) + if BioGenerics.IO.tryread!(reader, nextone) === nothing return nothing end - read!(reader, rec) - return copy(rec), rec + return copy(nextone), empty!(nextone) end # Initialize a BAM reader by reading the header section. diff --git a/src/bam/record.jl b/src/bam/record.jl index bf65078..7f322b5 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -76,6 +76,22 @@ function Base.copy(record::Record) return copy end +function Base.empty!(record::Record) + record.block_size = 0 + record.refid = 0 + record.pos = 0 + record.bin_mq_nl = 0 + record.flag_nc = 0 + record.l_seq = 0 + record.next_refid = 0 + record.next_pos = 0 + record.tlen = 0 + + #Note: data will be overwritten and indexed using data_size. 
+ + return record +end + function Base.show(io::IO, record::Record) print(io, summary(record), ':') if isfilled(record) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 24152c4..a20fd4c 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -81,9 +81,20 @@ function index!(record::Record) return record end -function Base.read!(rdr::Reader, rec::Record) +function Base.iterate(reader::Reader, nextone::Record = Record()) + if BioGenerics.IO.tryread!(reader, nextone) === nothing + return nothing + end + return copy(nextone), empty!(nextone) +end - empty!(rec.fields) #Note: data is pushed to the fields field, and other field data is overwritten. #TODO: distinguish for inplace reading pattern. +""" + read!(rdr::Reader, rec::Record) + +Read a `Record` into `rec`; overwriting or adding to existing field values. +It is assumed that `rec` is already initialized or empty. +""" +function Base.read!(rdr::Reader, rec::Record) cs, ln, f = readrecord!(rdr.state.stream, rec, (rdr.state.state, rdr.state.linenum)) diff --git a/src/sam/record.jl b/src/sam/record.jl index f7f381c..9bb33bd 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -571,7 +571,7 @@ end # Helper Functions # ---------------- -function initialize!(record::Record) +function Base.empty!(record::Record) record.filled = 1:0 record.qname = 1:0 record.flag = 1:0 @@ -588,6 +588,10 @@ function initialize!(record::Record) return record end +function initialize!(record::Record) #TODO: deprecate. 
+ return empty!(record) +end + function checkfilled(record::Record) if !isfilled(record) throw(ArgumentError("unfilled SAM record")) From b440bc3c4d3b77d2f062d422387a7337557f3a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 17 Apr 2020 15:52:50 +1000 Subject: [PATCH 5/5] Update documentation --- docs/src/man/hts-files.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index de87b32..3f57ef7 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -58,6 +58,7 @@ In-place reading reuses a pre-allocated object for every record and less memory reader = open(BAM.Reader, "data.bam") record = BAM.Record() while !eof(reader) + empty!(record) read!(reader, record) # do something end