2020-01-17 18:24:09 +00:00
|
|
|
# BAM Overlap
|
|
|
|
# ===========
|
|
|
|
|
|
|
|
struct OverlapIterator{T}
|
|
|
|
reader::Reader{T}
|
|
|
|
refname::String
|
|
|
|
interval::UnitRange{Int}
|
|
|
|
end
|
|
|
|
|
|
|
|
function Base.IteratorSize(::Type{OverlapIterator{T}}) where T
|
|
|
|
return Base.SizeUnknown()
|
|
|
|
end
|
|
|
|
|
|
|
|
function Base.eltype(::Type{OverlapIterator{T}}) where T
|
|
|
|
return Record
|
|
|
|
end
|
|
|
|
|
|
|
|
function GenomicFeatures.eachoverlap(reader::Reader, interval::Interval)
|
|
|
|
return GenomicFeatures.eachoverlap(reader, interval.seqname, interval.first:interval.last)
|
|
|
|
end
|
|
|
|
|
|
|
|
function GenomicFeatures.eachoverlap(reader::Reader, interval)
|
|
|
|
return GenomicFeatures.eachoverlap(reader, convert(Interval, interval))
|
|
|
|
end
|
|
|
|
|
|
|
|
function GenomicFeatures.eachoverlap(reader::Reader, refname::AbstractString, interval::UnitRange)
|
|
|
|
return OverlapIterator(reader, String(refname), interval)
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
# Iterator
|
|
|
|
# --------
|
|
|
|
|
|
|
|
mutable struct OverlapIteratorState
|
|
|
|
# reference index
|
|
|
|
refindex::Int
|
|
|
|
|
|
|
|
# possibly overlapping chunks
|
2020-02-20 10:19:07 +00:00
|
|
|
chunks::Vector{Indexes.Chunk}
|
2020-01-17 18:24:09 +00:00
|
|
|
|
|
|
|
# current chunk index
|
|
|
|
chunkid::Int
|
|
|
|
|
|
|
|
# pre-allocated record
|
|
|
|
record::Record
|
|
|
|
end
|
|
|
|
|
|
|
|
function Base.iterate(iter::OverlapIterator)
|
|
|
|
refindex = findfirst(isequal(iter.refname), iter.reader.refseqnames)
|
|
|
|
if refindex == 0
|
|
|
|
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
|
|
|
end
|
|
|
|
@assert iter.reader.index !== nothing
|
2020-02-20 10:19:07 +00:00
|
|
|
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
2020-01-17 18:24:09 +00:00
|
|
|
if !isempty(chunks)
|
|
|
|
seek(iter.reader, first(chunks).start)
|
|
|
|
end
|
|
|
|
state = OverlapIteratorState(refindex, chunks, 1, Record())
|
|
|
|
return iterate(iter, state)
|
|
|
|
end
|
|
|
|
|
|
|
|
function Base.iterate(iter::OverlapIterator, state)
|
|
|
|
while state.chunkid ≤ lastindex(state.chunks)
|
|
|
|
chunk = state.chunks[state.chunkid]
|
|
|
|
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
|
|
|
read!(iter.reader, state.record)
|
|
|
|
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
|
|
|
if c == 0
|
|
|
|
return copy(state.record), state
|
|
|
|
elseif c > 0
|
|
|
|
# no more overlapping records in this chunk since records are sorted
|
|
|
|
break
|
|
|
|
end
|
|
|
|
end
|
|
|
|
state.chunkid += 1
|
|
|
|
if state.chunkid ≤ lastindex(state.chunks)
|
|
|
|
seek(iter.reader, state.chunks[state.chunkid].start)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
return nothing
|
|
|
|
end
|
|
|
|
|
|
|
|
function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
|
|
|
|
rid = refid(record)
|
|
|
|
if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2]))
|
|
|
|
# strictly left
|
|
|
|
return -1
|
|
|
|
elseif rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
|
|
|
|
# strictly right
|
|
|
|
return +1
|
|
|
|
else
|
|
|
|
# overlapping
|
|
|
|
return 0
|
|
|
|
end
|
|
|
|
end
|