1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-11-23 10:19:56 +00:00

Flatten returns

This commit is contained in:
Ciarán O'Mara 2020-01-18 12:00:39 +11:00
parent 7a4d6d143d
commit ef99c70317
5 changed files with 98 additions and 54 deletions

View file

@ -72,12 +72,15 @@ function loadauxtype(data::Vector{UInt8}, p::Int)
b == UInt8('Z') ? String : b == UInt8('Z') ? String :
error("invalid type tag: '$(Char(b))'")) error("invalid type tag: '$(Char(b))'"))
end end
t = data[p] t = data[p]
if t == UInt8('B') if t == UInt8('B')
return p + 2, Vector{auxtype(data[p+1])} return p + 2, Vector{auxtype(data[p+1])}
else
return p + 1, auxtype(t)
end end
return p + 1, auxtype(t)
end end
function loadauxvalue(data::Vector{UInt8}, p::Int, ::Type{T}) where T function loadauxvalue(data::Vector{UInt8}, p::Int, ::Type{T}) where T
@ -105,14 +108,17 @@ end
function findauxtag(data::Vector{UInt8}, start::Int, stop::Int, t1::UInt8, t2::UInt8) function findauxtag(data::Vector{UInt8}, start::Int, stop::Int, t1::UInt8, t2::UInt8)
pos = start pos = start
while pos ≤ stop && !(data[pos] == t1 && data[pos+1] == t2) while pos ≤ stop && !(data[pos] == t1 && data[pos+1] == t2)
pos = next_tag_position(data, pos) pos = next_tag_position(data, pos)
end end
if pos > stop if pos > stop
return 0 return 0
else
return pos
end end
return pos
end end
# Find the starting position of a next tag in `data` after `p`. # Find the starting position of a next tag in `data` after `p`.
@ -120,24 +126,40 @@ end
function next_tag_position(data::Vector{UInt8}, p::Int) function next_tag_position(data::Vector{UInt8}, p::Int)
typ = Char(data[p+2]) typ = Char(data[p+2])
p += 3 p += 3
if typ == 'A' if typ == 'A'
p += 1 return p += 1
elseif typ == 'c' || typ == 'C' end
p += 1
elseif typ == 's' || typ == 'S' if typ == 'c' || typ == 'C'
p += 2 return p += 1
elseif typ == 'i' || typ == 'I' end
p += 4
elseif typ == 'f' if typ == 's' || typ == 'S'
p += 4 return p += 2
elseif typ == 'd' end
p += 8
elseif typ == 'Z' || typ == 'H' if typ == 'i' || typ == 'I'
return p += 4
end
if typ == 'f'
return p += 4
end
if typ == 'd'
return p += 8
end
if typ == 'Z' || typ == 'H'
while data[p] != 0x00 # NULL-terminated string while data[p] != 0x00 # NULL-terminated string
p += 1 p += 1
end end
p += 1 return p += 1
elseif typ == 'B'
end
if typ == 'B'
eltyp = Char(data[p]) eltyp = Char(data[p])
elsize = eltyp == 'c' || eltyp == 'C' ? 1 : elsize = eltyp == 'c' || eltyp == 'C' ? 1 :
eltyp == 's' || eltyp == 'S' ? 2 : eltyp == 's' || eltyp == 'S' ? 2 :
@ -145,9 +167,9 @@ function next_tag_position(data::Vector{UInt8}, p::Int)
error("invalid type tag: '$(Char(eltyp))'") error("invalid type tag: '$(Char(eltyp))'")
p += 1 p += 1
n = unsafe_load(Ptr{Int32}(pointer(data, p))) n = unsafe_load(Ptr{Int32}(pointer(data, p)))
p += 4 + elsize * n return p += 4 + elsize * n
else end
error("invalid type tag: '$(Char(typ))'") error("invalid type tag: '$(Char(typ))'")
end
return p
end end

View file

@ -82,14 +82,18 @@ end
function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}}) function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
rid = refid(record) rid = refid(record)
if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2])) if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2]))
# strictly left # strictly left
return -1 return -1
elseif rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) end
if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
# strictly right # strictly right
return +1 return +1
else end
# overlapping # overlapping
return 0 return 0
end
end end

View file

@ -89,6 +89,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
A = read(input, UInt8) A = read(input, UInt8)
M = read(input, UInt8) M = read(input, UInt8)
x = read(input, UInt8) x = read(input, UInt8)
if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01 if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01
error("input was not a valid BAM file") error("input was not a valid BAM file")
end end
@ -113,6 +114,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
voffset = isa(input.io, Base.AbstractPipe) ? voffset = isa(input.io, Base.AbstractPipe) ?
BGZFStreams.VirtualOffset(0, 0) : BGZFStreams.VirtualOffset(0, 0) :
BGZFStreams.virtualoffset(input) BGZFStreams.virtualoffset(input)
return Reader( return Reader(
input, input,
samreader.header, samreader.header,

View file

@ -612,9 +612,10 @@ end
function data_size(record::Record) function data_size(record::Record)
if isfilled(record) if isfilled(record)
return record.block_size - FIXED_FIELDS_BYTES + sizeof(record.block_size) return record.block_size - FIXED_FIELDS_BYTES + sizeof(record.block_size)
else
return 0
end end
return 0
end end
function checkfilled(record::Record) function checkfilled(record::Record)

View file

@ -299,9 +299,10 @@ Get the alignment of `record`.
function alignment(record::Record)::BioAlignments.Alignment function alignment(record::Record)::BioAlignments.Alignment
if ismapped(record) if ismapped(record)
return BioAlignments.Alignment(cigar(record), 1, position(record)) return BioAlignments.Alignment(cigar(record), 1, position(record))
else
return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[])
end end
return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[])
end end
function hasalignment(record::Record) function hasalignment(record::Record)
@ -474,23 +475,30 @@ function Base.getindex(record::Record, tag::AbstractString)
else else
hi = first(record.fields[i+1]) - 2 hi = first(record.fields[i+1]) - 2
end end
if typ == UInt8('A') if typ == UInt8('A')
@assert lo == hi @assert lo == hi
return Char(record.data[lo]) return Char(record.data[lo])
elseif typ == UInt8('i') end
if typ == UInt8('i')
return unsafe_parse_decimal(Int, record.data, lo:hi) return unsafe_parse_decimal(Int, record.data, lo:hi)
elseif typ == UInt8('f') end
if typ == UInt8('f')
# TODO: Call a C function directly for speed? # TODO: Call a C function directly for speed?
return parse(Float32, SubString(record.data[lo:hi])) return parse(Float32, SubString(record.data[lo:hi]))
elseif typ == UInt8('Z')
return String(record.data[lo:hi])
elseif typ == UInt8('H')
return parse_hexarray(record.data, lo:hi)
elseif typ == UInt8('B')
return parse_typedarray(record.data, lo:hi)
else
throw(ArgumentError("type code '$(Char(typ))' is not defined"))
end end
if typ == UInt8('Z')
return String(record.data[lo:hi])
end
if typ == UInt8('H')
return parse_hexarray(record.data, lo:hi)
end
if typ == UInt8('B')
return parse_typedarray(record.data, lo:hi)
end
throw(ArgumentError("type code '$(Char(typ))' is not defined"))
end end
function Base.keys(record::Record) function Base.keys(record::Record)
@ -602,21 +610,28 @@ function parse_typedarray(data::Vector{UInt8}, range::UnitRange{Int})
xs = split(String(data[first(range)+2:last(range)])) xs = split(String(data[first(range)+2:last(range)]))
if t == UInt8('c') if t == UInt8('c')
return [parse(Int8, x) for x in xs] return [parse(Int8, x) for x in xs]
elseif t == UInt8('C')
return [parse(UInt8, x) for x in xs]
elseif t == UInt8('s')
return [parse(Int16, x) for x in xs]
elseif t == UInt8('S')
return [parse(UInt16, x) for x in xs]
elseif t == UInt8('i')
return [parse(Int32, x) for x in xs]
elseif t == UInt8('I')
return [parse(UInt32, x) for x in xs]
elseif t == UInt8('f')
return [parse(Float32, x) for x in xs]
else
throw(ArgumentError("type code '$(Char(t))' is not defined"))
end end
if t == UInt8('C')
return [parse(UInt8, x) for x in xs]
end
if t == UInt8('s')
return [parse(Int16, x) for x in xs]
end
if t == UInt8('S')
return [parse(UInt16, x) for x in xs]
end
if t == UInt8('i')
return [parse(Int32, x) for x in xs]
end
if t == UInt8('I')
return [parse(UInt32, x) for x in xs]
end
if t == UInt8('f')
return [parse(Float32, x) for x in xs]
end
throw(ArgumentError("type code '$(Char(t))' is not defined"))
end end
function ismissing(record::Record, range::UnitRange{Int}) function ismissing(record::Record, range::UnitRange{Int})