Flatten returns

2024-11-23 10:19:56 +00:00 · 2020-01-18 12:00:39 +11:00 · 2020-01-18 12:00:39 +11:00 · ef99c70317
commit ef99c70317
parent 7a4d6d143d
5 changed files with 98 additions and 54 deletions
--- a/src/bam/auxdata.jl
+++ b/src/bam/auxdata.jl
@ -72,12 +72,15 @@ function loadauxtype(data::Vector{UInt8}, p::Int)
            b == UInt8('Z') ? String :
            error("invalid type tag: '$(Char(b))'"))
    end
    t = data[p]
    if t == UInt8('B')
        return p + 2, Vector{auxtype(data[p+1])}
    else
        return p + 1, auxtype(t)
    end
    return p + 1, auxtype(t)
 end
 function loadauxvalue(data::Vector{UInt8}, p::Int, ::Type{T}) where T
@ -105,14 +108,17 @@ end
 function findauxtag(data::Vector{UInt8}, start::Int, stop::Int, t1::UInt8, t2::UInt8)
    # Scan `data` from index `start` for the two-byte tag (`t1`, `t2`), hopping
    # from one tag to the next with `next_tag_position`.
    # Returns the index of the tag's first byte, or 0 when the scan position
    # moves past `stop` without a match.
    pos = start
    while pos ≤ stop && !(data[pos] == t1 && data[pos+1] == t2)
        pos = next_tag_position(data, pos)
    end
    if pos > stop
        # Ran off the end of the searched range: tag is absent.
        return 0
    end
    return pos
 end
 # Find the starting position of a next tag in `data` after `p`.
@ -120,24 +126,40 @@ end
 function next_tag_position(data::Vector{UInt8}, p::Int)
    typ = Char(data[p+2])
    p += 3
    if typ == 'A'
-        p += 1
+        return p += 1
-    elseif typ == 'c' || typ == 'C'
+    end
-        p += 1
+
-    elseif typ == 's' || typ == 'S'
+    if typ == 'c' || typ == 'C'
-        p += 2
+        return p += 1
-    elseif typ == 'i' || typ == 'I'
+    end
-        p += 4
+
-    elseif typ == 'f'
+    if typ == 's' || typ == 'S'
-        p += 4
+        return p += 2
-    elseif typ == 'd'
+    end
-        p += 8
+
-    elseif typ == 'Z' || typ == 'H'
+    if typ == 'i' || typ == 'I'
        return p += 4
    end
    if typ == 'f'
        return p += 4
    end
    if typ == 'd'
        return p += 8
    end
    if typ == 'Z' || typ == 'H'
        while data[p] != 0x00  # NULL-terminated string
            p += 1
        end
-        p += 1
+        return p += 1
-    elseif typ == 'B'
+
    end
    if typ == 'B'
        eltyp = Char(data[p])
        elsize = eltyp == 'c' || eltyp == 'C'                 ? 1 :
                 eltyp == 's' || eltyp == 'S'                 ? 2 :
@ -145,9 +167,9 @@ function next_tag_position(data::Vector{UInt8}, p::Int)
                 error("invalid type tag: '$(Char(eltyp))'")
        p += 1
        n = unsafe_load(Ptr{Int32}(pointer(data, p)))
-        p += 4 + elsize * n
+        return p += 4 + elsize * n
-    else
+    end
    error("invalid type tag: '$(Char(typ))'")
-    end
+
    return p
 end
--- a/src/bam/overlap.jl
+++ b/src/bam/overlap.jl
@ -82,14 +82,18 @@ end
 function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
    # Order `record` relative to `interval`, where `interval[1]` is compared
    # against the record's reference id and `interval[2]` against its positions.
    # Returns -1 when the record lies strictly left of the interval, +1 when it
    # lies strictly right, and 0 otherwise (the two overlap).
    rid = refid(record)
    if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2]))
        # strictly left
        return -1
    end
    if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
        # strictly right
        return +1
    end
    # overlapping
    return 0
 end
--- a/src/bam/reader.jl
+++ b/src/bam/reader.jl
@ -89,6 +89,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
    A = read(input, UInt8)
    M = read(input, UInt8)
    x = read(input, UInt8)
    if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01
        error("input was not a valid BAM file")
    end
@ -113,6 +114,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream)
    voffset = isa(input.io, Base.AbstractPipe) ?
        BGZFStreams.VirtualOffset(0, 0) :
        BGZFStreams.virtualoffset(input)
    return Reader(
        input,
        samreader.header,
--- a/src/bam/record.jl
+++ b/src/bam/record.jl
@ -612,9 +612,10 @@ end
 function data_size(record::Record)
    # Returns `block_size - FIXED_FIELDS_BYTES + sizeof(block_size)` for a filled
    # record, and 0 for a record that is not filled.
    if isfilled(record)
        return record.block_size - FIXED_FIELDS_BYTES + sizeof(record.block_size)
    end
    return 0
 end
 function checkfilled(record::Record)
--- a/src/sam/record.jl
+++ b/src/sam/record.jl
@ -299,9 +299,10 @@ Get the alignment of `record`.
 function alignment(record::Record)::BioAlignments.Alignment
    # A mapped record yields an alignment built from its CIGAR, the constant 1,
    # and the record's position; an unmapped record yields an alignment with an
    # empty anchor list.
    # NOTE(review): the `1` is presumably the starting sequence position expected
    # by `BioAlignments.Alignment` — confirm against the BioAlignments API.
    if ismapped(record)
        return BioAlignments.Alignment(cigar(record), 1, position(record))
    end
    return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[])
 end
 function hasalignment(record::Record)
@ -474,23 +475,30 @@ function Base.getindex(record::Record, tag::AbstractString)
    else
        hi = first(record.fields[i+1]) - 2
    end
    if typ == UInt8('A')
        @assert lo == hi
        return Char(record.data[lo])
-    elseif typ == UInt8('i')
+    end
    if typ == UInt8('i')
        return unsafe_parse_decimal(Int, record.data, lo:hi)
-    elseif typ == UInt8('f')
+    end
    if typ == UInt8('f')
        # TODO: Call a C function directly for speed?
        return parse(Float32, SubString(record.data[lo:hi]))
    elseif typ == UInt8('Z')
        return String(record.data[lo:hi])
    elseif typ == UInt8('H')
        return parse_hexarray(record.data, lo:hi)
    elseif typ == UInt8('B')
        return parse_typedarray(record.data, lo:hi)
    else
        throw(ArgumentError("type code '$(Char(typ))' is not defined"))
    end
    if typ == UInt8('Z')
        return String(record.data[lo:hi])
    end
    if typ == UInt8('H')
        return parse_hexarray(record.data, lo:hi)
    end
    if typ == UInt8('B')
        return parse_typedarray(record.data, lo:hi)
    end
    throw(ArgumentError("type code '$(Char(typ))' is not defined"))
 end
 function Base.keys(record::Record)
@ -602,21 +610,28 @@ function parse_typedarray(data::Vector{UInt8}, range::UnitRange{Int})
    xs = split(String(data[first(range)+2:last(range)]))
    if t == UInt8('c')
        return [parse(Int8, x) for x in xs]
    elseif t == UInt8('C')
        return [parse(UInt8, x) for x in xs]
    elseif t == UInt8('s')
        return [parse(Int16, x) for x in xs]
    elseif t == UInt8('S')
        return [parse(UInt16, x) for x in xs]
    elseif t == UInt8('i')
        return [parse(Int32, x) for x in xs]
    elseif t == UInt8('I')
        return [parse(UInt32, x) for x in xs]
    elseif t == UInt8('f')
        return [parse(Float32, x) for x in xs]
    else
        throw(ArgumentError("type code '$(Char(t))' is not defined"))
    end
    if t == UInt8('C')
        return [parse(UInt8, x) for x in xs]
    end
    if t == UInt8('s')
        return [parse(Int16, x) for x in xs]
    end
    if t == UInt8('S')
        return [parse(UInt16, x) for x in xs]
    end
    if t == UInt8('i')
        return [parse(Int32, x) for x in xs]
    end
    if t == UInt8('I')
        return [parse(UInt32, x) for x in xs]
    end
    if t == UInt8('f')
        return [parse(Float32, x) for x in xs]
    end
    throw(ArgumentError("type code '$(Char(t))' is not defined"))
 end
 function ismissing(record::Record, range::UnitRange{Int})