From ef99c703170e54e2e9873c13b47f8d46325fef25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 18 Jan 2020 12:00:39 +1100 Subject: [PATCH] Flatten returns --- src/bam/auxdata.jl | 66 ++++++++++++++++++++++++++++++--------------- src/bam/overlap.jl | 12 ++++++--- src/bam/reader.jl | 2 ++ src/bam/record.jl | 5 ++-- src/sam/record.jl | 67 ++++++++++++++++++++++++++++------------------ 5 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src/bam/auxdata.jl b/src/bam/auxdata.jl index 2bcfac2..3ce0050 100644 --- a/src/bam/auxdata.jl +++ b/src/bam/auxdata.jl @@ -72,12 +72,15 @@ function loadauxtype(data::Vector{UInt8}, p::Int) b == UInt8('Z') ? String : error("invalid type tag: '$(Char(b))'")) end + t = data[p] + if t == UInt8('B') return p + 2, Vector{auxtype(data[p+1])} - else - return p + 1, auxtype(t) end + + return p + 1, auxtype(t) + end function loadauxvalue(data::Vector{UInt8}, p::Int, ::Type{T}) where T @@ -105,14 +108,17 @@ end function findauxtag(data::Vector{UInt8}, start::Int, stop::Int, t1::UInt8, t2::UInt8) pos = start + while pos ≤ stop && !(data[pos] == t1 && data[pos+1] == t2) pos = next_tag_position(data, pos) end + if pos > stop return 0 - else - return pos end + + return pos + end # Find the starting position of a next tag in `data` after `p`. @@ -120,24 +126,40 @@ end function next_tag_position(data::Vector{UInt8}, p::Int) typ = Char(data[p+2]) p += 3 + if typ == 'A' - p += 1 - elseif typ == 'c' || typ == 'C' - p += 1 - elseif typ == 's' || typ == 'S' - p += 2 - elseif typ == 'i' || typ == 'I' - p += 4 - elseif typ == 'f' - p += 4 - elseif typ == 'd' - p += 8 - elseif typ == 'Z' || typ == 'H' + return p += 1 + end + + if typ == 'c' || typ == 'C' + return p += 1 + end + + if typ == 's' || typ == 'S' + return p += 2 + end + + if typ == 'i' || typ == 'I' + return p += 4 + end + + if typ == 'f' + return p += 4 + end + + if typ == 'd' + return p += 8 + end + + if typ == 'Z' || typ == 'H' while data[p] != 0x00 # NULL-terminalted string p += 1 end - p += 1 - elseif typ == 'B' + return p += 1 + + end + + if typ == 'B' eltyp = Char(data[p]) elsize = eltyp == 'c' || eltyp == 'C' ? 1 : eltyp == 's' || eltyp == 'S' ? 2 : @@ -145,9 +167,9 @@ function next_tag_position(data::Vector{UInt8}, p::Int) error("invalid type tag: '$(Char(eltyp))'") p += 1 n = unsafe_load(Ptr{Int32}(pointer(data, p))) - p += 4 + elsize * n - else - error("invalid type tag: '$(Char(typ))'") + return p += 4 + elsize * n end - return p + + error("invalid type tag: '$(Char(typ))'") + end diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index c3475ba..186ff59 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -82,14 +82,18 @@ end function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}}) rid = refid(record) + if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2])) # strictly left return -1 - elseif rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) + end + + if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) # strictly right return +1 - else - # overlapping - return 0 end + + # overlapping + return 0 + end diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 906110a..301246c 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -89,6 +89,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) A = read(input, UInt8) M = read(input, UInt8) x = read(input, UInt8) + if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01 error("input was not a valid BAM file") end @@ -113,6 +114,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) voffset = isa(input.io, Base.AbstractPipe) ? BGZFStreams.VirtualOffset(0, 0) : BGZFStreams.virtualoffset(input) + return Reader( input, samreader.header, diff --git a/src/bam/record.jl b/src/bam/record.jl index 107f127..d0bde20 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -612,9 +612,10 @@ end function data_size(record::Record) if isfilled(record) return record.block_size - FIXED_FIELDS_BYTES + sizeof(record.block_size) - else - return 0 end + + return 0 + end function checkfilled(record::Record) diff --git a/src/sam/record.jl b/src/sam/record.jl index e627727..8989747 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -299,9 +299,10 @@ Get the alignment of `record`. function alignment(record::Record)::BioAlignments.Alignment if ismapped(record) return BioAlignments.Alignment(cigar(record), 1, position(record)) - else - return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[]) end + + return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[]) + end function hasalignment(record::Record) @@ -474,23 +475,30 @@ function Base.getindex(record::Record, tag::AbstractString) else hi = first(record.fields[i+1]) - 2 end + if typ == UInt8('A') @assert lo == hi return Char(record.data[lo]) - elseif typ == UInt8('i') + end + if typ == UInt8('i') return unsafe_parse_decimal(Int, record.data, lo:hi) - elseif typ == UInt8('f') + end + if typ == UInt8('f') # TODO: Call a C function directly for speed? return parse(Float32, SubString(record.data[lo:hi])) - elseif typ == UInt8('Z') - return String(record.data[lo:hi]) - elseif typ == UInt8('H') - return parse_hexarray(record.data, lo:hi) - elseif typ == UInt8('B') - return parse_typedarray(record.data, lo:hi) - else - throw(ArgumentError("type code '$(Char(typ))' is not defined")) end + if typ == UInt8('Z') + return String(record.data[lo:hi]) + end + if typ == UInt8('H') + return parse_hexarray(record.data, lo:hi) + end + if typ == UInt8('B') + return parse_typedarray(record.data, lo:hi) + end + + throw(ArgumentError("type code '$(Char(typ))' is not defined")) + end function Base.keys(record::Record) @@ -602,21 +610,28 @@ function parse_typedarray(data::Vector{UInt8}, range::UnitRange{Int}) xs = split(String(data[first(range)+2:last(range)])) if t == UInt8('c') return [parse(Int8, x) for x in xs] - elseif t == UInt8('C') - return [parse(UInt8, x) for x in xs] - elseif t == UInt8('s') - return [parse(Int16, x) for x in xs] - elseif t == UInt8('S') - return [parse(UInt16, x) for x in xs] - elseif t == UInt8('i') - return [parse(Int32, x) for x in xs] - elseif t == UInt8('I') - return [parse(UInt32, x) for x in xs] - elseif t == UInt8('f') - return [parse(Float32, x) for x in xs] - else - throw(ArgumentError("type code '$(Char(t))' is not defined")) end + if t == UInt8('C') + return [parse(UInt8, x) for x in xs] + end + if t == UInt8('s') + return [parse(Int16, x) for x in xs] + end + if t == UInt8('S') + return [parse(UInt16, x) for x in xs] + end + if t == UInt8('i') + return [parse(Int32, x) for x in xs] + end + if t == UInt8('I') + return [parse(UInt32, x) for x in xs] + end + if t == UInt8('f') + return [parse(Float32, x) for x in xs] + end + + throw(ArgumentError("type code '$(Char(t))' is not defined")) + end function ismissing(record::Record, range::UnitRange{Int})