1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-11-14 14:23:11 +00:00

Merge pull request #26 from jakobnissen/rehaul

Make BAM record layout match BAM specs
This commit is contained in:
Ciarán O'Mara 2020-08-02 14:16:25 +10:00 committed by GitHub
commit 20dbf800cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 37 deletions

View file

@ -11,18 +11,21 @@ mutable struct Record
block_size::Int32
refid::Int32
pos::Int32
bin_mq_nl::UInt32
flag_nc::UInt32
l_read_name::UInt8
mapq::UInt8
bin::UInt16
n_cigar_op::UInt16
flag::UInt16
l_seq::Int32
next_refid::Int32
next_pos::Int32
tlen::Int32
# variable length data
data::Vector{UInt8}
reader::Reader
reader::Union{Reader, Nothing}
function Record()
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
end
end
@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
return a.block_size == b.block_size &&
a.refid == b.refid &&
a.pos == b.pos &&
a.bin_mq_nl == b.bin_mq_nl &&
a.flag_nc == b.flag_nc &&
a.l_read_name == b.l_read_name &&
a.mapq == b.mapq &&
a.bin == b.bin &&
a.n_cigar_op == b.n_cigar_op &&
a.flag == b.flag &&
a.l_seq == b.l_seq &&
a.next_refid == b.next_refid &&
a.next_pos == b.next_pos &&
@ -60,19 +66,13 @@ end
function Base.copy(record::Record)
copy = Record()
copy.block_size = record.block_size
copy.refid = record.refid
copy.pos = record.pos
copy.bin_mq_nl = record.bin_mq_nl
copy.flag_nc = record.flag_nc
copy.l_seq = record.l_seq
copy.next_refid = record.next_refid
copy.next_pos = record.next_pos
copy.tlen = record.tlen
copy.data = record.data[1:data_size(record)]
if isdefined(record, :reader)
copy.reader = record.reader
GC.@preserve copy record begin
dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
src_pointer = Ptr{UInt8}(pointer_from_objref(record))
unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
end
copy.data = record.data[1:data_size(record)]
copy.reader = record.reader
return copy
end
@ -80,8 +80,11 @@ function Base.empty!(record::Record)
record.block_size = 0
record.refid = 0
record.pos = 0
record.bin_mq_nl = 0
record.flag_nc = 0
record.l_read_name = 0
record.mapq = 0
record.bin = 0
record.flag = 0
record.n_cigar_op = 0
record.l_seq = 0
record.next_refid = 0
record.next_pos = 0
@ -132,7 +135,7 @@ Get the bitwise flag of `record`.
"""
function flag(record::Record)::UInt16
checkfilled(record)
return UInt16(record.flag_nc >> 16)
return record.flag
end
function hasflag(record::Record)
@ -318,7 +321,7 @@ end
Get the mapping quality of `record`.
"""
function mappingquality(record::Record)::UInt8
return UInt8((record.bin_mq_nl >> 8) & 0xff)
return record.mapq
end
function hasmappingquality(record::Record)
@ -397,7 +400,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
end
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF
cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
if !checkCG
return cigaridx, nops
end
@ -661,5 +664,5 @@ end
# Return the length of the read name.
function seqname_length(record::Record)
return record.bin_mq_nl & 0xff
return record.l_read_name
end

View file

@ -104,9 +104,12 @@
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
end
new_record = convert(BAM.Record, array)
@test record.bin_mq_nl == new_record.bin_mq_nl
@test record.l_read_name == new_record.l_read_name
@test record.mapq == new_record.mapq
@test record.bin == new_record.bin
@test record.block_size == new_record.block_size
@test record.flag_nc == new_record.flag_nc
@test record.flag == new_record.flag
@test record.n_cigar_op == new_record.n_cigar_op
@test record.l_seq == new_record.l_seq
@test record.next_refid == new_record.next_refid
@test record.next_pos == new_record.next_pos
@ -162,18 +165,21 @@
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
for (a, b) in zip(xs, ys)
if !(
x.block_size == y.block_size &&
x.refid == y.refid &&
x.pos == y.pos &&
x.bin_mq_nl == y.bin_mq_nl &&
x.flag_nc == y.flag_nc &&
x.l_seq == y.l_seq &&
x.next_refid == y.next_refid &&
x.next_pos == y.next_pos &&
x.tlen == y.tlen &&
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
a.block_size == b.block_size &&
a.refid == b.refid &&
a.pos == b.pos &&
a.l_read_name == b.l_read_name &&
a.mapq == b.mapq &&
a.bin == b.bin &&
a.n_cigar_op == b.n_cigar_op &&
a.flag == b.flag &&
a.l_seq == b.l_seq &&
a.next_refid == b.next_refid &&
a.next_pos == b.next_pos &&
a.tlen == b.tlen &&
a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
return false
end
end