1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-11-15 06:43:10 +00:00

Merge pull request #26 from jakobnissen/rehaul

Make BAM record layout match BAM specs
This commit is contained in:
Ciarán O'Mara 2020-08-02 14:16:25 +10:00 committed by GitHub
commit 20dbf800cc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 37 deletions

View file

@ -11,18 +11,21 @@ mutable struct Record
block_size::Int32 block_size::Int32
refid::Int32 refid::Int32
pos::Int32 pos::Int32
bin_mq_nl::UInt32 l_read_name::UInt8
flag_nc::UInt32 mapq::UInt8
bin::UInt16
n_cigar_op::UInt16
flag::UInt16
l_seq::Int32 l_seq::Int32
next_refid::Int32 next_refid::Int32
next_pos::Int32 next_pos::Int32
tlen::Int32 tlen::Int32
# variable length data # variable length data
data::Vector{UInt8} data::Vector{UInt8}
reader::Reader reader::Union{Reader, Nothing}
function Record() function Record()
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[]) return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
end end
end end
@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
return a.block_size == b.block_size && return a.block_size == b.block_size &&
a.refid == b.refid && a.refid == b.refid &&
a.pos == b.pos && a.pos == b.pos &&
a.bin_mq_nl == b.bin_mq_nl && a.l_read_name == b.l_read_name &&
a.flag_nc == b.flag_nc && a.mapq == b.mapq &&
a.bin == b.bin &&
a.n_cigar_op == b.n_cigar_op &&
a.flag == b.flag &&
a.l_seq == b.l_seq && a.l_seq == b.l_seq &&
a.next_refid == b.next_refid && a.next_refid == b.next_refid &&
a.next_pos == b.next_pos && a.next_pos == b.next_pos &&
@ -60,19 +66,13 @@ end
function Base.copy(record::Record) function Base.copy(record::Record)
copy = Record() copy = Record()
copy.block_size = record.block_size GC.@preserve copy record begin
copy.refid = record.refid dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
copy.pos = record.pos src_pointer = Ptr{UInt8}(pointer_from_objref(record))
copy.bin_mq_nl = record.bin_mq_nl unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
copy.flag_nc = record.flag_nc
copy.l_seq = record.l_seq
copy.next_refid = record.next_refid
copy.next_pos = record.next_pos
copy.tlen = record.tlen
copy.data = record.data[1:data_size(record)]
if isdefined(record, :reader)
copy.reader = record.reader
end end
copy.data = record.data[1:data_size(record)]
copy.reader = record.reader
return copy return copy
end end
@ -80,8 +80,11 @@ function Base.empty!(record::Record)
record.block_size = 0 record.block_size = 0
record.refid = 0 record.refid = 0
record.pos = 0 record.pos = 0
record.bin_mq_nl = 0 record.l_read_name = 0
record.flag_nc = 0 record.mapq = 0
record.bin = 0
record.flag = 0
record.n_cigar_op = 0
record.l_seq = 0 record.l_seq = 0
record.next_refid = 0 record.next_refid = 0
record.next_pos = 0 record.next_pos = 0
@ -132,7 +135,7 @@ Get the bitwise flag of `record`.
""" """
function flag(record::Record)::UInt16 function flag(record::Record)::UInt16
checkfilled(record) checkfilled(record)
return UInt16(record.flag_nc >> 16) return record.flag
end end
function hasflag(record::Record) function hasflag(record::Record)
@ -318,7 +321,7 @@ end
Get the mapping quality of `record`. Get the mapping quality of `record`.
""" """
function mappingquality(record::Record)::UInt8 function mappingquality(record::Record)::UInt8
return UInt8((record.bin_mq_nl >> 8) & 0xff) return record.mapq
end end
function hasmappingquality(record::Record) function hasmappingquality(record::Record)
@ -397,7 +400,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
end end
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int} function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
if !checkCG if !checkCG
return cigaridx, nops return cigaridx, nops
end end
@ -661,5 +664,5 @@ end
# Return the length of the read name. # Return the length of the read name.
function seqname_length(record::Record) function seqname_length(record::Record)
return record.bin_mq_nl & 0xff return record.l_read_name
end end

View file

@ -104,9 +104,12 @@
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize) unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
end end
new_record = convert(BAM.Record, array) new_record = convert(BAM.Record, array)
@test record.bin_mq_nl == new_record.bin_mq_nl @test record.l_read_name == new_record.l_read_name
@test record.mapq == new_record.mapq
@test record.bin == new_record.bin
@test record.block_size == new_record.block_size @test record.block_size == new_record.block_size
@test record.flag_nc == new_record.flag_nc @test record.flag == new_record.flag
@test record.n_cigar_op == new_record.n_cigar_op
@test record.l_seq == new_record.l_seq @test record.l_seq == new_record.l_seq
@test record.next_refid == new_record.next_refid @test record.next_refid == new_record.next_refid
@test record.next_pos == new_record.next_pos @test record.next_pos == new_record.next_pos
@ -162,18 +165,21 @@
if length(xs) != length(ys) if length(xs) != length(ys)
return false return false
end end
for (x, y) in zip(xs, ys) for (a, b) in zip(xs, ys)
if !( if !(
x.block_size == y.block_size && a.block_size == b.block_size &&
x.refid == y.refid && a.refid == b.refid &&
x.pos == y.pos && a.pos == b.pos &&
x.bin_mq_nl == y.bin_mq_nl && a.l_read_name == b.l_read_name &&
x.flag_nc == y.flag_nc && a.mapq == b.mapq &&
x.l_seq == y.l_seq && a.bin == b.bin &&
x.next_refid == y.next_refid && a.n_cigar_op == b.n_cigar_op &&
x.next_pos == y.next_pos && a.flag == b.flag &&
x.tlen == y.tlen && a.l_seq == b.l_seq &&
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)]) a.next_refid == b.next_refid &&
a.next_pos == b.next_pos &&
a.tlen == b.tlen &&
a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
return false return false
end end
end end