1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-12-23 13:28:16 +00:00

Merge pull request #29 from CiaranOMara/feature/crosscheck

Begin crosschecks
This commit is contained in:
Ciarán O'Mara 2020-10-20 01:50:13 +11:00 committed by GitHub
commit 8836b4dbb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 73 additions and 13 deletions

View file

@ -511,9 +511,12 @@ end
Get the segment sequence of `record`.
"""
function sequence(record::Record)::BioSequences.LongDNASeq
function sequence(record::Record)
checkfilled(record)
seqlen = seqlength(record)
if seqlen == 0
return nothing
end
data = Vector{UInt64}(undef, cld(seqlen, 16))
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
for i in 1:lastindex(data)

View file

@ -206,9 +206,9 @@ Get the 1-based leftmost mapping position of `record`.
function position(record::Record)::Int
checkfilled(record)
pos = unsafe_parse_decimal(Int, record.data, record.pos)
if pos == 0
missingerror(:position)
end
# if pos == 0
# missingerror(:position)
# end
return pos
end
@ -263,9 +263,9 @@ Get the position of the mate/next read of `record`.
function nextposition(record::Record)::Int
checkfilled(record)
pos = unsafe_parse_decimal(Int, record.data, record.pnext)
if pos == 0
missingerror(:nextposition)
end
# if pos == 0
# missingerror(:nextposition)
# end
return pos
end
@ -299,7 +299,8 @@ Get the CIGAR string of `record`.
function cigar(record::Record)::String
checkfilled(record)
if ismissing(record, record.cigar)
missingerror(:cigar)
# missingerror(:cigar)
return ""
end
return String(record.data[record.cigar])
end
@ -377,9 +378,9 @@ Get the template length of `record`.
function templength(record::Record)::Int
checkfilled(record)
len = unsafe_parse_decimal(Int, record.data, record.tlen)
if len == 0
missingerror(:tlen)
end
# if len == 0
# missingerror(:tlen)
# end
return len
end
@ -392,10 +393,11 @@ end
Get the segment sequence of `record`.
"""
function sequence(record::Record)::BioSequences.LongDNASeq
function sequence(record::Record)
checkfilled(record)
if ismissing(record, record.seq)
missingerror(:sequence)
# missingerror(:sequence)
return nothing
end
seqlen = length(record.seq)
ret = BioSequences.LongDNASeq(seqlen)

View file

@ -25,3 +25,4 @@ end
include("test_sam.jl")
include("test_bam.jl")
include("test_crosscheck.jl")

54
test/test_crosscheck.jl Normal file
View file

@ -0,0 +1,54 @@
@testset "Cross Check Properties" begin
Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl.
Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl
function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol)
bam_property = getproperty(XAM.BAM, property)
sam_property = getproperty(XAM.SAM, property)
if bam_property(bam) != sam_property(sam)
@warn "$property result is not the same" bam_property(bam) sam_property(sam)
return false
end
return true
end
samdir = path_of_format("SAM")
bamdir = path_of_format("BAM")
filenames = [
"ce#1",
"ce#2",
"ce#5",
"ce#5b",
"ce#unmap",
"ce#unmap1",
"ce#unmap2",
]
properties = [
:position,# POS
:tempname,# QNAME
:mappingquality,# MAPQ
:cigar, # CIGAR
:flag, # FLAG
:sequence, # SEQ
:nextposition, # PNEXT
:templength, # TLEN
]
for filename in filenames
records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam")))
records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam")))
for (bam, sam) in zip(records_bam, records_sam)
@test all(crosscheck.(bam, sam, properties)) == true
end
end
end # testset Crosscheck