From 433b8d14e8e11735d4196b71bf29f83482cd3ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Thu, 13 Aug 2020 16:45:51 +1000 Subject: [PATCH 1/2] Begin crosschecks --- test/runtests.jl | 1 + test/test_crosscheck.jl | 54 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 test/test_crosscheck.jl diff --git a/test/runtests.jl b/test/runtests.jl index 970c3a1..beede29 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,3 +25,4 @@ end include("test_sam.jl") include("test_bam.jl") +include("test_crosscheck.jl") diff --git a/test/test_crosscheck.jl b/test/test_crosscheck.jl new file mode 100644 index 0000000..0e853ef --- /dev/null +++ b/test/test_crosscheck.jl @@ -0,0 +1,54 @@ +@testset "Cross Check Properties" begin + + Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl. + Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl + + function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol) + + bam_property = getproperty(XAM.BAM, property) + sam_property = getproperty(XAM.SAM, property) + + if bam_property(bam) != sam_property(sam) + @warn "$property result is not the same" bam_property(bam) sam_property(sam) + return false + end + + return true + end + + samdir = path_of_format("SAM") + bamdir = path_of_format("BAM") + + filenames = [ + "ce#1", + "ce#2", + "ce#5", + "ce#5b", + "ce#unmap", + "ce#unmap1", + "ce#unmap2", + ] + + properties = [ + :position,# POS + :tempname,# QNAME + :mappingquality,# MAPQ + :cigar, # CIGAR + :flag, # FLAG + :sequence, # SEQ + :nextposition, # PNEXT + :templength, # TLEN + ] + + for filename in filenames + + records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam"))) + records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam"))) + + for (bam, sam) in zip(records_bam, records_sam) + @test all(crosscheck.(bam, sam, properties)) == true + end + + end + +end # testset Crosscheck From d38c2becf0dc15b27bd40237f313db74cae593b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Thu, 13 Aug 2020 14:06:43 +1000 Subject: [PATCH 2/2] Minimal adjustements --- src/bam/record.jl | 5 ++++- src/sam/record.jl | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/bam/record.jl b/src/bam/record.jl index a70bb17..ad911b4 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -511,9 +511,12 @@ end Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.LongDNASeq +function sequence(record::Record) checkfilled(record) seqlen = seqlength(record) + if seqlen == 0 + return nothing + end data = Vector{UInt64}(undef, cld(seqlen, 16)) src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1) for i in 1:lastindex(data) diff --git a/src/sam/record.jl b/src/sam/record.jl index 2d94a12..dcddf42 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -206,9 +206,9 @@ Get the 1-based leftmost mapping position of `record`. function position(record::Record)::Int checkfilled(record) pos = unsafe_parse_decimal(Int, record.data, record.pos) - if pos == 0 - missingerror(:position) - end + # if pos == 0 + # missingerror(:position) + # end return pos end @@ -263,9 +263,9 @@ Get the position of the mate/next read of `record`. function nextposition(record::Record)::Int checkfilled(record) pos = unsafe_parse_decimal(Int, record.data, record.pnext) - if pos == 0 - missingerror(:nextposition) - end + # if pos == 0 + # missingerror(:nextposition) + # end return pos end @@ -299,7 +299,8 @@ Get the CIGAR string of `record`. function cigar(record::Record)::String checkfilled(record) if ismissing(record, record.cigar) - missingerror(:cigar) + # missingerror(:cigar) + return "" end return String(record.data[record.cigar]) end @@ -377,9 +378,9 @@ Get the template length of `record`. function templength(record::Record)::Int checkfilled(record) len = unsafe_parse_decimal(Int, record.data, record.tlen) - if len == 0 - missingerror(:tlen) - end + # if len == 0 + # missingerror(:tlen) + # end return len end @@ -392,10 +393,11 @@ end Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.LongDNASeq +function sequence(record::Record) checkfilled(record) if ismissing(record, record.seq) - missingerror(:sequence) + # missingerror(:sequence) + return nothing end seqlen = length(record.seq) ret = BioSequences.LongDNASeq(seqlen)