2020-05-27 11:21:12 +00:00
|
|
|
@testset "BAM" begin
|
|
|
|
bamdir = path_of_format("BAM")
|
|
|
|
|
|
|
|
@testset "AuxData" begin
    # Build an `AuxData` from raw bytes. Each entry is a tuple of `write`-able
    # pieces (two-character key, one-byte type character -- presumably the BAM
    # type code -- and the payload); entries are serialized back to back.
    function encode_aux(entries...)
        io = IOBuffer()
        for pieces in entries
            write(io, pieces...)
        end
        return BAM.AuxData(take!(io))
    end

    # An empty byte vector yields an empty container.
    blank = BAM.AuxData(UInt8[])
    @test isempty(blank)

    # A single Int16 field can be looked up by key and iterated as a pair.
    single = encode_aux(("NM", UInt8('s'), Int16(1)))
    @test length(single) == 1
    @test single["NM"] === Int16(1)
    @test collect(single) == ["NM" => Int16(1)]

    # Several fields with different payload kinds: integers, a float, a
    # NUL-terminated string, and an Int32 array prefixed by its element count.
    mixed = encode_aux(
        ("AS", UInt8('c'), Int8(-18)),
        ("NM", UInt8('s'), Int16(1)),
        ("XA", UInt8('f'), Float32(3.14)),
        ("XB", UInt8('Z'), "some text\0"),
        ("XC", UInt8('B'), UInt8('i'), Int32(3), Int32[10, -5, 8]),
    )
    @test length(mixed) == 5
    @test mixed["AS"] === Int8(-18)
    @test mixed["NM"] === Int16(1)
    @test mixed["XA"] === Float32(3.14)
    @test mixed["XB"] == "some text"
    @test mixed["XC"] == Int32[10, -5, 8]

    # Converting to a dictionary keeps every key/value pair.
    as_dict = Dict(
        "AS" => Int8(-18),
        "NM" => Int16(1),
        "XA" => Float32(3.14),
        "XB" => "some text",
        "XC" => Int32[10, -5, 8])
    @test convert(Dict{String,Any}, mixed) == as_dict
end
|
|
|
|
|
|
|
|
@testset "Record" begin
    # A default-constructed record holds no data yet.
    blank = BAM.Record()
    @test !isfilled(blank)
    # Its printed form says so explicitly.
    @test sprint(show, blank) == "XAM.BAM.Record: <not filled>"
    # Asking an unfilled record for its flag must throw an `ArgumentError`.
    @test_throws ArgumentError BAM.flag(blank)
end
|
|
|
|
|
|
|
|
@testset "Reader" begin
    reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
    @test isa(reader, BAM.Reader)
    @test eltype(reader) === BAM.Record
    @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")

    # header
    h = header(reader)
    @test isa(h, SAM.Header)

    # first record
    record = BAM.Record()
    read!(reader, record)
    @test BAM.ismapped(record)
    @test BAM.isprimary(record)
    @test !BAM.ispositivestrand(record)
    @test BAM.refname(record) == "CHROMOSOME_I"
    @test BAM.refid(record) === 1
    @test BAM.hasnextrefid(record)
    @test BAM.nextrefid(record) === 0
    @test BAM.hasposition(record) === hasleftposition(record) === true
    @test BAM.position(record) === leftposition(record) === 2
    @test BAM.hasnextposition(record)
    @test BAM.nextposition(record) === 0
    @test rightposition(record) == 102
    @test BAM.hastempname(record) === hasseqname(record) === true
    @test BAM.tempname(record) == seqname(record) == "SRR065390.14978392"
    @test BAM.hassequence(record) === hassequence(record) === true
    @test BAM.sequence(record) == sequence(record) == dna"""
    CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCT
    AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA
    """
    @test BAM.seqlength(record) === 100
    @test BAM.hasquality(record)
    @test eltype(BAM.quality(record)) == UInt8
    @test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"]
    @test BAM.flag(record) === UInt16(16)
    @test BAM.cigar(record) == "27M1D73M"
    @test BAM.alignment(record) == Alignment([
        AlignmentAnchor(  0,   1,   0, OP_START),
        AlignmentAnchor( 27,  28,  27, OP_MATCH),
        AlignmentAnchor( 27,  29,  28, OP_DELETE),
        AlignmentAnchor(100, 102, 101, OP_MATCH)])
    @test record["XG"] == 1
    @test record["XM"] == 5
    @test record["XN"] == 0
    @test record["XO"] == 1
    @test record["AS"] == -18
    @test record["XS"] == -18
    @test record["YT"] == "UU"
    @test keys(record) == ["XG","XM","XN","XO","AS","XS","YT"]
    @test values(record) == [1, 5, 0, 1, -18, -18, "UU"]
    # The file holds a single record, so the reader must now be exhausted.
    @test eof(reader)
    close(reader)

    # Test conversion from byte array to record
    dsize = BAM.data_size(record)
    array = Vector{UInt8}(undef, BAM.FIXED_FIELDS_BYTES + dsize)
    # Both objects must stay rooted while raw pointers into them are in use.
    GC.@preserve array record begin
        # Copy the fixed-size leading bytes straight out of the record object,
        # then append the first `dsize` bytes of its data buffer.
        ptr = Ptr{UInt8}(pointer_from_objref(record))
        unsafe_copyto!(pointer(array), ptr, BAM.FIXED_FIELDS_BYTES)
        unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
    end
    new_record = convert(BAM.Record, array)
    @test record.l_read_name == new_record.l_read_name
    @test record.mapq == new_record.mapq
    @test record.bin == new_record.bin
    @test record.block_size == new_record.block_size
    @test record.flag == new_record.flag
    @test record.n_cigar_op == new_record.n_cigar_op
    @test record.l_seq == new_record.l_seq
    @test record.next_refid == new_record.next_refid
    @test record.next_pos == new_record.next_pos
    @test record.refid == new_record.refid
    @test record.pos == new_record.pos
    @test record.tlen == new_record.tlen
    @test record.data == new_record.data

    # rightposition (also implicitly alignlength)
    # `open(collect, ...)` is used instead of `collect(open(...))` so the
    # reader does not stay open after the records have been gathered.
    records = open(collect, BAM.Reader, joinpath(bamdir, "ce#5b.bam"))
    @test BAM.rightposition(records[6]) == rightposition(records[6]) == 83

    # iterator
    @test length(open(collect, BAM.Reader, joinpath(bamdir, "ce#1.bam"))) == 1
    @test length(open(collect, BAM.Reader, joinpath(bamdir, "ce#2.bam"))) == 2

    # IOStream
    # Wrap an already-open stream in a reader; the `do` form closes the
    # stream again once the records have been counted.
    for (bamfile, nrecords) in (("ce#1.bam", 1), ("ce#2.bam", 2))
        open(joinpath(bamdir, bamfile)) do io
            @test length(collect(BAM.Reader(io))) == nrecords
        end
    end
end
|
|
|
|
|
|
|
|
@testset "Read long CIGARs" begin
    # Return `(cigar_l, field_l)`: the number of operations `BAM.cigar_rle`
    # reports for `rec` with its default arguments and with `false` as the
    # second argument (presumably the raw CIGAR field without the long-CIGAR
    # lookup -- confirm against `BAM.cigar_rle`).
    function get_cigar_lens(rec::BAM.Record)
        cigar_ops = first(BAM.cigar_rle(rec))
        field_ops = first(BAM.cigar_rle(rec, false))
        return length(cigar_ops), length(field_ops)
    end

    # True when both the CIGAR string and the operation count differ between
    # the default call and the call with `false`.
    function check_cigar_vs_field(rec::BAM.Record)
        cigar = BAM.cigar(rec)
        field = BAM.cigar(rec, false)
        cigar_l, field_l = get_cigar_lens(rec)
        return cigar != field && cigar_l != field_l
    end

    # True when the two operation counts match the expected values. Note the
    # argument order: the expected count for the `false` variant comes first.
    function check_cigar_lens(rec::BAM.Record, field_len, cigar_len)
        cigar_l, field_l = get_cigar_lens(rec)
        return cigar_l == cigar_len && field_l == field_len
    end

    # The `do` form closes the reader even if one of the reads throws.
    open(BAM.Reader, joinpath(bamdir, "cigar-64k.bam")) do reader
        rec = BAM.Record()

        # First record: both variants agree.
        read!(reader, rec)
        @test !check_cigar_vs_field(rec)

        # Second record: the variants differ, with 2 operations for the
        # `false` variant and 72091 for the default one.
        read!(reader, rec)
        @test check_cigar_vs_field(rec)
        @test check_cigar_lens(rec, 2, 72091)
    end
end
|
|
|
|
|
|
|
|
# Return `true` when `xs` and `ys` have the same length and every pair of
# records agrees on all fixed fields and on the first `BAM.data_size` bytes of
# its data buffer; return `false` at the first difference.
function compare_records(xs, ys)
    length(xs) == length(ys) || return false

    # Fixed fields, compared in this order.
    fixed_fields = (
        :block_size, :refid, :pos, :l_read_name, :mapq, :bin,
        :n_cigar_op, :flag, :l_seq, :next_refid, :next_pos, :tlen,
    )

    for (a, b) in zip(xs, ys)
        for field in fixed_fields
            getproperty(a, field) == getproperty(b, field) || return false
        end
        # Only the first `data_size` bytes of each buffer are compared.
        a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)] || return false
    end

    return true
end
|
|
|
|
|
|
|
|
@testset "Round trip" begin
    for specimen in list_valid_specimens("BAM")
        filepath = joinpath(bamdir, filename(specimen))
        mktemp() do path, _
            # copy
            # Specimens tagged "bai" are opened together with their index file.
            source = if hastags(specimen) && in("bai", tags(specimen))
                open(BAM.Reader, filepath, index=filepath * ".bai")
            else
                open(BAM.Reader, filepath)
            end

            header_original = header(source)

            # Write to the temporary file; `fillSQ` is enabled only when the
            # source header has no "SQ" entries.
            sink = BGZFStream(path, "w")
            fill_sq = isempty(findall(header(source), "SQ"))
            writer = BAM.Writer(sink, BAM.header(source, fillSQ=fill_sq))

            # Keep every record in memory while streaming it to the writer.
            copied = BAM.Record[]
            for rec in source
                push!(copied, rec)
                write(writer, rec)
            end
            close(source)
            close(writer)

            # Check that EOF_BLOCK gets written.
            eof_block = BAM.BGZFStreams.EOF_BLOCK
            nbytes = filesize(path)
            tail = open(path) do io
                seek(io, nbytes - length(eof_block))
                read(io)
            end
            @test eof_block == tail

            # Read the copy back and compare it with what was written.
            reread = open(BAM.Reader, path)

            @test header(reread) == header_original
            @test compare_records(collect(reread), copied)

            close(reread)
        end
    end
end
|
|
|
|
|
|
|
|
@testset "In-Place-Reading Pattern" begin

    file_bam = joinpath(bamdir, "ce#5b.bam")

    # Reference copy of every record, gathered through iteration.
    records = open(collect, BAM.Reader, file_bam)

    # Read the same file again, reusing one record object for every read.
    reader = open(BAM.Reader, file_bam)
    record = BAM.Record()
    i = 0
    while !eof(reader)
        empty!(record) # Reset the record.
        read!(reader, record)

        i = i + 1
        @test records[i] == record
    end

    close(reader)

    # Without this check the loop could stop early (or never run) and the
    # testset would still pass.
    @test i == length(records)

end
|
|
|
|
|
2022-10-13 13:06:39 +00:00
|
|
|
@testset "BAI" begin

    bam_path = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")

    # Pass the reader a `BAM.BAI` object built from the ".bai" path rather
    # than the path itself.
    bai = BAM.BAI(string(bam_path, ".bai"))
    indexed_reader = open(BAM.Reader, bam_path, index=bai)

    overlaps = eachoverlap(indexed_reader, "chr1", 1:100)
    @test overlaps isa BAM.OverlapIterator

    close(indexed_reader)

    # An integer is not an acceptable `index` value; opening must throw.
    @test_throws ErrorException open(BAM.Reader, bam_path, index=1234)

end
|
|
|
|
|
2020-05-27 11:21:12 +00:00
|
|
|
@testset "Random access" begin
    bam_path = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
    reader = open(BAM.Reader, bam_path, index=string(bam_path, ".bai"))

    # Both the (name, range) form and the `Interval` form produce an
    # `OverlapIterator`.
    @test eachoverlap(reader, "chr1", 1:100) isa BAM.OverlapIterator
    @test eachoverlap(reader, GenomicFeatures.Interval("chr1", 1, 100)) isa BAM.OverlapIterator

    # expected values are counted using samtools
    known_counts = [
        ("chr1",   1_000:10000,    21),
        ("chr1",   8_000:10000,    20),
        ("chr1", 766_000:800_000, 142),
        ("chr1", 786_000:800_000,   1),
        ("chr1", 796_000:800_000,   0)]
    for (chrom, region, nhits) in known_counts
        overlaps = eachoverlap(reader, chrom, region)
        @test eltype(overlaps) == BAM.Record
        @test count(_ -> true, overlaps) == nhits
        # check that the intersection iterator is stateless
        @test count(_ -> true, overlaps) == nhits
    end

    # randomized tests
    for trial in 1:50
        chrom_index = 1
        chrom = "chr1"
        region = randrange(1:1_000_000)
        seekstart(reader)
        # linear scan
        scanned = filter(
            rec -> BAM.compare_intervals(rec, (chrom_index, region)) == 0,
            collect(reader))
        # indexed scan
        indexed = collect(eachoverlap(reader, chrom, region))
        @test compare_records(indexed, scanned)
    end
    close(reader)

    # A second indexed file: this query is expected to return no records.
    second_path = joinpath(bamdir, "R_12h_D06.uniq.q40.bam")
    second_reader = open(BAM.Reader, second_path, index=string(second_path, ".bai"))
    @test isempty(collect(eachoverlap(second_reader, "chr19", 5823708:5846478)))
    close(second_reader)
end
|
|
|
|
end
|