1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-11-14 22:33:14 +00:00

Merge branch 'release/v0.2.5'

This commit is contained in:
Ciarán O'Mara 2020-06-22 10:48:03 +10:00
commit dfa5a3217f
18 changed files with 562 additions and 609 deletions

View file

@ -2,37 +2,16 @@ name: CompatHelper
on:
schedule:
- cron: '00 * * * *'
- cron: '0 0 * * *'
jobs:
CompatHelper:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.3.0]
julia-arch: [x86]
os: [ubuntu-latest]
runs-on: ubuntu-latest
steps:
- uses: julia-actions/setup-julia@latest
with:
version: ${{ matrix.julia-version }}
- name: Add CompatHelper
run: julia -e 'using Pkg; Pkg.add("CompatHelper")'
- name: CompatHelper.main
run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")'
- name: Run CompatHelper
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: julia -e '
using CompatHelper, Pkg;
my_registries = [
Pkg.RegistrySpec(
name = "BioJuliaRegistry",
uuid = "ccbd2cc2-2954-11e9-1ccf-f3e7900901ca",
url = "https://github.com/BioJulia/BioJuliaRegistry.git"
),
Pkg.RegistrySpec(
name = "General",
uuid = "23338594-aafe-5451-b93e-139f81909106",
url = "https://github.com/JuliaRegistries/General.git"
)
];
CompatHelper.main(; registries = my_registries, master_branch = "master");'
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")'

View file

@ -1,34 +1,26 @@
name: Documentation
name: Build Documentation
on:
push:
branches:
- 'master'
- 'develop'
- 'release/.*'
tags: '*'
pull_request:
push:
branches:
- master
- develop
- release/*
tags: '*'
pull_request:
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: [1.3.0]
julia-arch: [x86]
os: [ubuntu-latest]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1.0.0
- uses: julia-actions/setup-julia@latest
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.julia-version }}
- name: Install dependencies
run: |
julia ci_prep.jl;
julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and deploy
version: '1'
- name: Install Dependencies
run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and Deploy
env:
# https://github.com/JuliaDocs/Documenter.jl/issues/1177
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
run: julia --project=docs/ --color=yes docs/make.jl
run: julia --color=yes --project=docs/ docs/make.jl

View file

@ -1,7 +1,7 @@
name: TagBot
on:
schedule:
- cron: '0 * * * *'
- cron: '0 0 * * *'
jobs:
TagBot:
runs-on: ubuntu-latest
@ -10,4 +10,3 @@ jobs:
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.TAGBOT_KEY }}
registry: BioJulia/BioJuliaRegistry

View file

@ -1,22 +1,44 @@
name: Unit tests
name: Unit Tests
on: [push, pull_request]
on:
- push
- pull_request
jobs:
test:
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
fail-fast: false
matrix:
julia-version: ['1.1', '1.2', '1.3']
julia-arch: [x64]
julia-version:
- '1.0' # LTS
- '1'
julia-arch: [x64, x86]
os: [ubuntu-latest, windows-latest, macOS-latest]
experimental: [false]
include:
- julia-version: nightly
julia-arch: x64
os: ubuntu-latest
experimental: true
steps:
- uses: actions/checkout@v1.0.0
- uses: julia-actions/setup-julia@v1
- name: Checkout Repository
uses: actions/checkout@v2
- name: Setup Julia
uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.julia-version }}
arch: ${{ matrix.julia-arch }}
- name: Install dependencies
run: julia ci_prep.jl
- uses: julia-actions/julia-runtest@master
- name: Run Tests
uses: julia-actions/julia-runtest@latest
- name: Create CodeCov
uses: julia-actions/julia-processcoverage@v1
- name: Upload CodeCov
uses: codecov/codecov-action@v1
with:
file: ./lcov.info
flags: unittests
name: codecov-umbrella
fail_ci_if_error: false
token: ${{ secrets.CODECOV_TOKEN }}

View file

@ -1,7 +1,7 @@
name = "XAM"
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
version = "0.2.4"
version = "0.2.5"
[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
@ -19,11 +19,11 @@ Automa = "0.7, 0.8"
BGZFStreams = "0.3"
BioAlignments = "2"
BioGenerics = "0.1"
BioSequences = "2"
BioSequences = "2.0.4"
GenomicFeatures = "2"
Indexes = "0.1"
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
julia = "1.1"
julia = "1"
[extras]
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"

View file

@ -11,18 +11,11 @@
blog post").
## Description
XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.
The XAM package provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.
## Installation
The latest version of XAM is made available to install through BioJulia's package registry.
By default, Julia's package manager only includes the "General" package registry.
To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command:
```julia
registry add https://github.com/BioJulia/BioJuliaRegistry.git
```
Once the registry is added to your configuration, you can install XAM while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following:
You can install the XAM package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/).
Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command:
```julia
add XAM
```

View file

@ -1,3 +0,0 @@
using Pkg.Registry
Registry.add(Registry.RegistrySpec(url = "https://github.com/BioJulia/BioJuliaRegistry.git"))
Registry.add(Registry.RegistrySpec(url = "https://github.com/JuliaRegistries/General.git"))

View file

@ -1,2 +0,0 @@
[deps]
Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037"

View file

@ -1,11 +0,0 @@
get(ENV, "TRAVIS_OS_NAME", "") == "linux" || exit()
get(ENV, "TRAVIS_JULIA_VERSION", "") == "1.3" || exit()
using Pkg
Pkg.instantiate()
using Coverage
cd(joinpath(@__DIR__, "..")) do
Codecov.submit(Codecov.process_folder())
end

View file

@ -10,20 +10,11 @@
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).
## Description
XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.
The XAM package provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.
## Installation
The latest version of XAM is made available to install through BioJulia's package registry.
By default, Julia's package manager only includes the "General" package registry.
To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command:
```julia
registry add https://github.com/BioJulia/BioJuliaRegistry.git
```
Once the registry is added to your configuration, you can install XAM while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following:
You can install the XAM package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/).
Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command:
```julia
add XAM
```

View file

@ -287,7 +287,7 @@ SAM.Writer(IOStream(<file my-data.sam>))
```
To make a BAM Writer is slightly different, as you need to use a specific stream type from the (https://github.com/BioJulia/BGZFStreams.jl)[https://github.com/BioJulia/BGZFStreams.jl] package:
To make a BAM Writer is slightly different, as you need to use a specific stream type from the [https://github.com/BioJulia/BGZFStreams.jl](https://github.com/BioJulia/BGZFStreams.jl) package:
```julia
julia> using BGZFStreams

View file

@ -48,7 +48,7 @@ Create a SAM metainfo from `str`.
"""
function MetaInfo(str::AbstractString)
return MetaInfo(Vector{UInt8}(str))
return MetaInfo(collect(UInt8, str))
end
"""
@ -198,7 +198,7 @@ function keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}}
if iscomment(metainfo)
throw(ArgumentError("not a dictionary"))
end
return Pair{String,String}[key => val for (key, val) in zip(keys(metainfo), values(metainfo))]
return Pair{String, String}.(keys(metainfo), values(metainfo))
end
function Base.keys(metainfo::MetaInfo)

View file

@ -1,12 +1,16 @@
# Automa.jl generated readrecord! and readmetainfo! functions
# ========================================
import Automa
import Automa.RegExp: @re_str
import Automa.Stream: @mark, @markpos, @relpos, @abspos
# file = header . body
# header = metainfo*
# body = record*
const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = (function ()
isinteractive() && info("compiling SAM")
isinteractive() && @info "compiling SAM"
cat = Automa.RegExp.cat
rep = Automa.RegExp.rep
@ -161,21 +165,15 @@ function appendfrom!(dst, dpos, src, spos, n)
if length(dst) < dpos + n - 1
resize!(dst, dpos + n - 1)
end
copyto!(dst, dpos, src, spos, n)
unsafe_copyto!(dst, dpos, src, spos, n)
return dst
end
const action_metainfo = quote
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
metainfo.filled = 1:(p-@markpos)
let markpos = @markpos()
appendfrom!(metainfo.data, 1, data, markpos, length(markpos:p-1))
metainfo.filled = @relpos(markpos):@relpos(p-1)
found_metainfo = true
end
found_metainfo = true
end
const sam_actions_metainfo = Dict(
@ -227,15 +225,11 @@ const sam_actions_record = Dict(
:record_qual => :(record.qual = pos:@relpos(p-1)),
:record_field => :(push!(record.fields, pos:@relpos(p-1))),
:record => quote
let markpos = @markpos()
appendfrom!(record.data, 1, data, @markpos, p-@markpos)
record.filled = 1:(p-@markpos)
appendfrom!(record.data, 1, data, markpos, length(markpos:p-1))
record.filled = @relpos(markpos):@relpos(p-1)
found_record = true
@escape
end
found_record = true
@escape
end
)

View file

@ -71,7 +71,7 @@ function Record(str::AbstractString)
end
function Base.convert(::Type{Record}, str::AbstractString)
return Record(Vector{UInt8}(str))
return Record(collect(UInt8, str))
end
function Base.:(==)(a::Record, b::Record)
@ -399,7 +399,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq
end
seqlen = length(record.seq)
ret = BioSequences.LongDNASeq(seqlen)
BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
copyto!(ret, 1, record.data, first(record.seq), seqlen)
return ret
end

View file

@ -5,9 +5,6 @@ module SAM
using BioGenerics
import Automa
import Automa.RegExp: @re_str
import Automa.Stream: @mark, @markpos, @relpos, @abspos
import BioAlignments
import BioGenerics: BioGenerics, isfilled, header
import BioGenerics.Exceptions: missingerror

View file

@ -22,482 +22,6 @@ function randrange(range)
end
end
@testset "SAM" begin
samdir = path_of_format("SAM")
@testset "MetaInfo" begin
metainfo = SAM.MetaInfo()
@test !isfilled(metainfo)
@test occursin("not filled", repr(metainfo))
metainfo = SAM.MetaInfo("CO", "some comment (parens)")
@test isfilled(metainfo)
@test string(metainfo) == "@CO\tsome comment (parens)"
@test occursin("CO", repr(metainfo))
@test SAM.tag(metainfo) == "CO"
@test SAM.value(metainfo) == "some comment (parens)"
@test_throws ArgumentError keys(metainfo)
@test_throws ArgumentError values(metainfo)
metainfo = SAM.MetaInfo("HD", ["VN" => "1.0", "SO" => "coordinate"])
@test isfilled(metainfo)
@test string(metainfo) == "@HD\tVN:1.0\tSO:coordinate"
@test occursin("HD", repr(metainfo))
@test SAM.tag(metainfo) == "HD"
@test SAM.value(metainfo) == "VN:1.0\tSO:coordinate"
@test keys(metainfo) == ["VN", "SO"]
@test values(metainfo) == ["1.0", "coordinate"]
@test SAM.keyvalues(metainfo) == ["VN" => "1.0", "SO" => "coordinate"]
@test haskey(metainfo, "VN")
@test haskey(metainfo, "SO")
@test !haskey(metainfo, "GO")
@test metainfo["VN"] == "1.0"
@test metainfo["SO"] == "coordinate"
@test_throws KeyError metainfo["GO"]
end
@testset "Header" begin
header = SAM.Header()
@test isempty(header)
push!(header, SAM.MetaInfo("@HD\tVN:1.0\tSO:coordinate"))
@test !isempty(header)
@test length(header) == 1
push!(header, SAM.MetaInfo("@CO\tsome comment"))
@test length(header) == 2
@test isa(collect(header), Vector{SAM.MetaInfo})
end
@testset "Record" begin
record = SAM.Record()
@test !isfilled(record)
@test !SAM.ismapped(record)
@test repr(record) == "XAM.SAM.Record: <not filled>"
@test_throws ArgumentError SAM.flag(record)
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
@test isfilled(record)
@test occursin(r"^XAM.SAM.Record:\n", repr(record))
@test SAM.ismapped(record)
@test SAM.isprimary(record)
@test SAM.hastempname(record)
@test SAM.tempname(record) == "r001"
@test SAM.hasflag(record)
@test SAM.flag(record) === UInt16(99)
@test SAM.hasrefname(record)
@test SAM.refname(record) == "chr1"
@test SAM.hasposition(record)
@test SAM.position(record) === 7
@test SAM.hasmappingquality(record)
@test SAM.mappingquality(record) === UInt8(30)
@test SAM.hascigar(record)
@test SAM.cigar(record) == "8M2I4M1D3M"
@test SAM.hasnextrefname(record)
@test SAM.nextrefname(record) == "="
@test SAM.hasnextposition(record)
@test SAM.nextposition(record) === 37
@test SAM.hastemplength(record)
@test SAM.templength(record) === 39
@test SAM.hassequence(record)
@test SAM.sequence(record) == dna"TTAGATAAAGGATACTG"
@test !SAM.hasquality(record)
@test_throws MissingFieldException SAM.quality(record)
end
@testset "Reader" begin
reader = open(SAM.Reader, joinpath(samdir, "ce#1.sam"))
@test isa(reader, SAM.Reader)
@test eltype(reader) === SAM.Record
# header
h = header(reader)
@test string.(findall(header(reader), "SQ")) == ["@SQ\tSN:CHROMOSOME_I\tLN:1009800"]
# first record
record = SAM.Record()
read!(reader, record)
@test SAM.ismapped(record)
@test SAM.refname(record) == "CHROMOSOME_I"
@test SAM.position(record) == leftposition(record) == 2
@test SAM.rightposition(record) == rightposition(record) == 102
@test SAM.tempname(record) == seqname(record) == "SRR065390.14978392"
@test SAM.sequence(record) == sequence(record) == dna"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA"
@test SAM.sequence(String, record) == "CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA"
@test SAM.seqlength(record) == 100
@test SAM.quality(record) == (b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" .- 33)
@test SAM.quality(String, record) == "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
@test SAM.flag(record) == 16
@test SAM.cigar(record) == "27M1D73M"
@test SAM.alignment(record) == Alignment([
AlignmentAnchor( 0, 1, OP_START),
AlignmentAnchor( 27, 28, OP_MATCH),
AlignmentAnchor( 27, 29, OP_DELETE),
AlignmentAnchor(100, 102, OP_MATCH)])
@test record["XG"] == 1
@test record["XM"] == 5
@test record["XN"] == 0
@test record["XO"] == 1
@test record["AS"] == -18
@test record["XS"] == -18
@test record["YT"] == "UU"
@test eof(reader)
close(reader)
# rightposition (also implicitly alignlength)
records = collect(open(SAM.Reader, joinpath(samdir, "ce#5b.sam")))
@test SAM.rightposition(records[6]) == rightposition(records[6]) == 83
# iterator
@test length(collect(open(SAM.Reader, joinpath(samdir, "ce#1.sam")))) == 1
@test length(collect(open(SAM.Reader, joinpath(samdir, "ce#2.sam")))) == 2
# IOStream
@test length(collect(SAM.Reader(open(joinpath(samdir, "ce#1.sam"))))) == 1
@test length(collect(SAM.Reader(open(joinpath(samdir, "ce#2.sam"))))) == 2
end
@testset "Round trip" begin
function compare_records(xs, ys)
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
if x.data[x.filled] != y.data[y.filled]
return false
end
end
return true
end
for specimen in list_valid_specimens("SAM")
filepath = joinpath(samdir, filename(specimen))
mktemp() do path, io
# copy
reader = open(SAM.Reader, filepath)
header_original = header(reader)
writer = SAM.Writer(io, header_original)
records = SAM.Record[]
for record in reader
push!(records, record)
write(writer, record)
end
close(reader)
close(writer)
reader = open(SAM.Reader, path)
@test header(reader) == header_original
@test compare_records(collect(reader), records)
close(reader)
end
end
end
@testset "In-Place-Reading Pattern" begin
file_sam = joinpath(samdir, "ce#5b.sam")
records = open(collect, SAM.Reader, file_sam)
reader = open(SAM.Reader, file_sam)
record = SAM.Record()
i = 0
while !eof(reader)
empty!(record) # Reset the record.
read!(reader, record)
i = i + 1
@test records[i] == record
end
close(reader)
end
end
@testset "BAM" begin
bamdir = path_of_format("BAM")
@testset "AuxData" begin
auxdata = BAM.AuxData(UInt8[])
@test isempty(auxdata)
buf = IOBuffer()
write(buf, "NM", UInt8('s'), Int16(1))
auxdata = BAM.AuxData(take!(buf))
@test length(auxdata) == 1
@test auxdata["NM"] === Int16(1)
@test collect(auxdata) == ["NM" => Int16(1)]
buf = IOBuffer()
write(buf, "AS", UInt8('c'), Int8(-18))
write(buf, "NM", UInt8('s'), Int16(1))
write(buf, "XA", UInt8('f'), Float32(3.14))
write(buf, "XB", UInt8('Z'), "some text\0")
write(buf, "XC", UInt8('B'), UInt8('i'), Int32(3), Int32[10, -5, 8])
auxdata = BAM.AuxData(take!(buf))
@test length(auxdata) == 5
@test auxdata["AS"] === Int8(-18)
@test auxdata["NM"] === Int16(1)
@test auxdata["XA"] === Float32(3.14)
@test auxdata["XB"] == "some text"
@test auxdata["XC"] == Int32[10, -5, 8]
@test convert(Dict{String,Any}, auxdata) == Dict(
"AS" => Int8(-18),
"NM" => Int16(1),
"XA" => Float32(3.14),
"XB" => "some text",
"XC" => Int32[10, -5, 8])
end
@testset "Record" begin
record = BAM.Record()
@test !isfilled(record)
@test repr(record) == "XAM.BAM.Record: <not filled>"
@test_throws ArgumentError BAM.flag(record)
end
@testset "Reader" begin
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
@test isa(reader, BAM.Reader)
@test eltype(reader) === BAM.Record
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
# header
h = header(reader)
@test isa(h, SAM.Header)
# first record
record = BAM.Record()
read!(reader, record)
@test BAM.ismapped(record)
@test BAM.isprimary(record)
@test ! BAM.ispositivestrand(record)
@test BAM.refname(record) == "CHROMOSOME_I"
@test BAM.refid(record) === 1
@test BAM.hasnextrefid(record)
@test BAM.nextrefid(record) === 0
@test BAM.hasposition(record) === hasleftposition(record) === true
@test BAM.position(record) === leftposition(record) === 2
@test BAM.hasnextposition(record)
@test BAM.nextposition(record) === 0
@test rightposition(record) == 102
@test BAM.hastempname(record) === hasseqname(record) === true
@test BAM.tempname(record) == seqname(record) == "SRR065390.14978392"
@test BAM.hassequence(record) === hassequence(record) === true
@test BAM.sequence(record) == sequence(record) == dna"""
CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCT
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA
"""
@test BAM.seqlength(record) === 100
@test BAM.hasquality(record)
@test eltype(BAM.quality(record)) == UInt8
@test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"]
@test BAM.flag(record) === UInt16(16)
@test BAM.cigar(record) == "27M1D73M"
@test BAM.alignment(record) == Alignment([
AlignmentAnchor( 0, 1, OP_START),
AlignmentAnchor( 27, 28, OP_MATCH),
AlignmentAnchor( 27, 29, OP_DELETE),
AlignmentAnchor(100, 102, OP_MATCH)])
@test record["XG"] == 1
@test record["XM"] == 5
@test record["XN"] == 0
@test record["XO"] == 1
@test record["AS"] == -18
@test record["XS"] == -18
@test record["YT"] == "UU"
@test keys(record) == ["XG","XM","XN","XO","AS","XS","YT"]
@test values(record) == [1, 5, 0, 1, -18, -18, "UU"]
@test eof(reader)
close(reader)
# Test conversion from byte array to record
dsize = BAM.data_size(record)
array = Vector{UInt8}(undef, BAM.FIXED_FIELDS_BYTES + dsize)
GC.@preserve array record begin
ptr = Ptr{UInt8}(pointer_from_objref(record))
unsafe_copyto!(pointer(array), ptr, BAM.FIXED_FIELDS_BYTES)
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
end
new_record = convert(BAM.Record, array)
@test record.bin_mq_nl == new_record.bin_mq_nl
@test record.block_size == new_record.block_size
@test record.flag_nc == new_record.flag_nc
@test record.l_seq == new_record.l_seq
@test record.next_refid == new_record.next_refid
@test record.next_pos == new_record.next_pos
@test record.refid == new_record.refid
@test record.pos == new_record.pos
@test record.tlen == new_record.tlen
@test record.data == new_record.data
# rightposition (also implicitly alignlength)
records = collect(open(BAM.Reader, joinpath(bamdir, "ce#5b.bam")))
@test BAM.rightposition(records[6]) == rightposition(records[6]) == 83
# iterator
@test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#1.bam")))) == 1
@test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#2.bam")))) == 2
# IOStream
@test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#1.bam"))))) == 1
@test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#2.bam"))))) == 2
end
@testset "Read long CIGARs" begin
function get_cigar_lens(rec::BAM.Record)
cigar_ops, cigar_n = BAM.cigar_rle(rec)
field_ops, field_n = BAM.cigar_rle(rec, false)
cigar_l = length(cigar_ops)
field_l = length(field_ops)
return cigar_l, field_l
end
function check_cigar_vs_field(rec::BAM.Record)
cigar = BAM.cigar(rec)
field = BAM.cigar(rec, false)
cigar_l, field_l = get_cigar_lens(rec)
return cigar != field && cigar_l != field_l
end
function check_cigar_lens(rec::BAM.Record, field_len, cigar_len)
cigar_l, field_l = get_cigar_lens(rec)
return cigar_l == cigar_len && field_l == field_len
end
reader = open(BAM.Reader, joinpath(bamdir, "cigar-64k.bam"))
rec = BAM.Record()
read!(reader, rec)
@test !check_cigar_vs_field(rec)
read!(reader, rec)
@test check_cigar_vs_field(rec)
@test check_cigar_lens(rec, 2, 72091)
end
function compare_records(xs, ys)
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
if !(
x.block_size == y.block_size &&
x.refid == y.refid &&
x.pos == y.pos &&
x.bin_mq_nl == y.bin_mq_nl &&
x.flag_nc == y.flag_nc &&
x.l_seq == y.l_seq &&
x.next_refid == y.next_refid &&
x.next_pos == y.next_pos &&
x.tlen == y.tlen &&
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
return false
end
end
return true
end
@testset "Round trip" begin
for specimen in list_valid_specimens("BAM")
filepath = joinpath(bamdir, filename(specimen))
mktemp() do path, _
# copy
if hastags(specimen) && in("bai", tags(specimen))
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
else
reader = open(BAM.Reader, filepath)
end
header_original = header(reader)
writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))))
records = BAM.Record[]
for record in reader
push!(records, record)
write(writer, record)
end
close(reader)
close(writer)
reader = open(BAM.Reader, path)
@test header(reader) == header_original
@test compare_records(collect(reader), records)
close(reader)
end
end
end
@testset "In-Place-Reading Pattern" begin
file_bam = joinpath(bamdir, "ce#5b.bam")
records = open(collect, BAM.Reader, file_bam)
reader = open(BAM.Reader, file_bam)
record = BAM.Record()
i = 0
while !eof(reader)
empty!(record) # Reset the record.
read!(reader, record)
i = i + 1
@test records[i] == record
end
close(reader)
end
@testset "Random access" begin
filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
@test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator)
@test isa(eachoverlap(reader, GenomicFeatures.Interval("chr1", 1, 100)), BAM.OverlapIterator)
# expected values are counted using samtools
for (refname, interval, expected) in [
("chr1", 1_000:10000, 21),
("chr1", 8_000:10000, 20),
("chr1", 766_000:800_000, 142),
("chr1", 786_000:800_000, 1),
("chr1", 796_000:800_000, 0)]
intsect = eachoverlap(reader, refname, interval)
@test eltype(intsect) == BAM.Record
@test count(_ -> true, intsect) == expected
# check that the intersection iterator is stateless
@test count(_ -> true, intsect) == expected
end
# randomized tests
for n in 1:50
refindex = 1
refname = "chr1"
range = randrange(1:1_000_000)
seekstart(reader)
# linear scan
expected = filter(collect(reader)) do record
BAM.compare_intervals(record, (refindex, range)) == 0
end
# indexed scan
actual = collect(eachoverlap(reader, refname, range))
@test compare_records(actual, expected)
end
close(reader)
filepath = joinpath(bamdir, "R_12h_D06.uniq.q40.bam")
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
@test isempty(collect(eachoverlap(reader, "chr19", 5823708:5846478)))
close(reader)
end
end
include("test_sam.jl")
include("test_bam.jl")

280
test/test_bam.jl Normal file
View file

@ -0,0 +1,280 @@
@testset "BAM" begin
bamdir = path_of_format("BAM")
@testset "AuxData" begin
auxdata = BAM.AuxData(UInt8[])
@test isempty(auxdata)
buf = IOBuffer()
write(buf, "NM", UInt8('s'), Int16(1))
auxdata = BAM.AuxData(take!(buf))
@test length(auxdata) == 1
@test auxdata["NM"] === Int16(1)
@test collect(auxdata) == ["NM" => Int16(1)]
buf = IOBuffer()
write(buf, "AS", UInt8('c'), Int8(-18))
write(buf, "NM", UInt8('s'), Int16(1))
write(buf, "XA", UInt8('f'), Float32(3.14))
write(buf, "XB", UInt8('Z'), "some text\0")
write(buf, "XC", UInt8('B'), UInt8('i'), Int32(3), Int32[10, -5, 8])
auxdata = BAM.AuxData(take!(buf))
@test length(auxdata) == 5
@test auxdata["AS"] === Int8(-18)
@test auxdata["NM"] === Int16(1)
@test auxdata["XA"] === Float32(3.14)
@test auxdata["XB"] == "some text"
@test auxdata["XC"] == Int32[10, -5, 8]
@test convert(Dict{String,Any}, auxdata) == Dict(
"AS" => Int8(-18),
"NM" => Int16(1),
"XA" => Float32(3.14),
"XB" => "some text",
"XC" => Int32[10, -5, 8])
end
@testset "Record" begin
record = BAM.Record()
@test !isfilled(record)
@test repr(record) == "XAM.BAM.Record: <not filled>"
@test_throws ArgumentError BAM.flag(record)
end
@testset "Reader" begin
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
@test isa(reader, BAM.Reader)
@test eltype(reader) === BAM.Record
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
# header
h = header(reader)
@test isa(h, SAM.Header)
# first record
record = BAM.Record()
read!(reader, record)
@test BAM.ismapped(record)
@test BAM.isprimary(record)
@test ! BAM.ispositivestrand(record)
@test BAM.refname(record) == "CHROMOSOME_I"
@test BAM.refid(record) === 1
@test BAM.hasnextrefid(record)
@test BAM.nextrefid(record) === 0
@test BAM.hasposition(record) === hasleftposition(record) === true
@test BAM.position(record) === leftposition(record) === 2
@test BAM.hasnextposition(record)
@test BAM.nextposition(record) === 0
@test rightposition(record) == 102
@test BAM.hastempname(record) === hasseqname(record) === true
@test BAM.tempname(record) == seqname(record) == "SRR065390.14978392"
@test BAM.hassequence(record) === hassequence(record) === true
@test BAM.sequence(record) == sequence(record) == dna"""
CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCT
AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA
"""
@test BAM.seqlength(record) === 100
@test BAM.hasquality(record)
@test eltype(BAM.quality(record)) == UInt8
@test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"]
@test BAM.flag(record) === UInt16(16)
@test BAM.cigar(record) == "27M1D73M"
@test BAM.alignment(record) == Alignment([
AlignmentAnchor( 0, 1, OP_START),
AlignmentAnchor( 27, 28, OP_MATCH),
AlignmentAnchor( 27, 29, OP_DELETE),
AlignmentAnchor(100, 102, OP_MATCH)])
@test record["XG"] == 1
@test record["XM"] == 5
@test record["XN"] == 0
@test record["XO"] == 1
@test record["AS"] == -18
@test record["XS"] == -18
@test record["YT"] == "UU"
@test keys(record) == ["XG","XM","XN","XO","AS","XS","YT"]
@test values(record) == [1, 5, 0, 1, -18, -18, "UU"]
@test eof(reader)
close(reader)
# Test conversion from byte array to record
dsize = BAM.data_size(record)
array = Vector{UInt8}(undef, BAM.FIXED_FIELDS_BYTES + dsize)
GC.@preserve array record begin
ptr = Ptr{UInt8}(pointer_from_objref(record))
unsafe_copyto!(pointer(array), ptr, BAM.FIXED_FIELDS_BYTES)
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
end
new_record = convert(BAM.Record, array)
@test record.bin_mq_nl == new_record.bin_mq_nl
@test record.block_size == new_record.block_size
@test record.flag_nc == new_record.flag_nc
@test record.l_seq == new_record.l_seq
@test record.next_refid == new_record.next_refid
@test record.next_pos == new_record.next_pos
@test record.refid == new_record.refid
@test record.pos == new_record.pos
@test record.tlen == new_record.tlen
@test record.data == new_record.data
# rightposition (also implicitly alignlength)
records = collect(open(BAM.Reader, joinpath(bamdir, "ce#5b.bam")))
@test BAM.rightposition(records[6]) == rightposition(records[6]) == 83
# iterator
@test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#1.bam")))) == 1
@test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#2.bam")))) == 2
# IOStream
@test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#1.bam"))))) == 1
@test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#2.bam"))))) == 2
end
@testset "Read long CIGARs" begin
function get_cigar_lens(rec::BAM.Record)
cigar_ops, cigar_n = BAM.cigar_rle(rec)
field_ops, field_n = BAM.cigar_rle(rec, false)
cigar_l = length(cigar_ops)
field_l = length(field_ops)
return cigar_l, field_l
end
function check_cigar_vs_field(rec::BAM.Record)
cigar = BAM.cigar(rec)
field = BAM.cigar(rec, false)
cigar_l, field_l = get_cigar_lens(rec)
return cigar != field && cigar_l != field_l
end
function check_cigar_lens(rec::BAM.Record, field_len, cigar_len)
cigar_l, field_l = get_cigar_lens(rec)
return cigar_l == cigar_len && field_l == field_len
end
reader = open(BAM.Reader, joinpath(bamdir, "cigar-64k.bam"))
rec = BAM.Record()
read!(reader, rec)
@test !check_cigar_vs_field(rec)
read!(reader, rec)
@test check_cigar_vs_field(rec)
@test check_cigar_lens(rec, 2, 72091)
end
function compare_records(xs, ys)
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
if !(
x.block_size == y.block_size &&
x.refid == y.refid &&
x.pos == y.pos &&
x.bin_mq_nl == y.bin_mq_nl &&
x.flag_nc == y.flag_nc &&
x.l_seq == y.l_seq &&
x.next_refid == y.next_refid &&
x.next_pos == y.next_pos &&
x.tlen == y.tlen &&
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
return false
end
end
return true
end
@testset "Round trip" begin
for specimen in list_valid_specimens("BAM")
filepath = joinpath(bamdir, filename(specimen))
mktemp() do path, _
# copy
if hastags(specimen) && in("bai", tags(specimen))
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
else
reader = open(BAM.Reader, filepath)
end
header_original = header(reader)
writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ"))))
records = BAM.Record[]
for record in reader
push!(records, record)
write(writer, record)
end
close(reader)
close(writer)
reader = open(BAM.Reader, path)
@test header(reader) == header_original
@test compare_records(collect(reader), records)
close(reader)
end
end
end
@testset "In-Place-Reading Pattern" begin
file_bam = joinpath(bamdir, "ce#5b.bam")
records = open(collect, BAM.Reader, file_bam)
reader = open(BAM.Reader, file_bam)
record = BAM.Record()
i = 0
while !eof(reader)
empty!(record) # Reset the record.
read!(reader, record)
i = i + 1
@test records[i] == record
end
close(reader)
end
@testset "Random access" begin
filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
@test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator)
@test isa(eachoverlap(reader, GenomicFeatures.Interval("chr1", 1, 100)), BAM.OverlapIterator)
# expected values are counted using samtools
for (refname, interval, expected) in [
("chr1", 1_000:10000, 21),
("chr1", 8_000:10000, 20),
("chr1", 766_000:800_000, 142),
("chr1", 786_000:800_000, 1),
("chr1", 796_000:800_000, 0)]
intsect = eachoverlap(reader, refname, interval)
@test eltype(intsect) == BAM.Record
@test count(_ -> true, intsect) == expected
# check that the intersection iterator is stateless
@test count(_ -> true, intsect) == expected
end
# randomized tests
for n in 1:50
refindex = 1
refname = "chr1"
range = randrange(1:1_000_000)
seekstart(reader)
# linear scan
expected = filter(collect(reader)) do record
BAM.compare_intervals(record, (refindex, range)) == 0
end
# indexed scan
actual = collect(eachoverlap(reader, refname, range))
@test compare_records(actual, expected)
end
close(reader)
filepath = joinpath(bamdir, "R_12h_D06.uniq.q40.bam")
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
@test isempty(collect(eachoverlap(reader, "chr19", 5823708:5846478)))
close(reader)
end
end

198
test/test_sam.jl Normal file
View file

@ -0,0 +1,198 @@
@testset "SAM" begin
samdir = path_of_format("SAM")
@testset "MetaInfo" begin
metainfo = SAM.MetaInfo()
@test !isfilled(metainfo)
@test occursin("not filled", repr(metainfo))
metainfo = SAM.MetaInfo("CO", "some comment (parens)")
@test isfilled(metainfo)
@test string(metainfo) == "@CO\tsome comment (parens)"
@test occursin("CO", repr(metainfo))
@test SAM.tag(metainfo) == "CO"
@test SAM.value(metainfo) == "some comment (parens)"
@test_throws ArgumentError keys(metainfo)
@test_throws ArgumentError values(metainfo)
metainfo = SAM.MetaInfo("HD", ["VN" => "1.0", "SO" => "coordinate"])
@test isfilled(metainfo)
@test string(metainfo) == "@HD\tVN:1.0\tSO:coordinate"
@test occursin("HD", repr(metainfo))
@test SAM.tag(metainfo) == "HD"
@test SAM.value(metainfo) == "VN:1.0\tSO:coordinate"
@test keys(metainfo) == ["VN", "SO"]
@test values(metainfo) == ["1.0", "coordinate"]
@test SAM.keyvalues(metainfo) == ["VN" => "1.0", "SO" => "coordinate"]
@test haskey(metainfo, "VN")
@test haskey(metainfo, "SO")
@test !haskey(metainfo, "GO")
@test metainfo["VN"] == "1.0"
@test metainfo["SO"] == "coordinate"
@test_throws KeyError metainfo["GO"]
end
@testset "Header" begin
header = SAM.Header()
@test isempty(header)
push!(header, SAM.MetaInfo("@HD\tVN:1.0\tSO:coordinate"))
@test !isempty(header)
@test length(header) == 1
push!(header, SAM.MetaInfo("@CO\tsome comment"))
@test length(header) == 2
@test isa(collect(header), Vector{SAM.MetaInfo})
end
@testset "Record" begin
record = SAM.Record()
@test !isfilled(record)
@test !SAM.ismapped(record)
@test repr(record) == "XAM.SAM.Record: <not filled>"
@test_throws ArgumentError SAM.flag(record)
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
@test isfilled(record)
@test occursin(r"^XAM.SAM.Record:\n", repr(record))
@test SAM.ismapped(record)
@test SAM.isprimary(record)
@test SAM.hastempname(record)
@test SAM.tempname(record) == "r001"
@test SAM.hasflag(record)
@test SAM.flag(record) === UInt16(99)
@test SAM.hasrefname(record)
@test SAM.refname(record) == "chr1"
@test SAM.hasposition(record)
@test SAM.position(record) === 7
@test SAM.hasmappingquality(record)
@test SAM.mappingquality(record) === UInt8(30)
@test SAM.hascigar(record)
@test SAM.cigar(record) == "8M2I4M1D3M"
@test SAM.hasnextrefname(record)
@test SAM.nextrefname(record) == "="
@test SAM.hasnextposition(record)
@test SAM.nextposition(record) === 37
@test SAM.hastemplength(record)
@test SAM.templength(record) === 39
@test SAM.hassequence(record)
@test SAM.sequence(record) == dna"TTAGATAAAGGATACTG"
@test !SAM.hasquality(record)
@test_throws MissingFieldException SAM.quality(record)
end
@testset "Reader" begin
reader = open(SAM.Reader, joinpath(samdir, "ce#1.sam"))
@test isa(reader, SAM.Reader)
@test eltype(reader) === SAM.Record
# header
h = header(reader)
@test string.(findall(header(reader), "SQ")) == ["@SQ\tSN:CHROMOSOME_I\tLN:1009800"]
# first record
record = SAM.Record()
read!(reader, record)
@test SAM.ismapped(record)
@test SAM.refname(record) == "CHROMOSOME_I"
@test SAM.position(record) == leftposition(record) == 2
@test SAM.rightposition(record) == rightposition(record) == 102
@test SAM.tempname(record) == seqname(record) == "SRR065390.14978392"
@test SAM.sequence(record) == sequence(record) == dna"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA"
@test SAM.sequence(String, record) == "CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA"
@test SAM.seqlength(record) == 100
@test SAM.quality(record) == (b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" .- 33)
@test SAM.quality(String, record) == "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
@test SAM.flag(record) == 16
@test SAM.cigar(record) == "27M1D73M"
@test SAM.alignment(record) == Alignment([
AlignmentAnchor( 0, 1, OP_START),
AlignmentAnchor( 27, 28, OP_MATCH),
AlignmentAnchor( 27, 29, OP_DELETE),
AlignmentAnchor(100, 102, OP_MATCH)])
@test record["XG"] == 1
@test record["XM"] == 5
@test record["XN"] == 0
@test record["XO"] == 1
@test record["AS"] == -18
@test record["XS"] == -18
@test record["YT"] == "UU"
@test eof(reader)
close(reader)
# rightposition (also implicitly alignlength)
records = collect(open(SAM.Reader, joinpath(samdir, "ce#5b.sam")))
@test SAM.rightposition(records[6]) == rightposition(records[6]) == 83
# iterator
@test length(collect(open(SAM.Reader, joinpath(samdir, "ce#1.sam")))) == 1
@test length(collect(open(SAM.Reader, joinpath(samdir, "ce#2.sam")))) == 2
# IOStream
@test length(collect(SAM.Reader(open(joinpath(samdir, "ce#1.sam"))))) == 1
@test length(collect(SAM.Reader(open(joinpath(samdir, "ce#2.sam"))))) == 2
end
@testset "Round trip" begin
function compare_records(xs, ys)
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
if x.data[x.filled] != y.data[y.filled]
return false
end
end
return true
end
for specimen in list_valid_specimens("SAM")
filepath = joinpath(samdir, filename(specimen))
mktemp() do path, io
# copy
reader = open(SAM.Reader, filepath)
header_original = header(reader)
writer = SAM.Writer(io, header_original)
records = SAM.Record[]
for record in reader
push!(records, record)
write(writer, record)
end
close(reader)
close(writer)
reader = open(SAM.Reader, path)
@test header(reader) == header_original
@test compare_records(collect(reader), records)
close(reader)
end
end
end
@testset "In-Place-Reading Pattern" begin
file_sam = joinpath(samdir, "ce#5b.sam")
records = open(collect, SAM.Reader, file_sam)
reader = open(SAM.Reader, file_sam)
record = SAM.Record()
i = 0
while !eof(reader)
empty!(record) # Reset the record.
read!(reader, record)
i = i + 1
@test records[i] == record
end
close(reader)
end
end