Merge pull request #52 from BioJulia/release/0.3.0

Release v0.3.0
master
Thomas A. Christensen II 2 years ago committed by GitHub
commit 946e77a734
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,17 +1,45 @@
name: CompatHelper
on:
schedule:
- cron: '0 0 * * *'
- cron: 0 0 * * *
workflow_dispatch:
permissions:
contents: write
pull-requests: write
jobs:
CompatHelper:
runs-on: ubuntu-latest
steps:
- name: Add CompatHelper
run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")'
- name: Run CompatHelper
- name: Check if Julia is already available in the PATH
id: julia_in_path
run: which julia
continue-on-error: true
- name: Install Julia, but only if it is not already available in the PATH
uses: julia-actions/setup-julia@v1
with:
version: '1'
arch: ${{ runner.arch }}
if: steps.julia_in_path.outcome != 'success'
- name: "Add the General registry via Git"
run: |
import Pkg
ENV["JULIA_PKG_SERVER"] = ""
Pkg.Registry.add("General")
shell: julia --color=yes {0}
- name: "Install CompatHelper"
run: |
import Pkg
name = "CompatHelper"
uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
version = "3"
Pkg.add(; name, uuid, version)
shell: julia --color=yes {0}
- name: "Run CompatHelper"
run: |
import CompatHelper
CompatHelper.main()
shell: julia --color=yes {0}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")'
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}

@ -1,5 +1,4 @@
name: Build Documentation
name: Documentation
on:
push:
branches:
@ -10,17 +9,15 @@ on:
pull_request:
jobs:
build:
Documenter:
permissions:
contents: write
name: Documentation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: '1'
- name: Install Dependencies
run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- name: Build and Deploy
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-docdeploy@v1
env:
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
run: julia --color=yes --project=docs/ docs/make.jl
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}

@ -1,12 +1,22 @@
name: TagBot
on:
schedule:
- cron: '0 0 * * *'
issue_comment:
types:
- created
workflow_dispatch:
inputs:
lookback:
default: 3
permissions:
contents: write
jobs:
TagBot:
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
runs-on: ubuntu-latest
steps:
- uses: JuliaRegistries/TagBot@v1
with:
token: ${{ secrets.GITHUB_TOKEN }}
ssh: ${{ secrets.TAGBOT_KEY }}
ssh: ${{ secrets.DOCUMENTER_KEY }}

@ -6,16 +6,20 @@ on:
jobs:
test:
name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.experimental }}
strategy:
fail-fast: false
matrix:
julia-version:
- '1.0' # LTS
- '1.6' # LTS
- '1'
julia-arch: [x64, x86]
os: [ubuntu-latest, windows-latest, macOS-latest]
exclude:
- os: macOS-latest
julia-arch: x86
experimental: [false]
include:
- julia-version: nightly
@ -31,7 +35,7 @@ jobs:
with:
version: ${{ matrix.julia-version }}
- name: Run Tests
uses: julia-actions/julia-runtest@latest
uses: julia-actions/julia-runtest@v1
- name: Create CodeCov
uses: julia-actions/julia-processcoverage@v1
- name: Upload CodeCov

@ -0,0 +1,27 @@
# Changelog
All notable changes to HapLink.jl will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [0.3.0]
## Added
- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29))
- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43))
### Changed
- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21))
- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48))
### Fixed
- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD
[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0

@ -1,7 +1,7 @@
name = "XAM"
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
version = "0.2.8"
version = "0.3.0"
[deps]
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
@ -17,18 +17,19 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
[compat]
Automa = "0.7, 0.8"
BGZFStreams = "0.3.1"
BioAlignments = "2"
BioAlignments = "2.2"
BioGenerics = "0.1"
BioSequences = "2.0.4"
BioSequences = "3"
FormatSpecimens = "1.1"
GenomicFeatures = "2"
Indexes = "0.1"
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
julia = "1"
julia = "1.6"
[extras]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
test = ["FormatSpecimens", "Test"]
test = ["Documenter", "FormatSpecimens", "Test"]

@ -5,7 +5,6 @@
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original
blog post").
@ -66,4 +65,4 @@ Your logo will show up here with a link to your website.
## Questions?
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).

@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[compat]
Documenter = "0.24"
Documenter = "0.27"

@ -2,6 +2,8 @@ using Pkg
using Documenter, XAM
makedocs(
checkdocs = :all,
linkcheck = true,
format = Documenter.HTML(
edit_link = "develop"
),

@ -5,7 +5,6 @@
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).
@ -65,4 +64,4 @@ Your logo will show up here with a link to your website.
## Questions?
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).

@ -4,6 +4,27 @@ export
SAM,
BAM
"""
flag(record::Union{SAM.Record, BAM.Record})::UInt16
Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag
being OR'd together. The possible flags are:
0x0001 template having multiple segments in sequencing
0x0002 each segment properly aligned according to the aligner
0x0004 segment unmapped
0x0008 next segment in the template unmapped
0x0010 SEQ being reverse complemented
0x0020 SEQ of the next segment in the template being reverse complemented
0x0040 the first segment in the template
0x0080 the last segment in the template
0x0100 secondary alignment
0x0200 not passing filters, such as platform/vendor quality controls
0x0400 PCR or optical duplicate
0x0800 supplementary alignment
"""
function flag end
include("sam/sam.jl")
include("bam/bam.jl")

@ -6,6 +6,7 @@ module BAM
using BioGenerics
using GenomicFeatures
using XAM.SAM
import ..XAM: flag
import BGZFStreams
import BioAlignments

@ -50,12 +50,15 @@ function Base.iterate(iter::OverlapIterator)
if refindex === nothing
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
end
@assert iter.reader.index !== nothing
@assert iter.reader.index !== nothing "Reader index cannot be nothing."
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
if !isempty(chunks)
seek(iter.reader, first(chunks).start)
if isempty(chunks)
return nothing
end
state = OverlapIteratorState(refindex, chunks, 1, Record())
seek(iter.reader, state.chunks[state.chunkid].start)
return iterate(iter, state)
end
@ -65,7 +68,7 @@ function Base.iterate(iter::OverlapIterator, state)
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
read!(iter.reader, state.record)
c = compare_intervals(state.record, (state.refindex, iter.interval))
if c == 0
if c == 0 # overlapping
return copy(state.record), state
end
if c > 0
@ -78,6 +81,7 @@ function Base.iterate(iter::OverlapIterator, state)
seek(iter.reader, state.chunks[state.chunkid].start)
end
end
# no more overlapping records
return nothing
end
@ -89,7 +93,7 @@ function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
return -1
end
if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
if rid > interval[1] || (rid == interval[1] && leftposition(record) > last(interval[2]))
# strictly right
return +1
end

@ -11,18 +11,21 @@ mutable struct Record
block_size::Int32
refid::Int32
pos::Int32
bin_mq_nl::UInt32
flag_nc::UInt32
l_read_name::UInt8
mapq::UInt8
bin::UInt16
n_cigar_op::UInt16
flag::UInt16
l_seq::Int32
next_refid::Int32
next_pos::Int32
tlen::Int32
# variable length data
data::Vector{UInt8}
reader::Reader
reader::Union{Reader, Nothing}
function Record()
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
end
end
@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
return a.block_size == b.block_size &&
a.refid == b.refid &&
a.pos == b.pos &&
a.bin_mq_nl == b.bin_mq_nl &&
a.flag_nc == b.flag_nc &&
a.l_read_name == b.l_read_name &&
a.mapq == b.mapq &&
a.bin == b.bin &&
a.n_cigar_op == b.n_cigar_op &&
a.flag == b.flag &&
a.l_seq == b.l_seq &&
a.next_refid == b.next_refid &&
a.next_pos == b.next_pos &&
@ -60,19 +66,13 @@ end
function Base.copy(record::Record)
copy = Record()
copy.block_size = record.block_size
copy.refid = record.refid
copy.pos = record.pos
copy.bin_mq_nl = record.bin_mq_nl
copy.flag_nc = record.flag_nc
copy.l_seq = record.l_seq
copy.next_refid = record.next_refid
copy.next_pos = record.next_pos
copy.tlen = record.tlen
copy.data = record.data[1:data_size(record)]
if isdefined(record, :reader)
copy.reader = record.reader
GC.@preserve copy record begin
dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
src_pointer = Ptr{UInt8}(pointer_from_objref(record))
unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
end
copy.data = record.data[1:data_size(record)]
copy.reader = record.reader
return copy
end
@ -80,15 +80,18 @@ function Base.empty!(record::Record)
record.block_size = 0
record.refid = 0
record.pos = 0
record.bin_mq_nl = 0
record.flag_nc = 0
record.l_read_name = 0
record.mapq = 0
record.bin = 0
record.flag = 0
record.n_cigar_op = 0
record.l_seq = 0
record.next_refid = 0
record.next_pos = 0
record.tlen = 0
#Note: data will be overwritten and indexed using data_size.
return record
end
@ -125,14 +128,9 @@ end
# Accessor Fuctions
# -----------------
"""
flag(record::Record)::UInt16
Get the bitwise flag of `record`.
"""
function flag(record::Record)::UInt16
checkfilled(record)
return UInt16(record.flag_nc >> 16)
return record.flag
end
function hasflag(record::Record)
@ -318,7 +316,7 @@ end
Get the mapping quality of `record`.
"""
function mappingquality(record::Record)::UInt8
return UInt8((record.bin_mq_nl >> 8) & 0xff)
return record.mapq
end
function hasmappingquality(record::Record)
@ -397,7 +395,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
end
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF
cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
if !checkCG
return cigaridx, nops
end
@ -507,13 +505,16 @@ function hastemplength(record::Record)
end
"""
sequence(record::Record)::BioSequences.LongDNASeq
sequence(record::Record)::BioSequences.LongDNA{4}
Get the segment sequence of `record`.
"""
function sequence(record::Record)::BioSequences.LongDNASeq
function sequence(record::Record)
checkfilled(record)
seqlen = seqlength(record)
if seqlen == 0
return nothing
end
data = Vector{UInt64}(undef, cld(seqlen, 16))
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
for i in 1:lastindex(data)
@ -521,7 +522,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq
x = unsafe_load(src, i)
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
end
return BioSequences.LongDNASeq(data, 1:seqlen, false)
return BioSequences.LongDNA{4}(data, UInt(seqlen))
end
function hassequence(record::Record)
@ -543,15 +544,15 @@ function hasseqlength(record::Record)
end
"""
quality(record::Record)::Vector{UInt8}
quality(record::Record)
Get the base quality of `record`.
Get the base quality of `record`.
"""
function quality(record::Record)::Vector{UInt8}
function quality(record::Record)
checkfilled(record)
seqlen = seqlength(record)
offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2)
return [reinterpret(Int8, record.data[i+offset]) for i in 1:seqlen]
return record.data[(1+offset):(seqlen+offset)]
end
function hasquality(record::Record)
@ -661,5 +662,5 @@ end
# Return the length of the read name.
function seqname_length(record::Record)
return record.bin_mq_nl & 0xff
return record.l_read_name
end

@ -37,7 +37,7 @@ function Base.write(writer::Writer, record::Record)
end
function write_header(stream, header, refseqnames, refseqlens)
@assert length(refseqnames) == length(refseqlens)
@assert length(refseqnames) == length(refseqlens) "Lengths of refseq names and lengths must match."
n = 0
# magic bytes

@ -16,11 +16,14 @@ for (name, bits, doc) in [
(:QCFAIL, UInt16(0x200), "QC failure" ),
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
@assert bits isa UInt16
@assert bits isa UInt16 "The bits must be of type UInt16."
sym = Symbol("FLAG_", name)
doc = string(@sprintf("0x%04x: ", bits), doc)
docstring = """ $sym
SAM/BAM flag: $doc
See also: [`flag`](@ref)
"""
@eval begin
@doc $(doc) const $(sym) = $(bits)
@doc $(docstring) const $(sym) = $(bits)
end
end

@ -38,7 +38,7 @@ function Base.iterate(header::Header, i=1)
end
"""
find(header::Header, key::AbstractString)::Vector{MetaInfo}
findall(header::Header, key::AbstractString)::Vector{MetaInfo}
Find metainfo objects satisfying `SAM.tag(metainfo) == key`.
"""

@ -10,12 +10,16 @@ function Reader(state::State{S}) where {S <: TranscodingStream}
rdr = Reader(state, Header())
cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
rdr.state.state = sam_machine_body.start_state
rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body.
rdr.state.linenum = ln
rdr.state.filled = false
if cs != -1 && cs != 0 #Note: the header is finished when the state machine fails to transition after a new line (state 1).
throw(ArgumentError("Malformed SAM file header at line $(ln). Machine failed to transition from state $(cs)."))
end
return rdr
end
@ -65,18 +69,18 @@ end
function index!(record::MetaInfo)
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
found = index!(stream, record)
if !found
throw(ArgumentError("invalid SAM metadata"))
cs = index!(stream, record)
if cs != 0
throw(ArgumentError("Invalid SAM metadata. Machine failed to transition from state $(cs)."))
end
return record
end
function index!(record::Record)
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
found = index!(stream, record)
if !found
throw(ArgumentError("invalid SAM record"))
cs = index!(stream, record)
if cs != 0
throw(ArgumentError("Invalid SAM record. Machine failed to transition from state $(cs)."))
end
return record
end
@ -110,5 +114,6 @@ function Base.read!(rdr::Reader, record::Record)
throw(EOFError())
end
throw(ArgumentError("malformed SAM file"))
throw(ArgumentError("Malformed SAM file record at line $(ln). Machine failed to transition from state $(cs)."))
end

@ -41,7 +41,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
co.actions[:enter] = [:pos1]
co.actions[:exit] = [:metainfo_tag]
comment = re"[^\r\n]*"
comment = re"[^\r\n]*" # Note: Only single line comments are allowed.
comment.actions[:enter] = [:pos1]
comment.actions[:exit] = [:metainfo_val]
@ -137,7 +137,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
header = rep(cat(metainfo, newline))
header.actions[:exit] = [:header]
body = record * rep(newline * record) * opt(newline)
body = rep(cat(record, newline))
body.actions[:exit] = [:body]
sam = cat(header, body)
@ -168,13 +168,6 @@ function appendfrom!(dst, dpos, src, spos, n)
return dst
end
const action_metainfo = quote
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
metainfo.filled = 1:(p-@markpos)
found_metainfo = true
end
const sam_actions_metainfo = Dict(
:mark => :(@mark),
:pos1 => :(pos1 = @relpos(p)),
@ -183,7 +176,10 @@ const sam_actions_metainfo = Dict(
:metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
:metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
:metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
:metainfo => action_metainfo
:metainfo => quote
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
metainfo.filled = 1:(p-@markpos)
end
)
const sam_actions_header = merge(
@ -191,16 +187,11 @@ const sam_actions_header = merge(
Dict(
:countline => :(linenum += 1),
:metainfo => quote
$(action_metainfo)
$(sam_actions_metainfo[:metainfo])
push!(header, metainfo)
metainfo = MetaInfo()
end,
:header => quote
finish_header = true
@escape
end
:header => :(@escape)
)
)
@ -222,9 +213,6 @@ const sam_actions_record = Dict(
:record => quote
appendfrom!(record.data, 1, data, @markpos, p-@markpos)
record.filled = 1:(p-@markpos)
found_record = true
@escape
end
)
@ -232,18 +220,15 @@ const sam_actions_body = merge(
sam_actions_record,
Dict(
:countline => :(linenum += 1),
:body => quote
finish_body = true
:record => quote
found_record = true
$(sam_actions_record[:record])
@escape
end
end,
:body => :(@escape)
)
)
# const sam_actions = merge(
# sam_actions_header,
# sam_actions_body
# )
const sam_context = Automa.CodeGenContext(
generator = :goto,
checkbounds = false,
@ -253,43 +238,24 @@ const sam_context = Automa.CodeGenContext(
const sam_initcode_metainfo = quote
pos1 = 0
pos2 = 0
found_metainfo = false
end
const sam_initcode_record = quote
pos = 0
found_record = false
end
const sam_initcode_header = quote
$(sam_initcode_metainfo)
metainfo = MetaInfo()
finish_header = false
cs, linenum = state
end
const sam_initcode_record = quote
pos = 0
end
const sam_initcode_body = quote
$(sam_initcode_record)
finish_body = false
found_record = false
cs, linenum = state
end
const sam_loopcode_metainfo = quote
if cs < 0
throw(ArgumentError("malformed metainfo at pos $(p)"))
end
if found_metainfo
@goto __return__
end
end
const sam_returncode_metainfo = quote
return found_metainfo
end
Automa.Stream.generate_reader(
:index!,
sam_machine_metainfo,
@ -297,19 +263,10 @@ Automa.Stream.generate_reader(
actions = sam_actions_metainfo,
context = sam_context,
initcode = sam_initcode_metainfo,
loopcode = sam_loopcode_metainfo,
returncode = sam_returncode_metainfo
) |> eval
const sam_loopcode_header = quote
if finish_header
@goto __return__
end
end
const sam_returncode_header = quote
return cs, linenum, finish_header
return cs, linenum
end
Automa.Stream.generate_reader(
@ -319,40 +276,15 @@ Automa.Stream.generate_reader(
actions = sam_actions_header,
context = sam_context,
initcode = sam_initcode_header,
loopcode = sam_loopcode_header,
returncode = sam_returncode_header
) |> eval
const sam_loopcode_record = quote
if cs < 0
throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)"))
end
# # if cs != 0
# # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
# # end
if found_record
@goto __return__
end
end
const sam_loopcode_body = quote
$(sam_loopcode_record)
if finish_body
if found_record
@goto __return__
end
end
const sam_returncode_record = quote
return found_record
end
Automa.Stream.generate_reader(
:index!,
sam_machine_record,
@ -360,12 +292,9 @@ Automa.Stream.generate_reader(
actions = sam_actions_record,
context = sam_context,
initcode = sam_initcode_record,
loopcode = sam_loopcode_record,
returncode = sam_returncode_record
) |> eval
const sam_returncode_body = quote
return cs, linenum, found_record
end

@ -2,10 +2,11 @@
# ==========
mutable struct Record
# data and filled range
# Data and filled range.
data::Vector{UInt8}
filled::UnitRange{Int}
# indexes
filled::UnitRange{Int} # Note: Specifies the data in use.
# Mandatory fields.
qname::UnitRange{Int}
flag::UnitRange{Int}
rname::UnitRange{Int}
@ -17,6 +18,8 @@ mutable struct Record
tlen::UnitRange{Int}
seq::UnitRange{Int}
qual::UnitRange{Int}
# Auxiliary fields.
fields::Vector{UnitRange{Int}}
end
@ -147,11 +150,6 @@ end
# Accessor Functions
# ------------------
"""
flag(record::Record)::UInt16
Get the bitwise flag of `record`.
"""
function flag(record::Record)::UInt16
checkfilled(record)
return unsafe_parse_decimal(UInt16, record.data, record.flag)
@ -206,9 +204,9 @@ Get the 1-based leftmost mapping position of `record`.
function position(record::Record)::Int
checkfilled(record)
pos = unsafe_parse_decimal(Int, record.data, record.pos)
if pos == 0
missingerror(:position)
end
# if pos == 0
# missingerror(:position)
# end
return pos
end
@ -263,9 +261,9 @@ Get the position of the mate/next read of `record`.
function nextposition(record::Record)::Int
checkfilled(record)
pos = unsafe_parse_decimal(Int, record.data, record.pnext)
if pos == 0
missingerror(:nextposition)
end
# if pos == 0
# missingerror(:nextposition)
# end
return pos
end
@ -299,7 +297,8 @@ Get the CIGAR string of `record`.
function cigar(record::Record)::String
checkfilled(record)
if ismissing(record, record.cigar)
missingerror(:cigar)
# missingerror(:cigar)
return ""
end
return String(record.data[record.cigar])
end
@ -377,9 +376,9 @@ Get the template length of `record`.
function templength(record::Record)::Int
checkfilled(record)
len = unsafe_parse_decimal(Int, record.data, record.tlen)
if len == 0
missingerror(:tlen)
end
# if len == 0
# missingerror(:tlen)
# end
return len
end
@ -388,17 +387,18 @@ function hastemplength(record::Record)
end
"""
sequence(record::Record)::BioSequences.LongDNASeq
sequence(record::Record)::BioSequences.LongDNA{4}
Get the segment sequence of `record`.
"""
function sequence(record::Record)::BioSequences.LongDNASeq
function sequence(record::Record)
checkfilled(record)
if ismissing(record, record.seq)
missingerror(:sequence)
# missingerror(:sequence)
return nothing
end
seqlen = length(record.seq)
ret = BioSequences.LongDNASeq(seqlen)
ret = BioSequences.LongDNA{4}(undef, seqlen)
copyto!(ret, 1, record.data, first(record.seq), seqlen)
return ret
end
@ -494,7 +494,7 @@ function Base.getindex(record::Record, tag::AbstractString)
end
if typ == UInt8('A')
@assert lo == hi
@assert lo == hi "Values lo and hi must be equivalent."
return Char(record.data[lo])
end
if typ == UInt8('i')

@ -11,6 +11,7 @@ import BioGenerics.Exceptions: missingerror
import BioGenerics.Automa: State
import BioSequences
import TranscodingStreams: TranscodingStreams, TranscodingStream
import ..XAM: flag
using Printf: @sprintf
@ -46,20 +47,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I
return sign * x
end
# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams).
# import BufferedStreams: BufferedStreams, BufferedInputStream
# # A type keeping track of a ragel-based parser's state.
# mutable struct State{T<:BufferedInputStream}
# stream::T # input stream
# cs::Int # current DFA state of Ragel
# linenum::Int # line number: parser is responsible for updating this
# finished::Bool # true if finished (regardless of where in the stream we are)
# end
# function State(initstate::Int, input::BufferedInputStream)
# return State(input, initstate, 1, false)
# end
include("flags.jl")
include("metainfo.jl")

@ -1,4 +1,5 @@
using Test
using Documenter
using BioGenerics
using FormatSpecimens
@ -23,6 +24,13 @@ function randrange(range)
end
include("test_sam.jl")
include("test_bam.jl")
include("test_issues.jl")
@testset "XAM" begin
include("test_sam.jl")
include("test_bam.jl")
include("test_issues.jl")
include("test_crosscheck.jl")
# Include doctests.
DocMeta.setdocmeta!(XAM, :DocTestSetup, :(using XAM); recursive=true)
doctest(XAM; manual = false)
end

@ -104,9 +104,12 @@
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
end
new_record = convert(BAM.Record, array)
@test record.bin_mq_nl == new_record.bin_mq_nl
@test record.l_read_name == new_record.l_read_name
@test record.mapq == new_record.mapq
@test record.bin == new_record.bin
@test record.block_size == new_record.block_size
@test record.flag_nc == new_record.flag_nc
@test record.flag == new_record.flag
@test record.n_cigar_op == new_record.n_cigar_op
@test record.l_seq == new_record.l_seq
@test record.next_refid == new_record.next_refid
@test record.next_pos == new_record.next_pos
@ -162,18 +165,21 @@
if length(xs) != length(ys)
return false
end
for (x, y) in zip(xs, ys)
for (a, b) in zip(xs, ys)
if !(
x.block_size == y.block_size &&
x.refid == y.refid &&
x.pos == y.pos &&
x.bin_mq_nl == y.bin_mq_nl &&
x.flag_nc == y.flag_nc &&
x.l_seq == y.l_seq &&
x.next_refid == y.next_refid &&
x.next_pos == y.next_pos &&
x.tlen == y.tlen &&
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
a.block_size == b.block_size &&
a.refid == b.refid &&
a.pos == b.pos &&
a.l_read_name == b.l_read_name &&
a.mapq == b.mapq &&
a.bin == b.bin &&
a.n_cigar_op == b.n_cigar_op &&
a.flag == b.flag &&
a.l_seq == b.l_seq &&
a.next_refid == b.next_refid &&
a.next_pos == b.next_pos &&
a.tlen == b.tlen &&
a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
return false
end
end

@ -0,0 +1,54 @@
@testset "Cross Check Properties" begin
Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl.
Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl
function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol)
bam_property = getproperty(XAM.BAM, property)
sam_property = getproperty(XAM.SAM, property)
if bam_property(bam) != sam_property(sam)
@warn "$property result is not the same" bam_property(bam) sam_property(sam)
return false
end
return true
end
samdir = path_of_format("SAM")
bamdir = path_of_format("BAM")
filenames = [
"ce#1",
"ce#2",
"ce#5",
"ce#5b",
"ce#unmap",
"ce#unmap1",
"ce#unmap2",
]
properties = [
:position,# POS
:tempname,# QNAME
:mappingquality,# MAPQ
:cigar, # CIGAR
:flag, # FLAG
:sequence, # SEQ
:nextposition, # PNEXT
:templength, # TLEN
]
for filename in filenames
records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam")))
records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam")))
for (bam, sam) in zip(records_bam, records_sam)
@test all(crosscheck.(bam, sam, properties)) == true
end
end
end # testset Crosscheck

@ -199,6 +199,6 @@
records = open(collect, SAM.Reader, file_sam)
@test records == []
end
end

Loading…
Cancel
Save