mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-12-23 13:28:16 +00:00
commit
946e77a734
25 changed files with 323 additions and 237 deletions
44
.github/workflows/CompatHelper.yml
vendored
44
.github/workflows/CompatHelper.yml
vendored
|
@ -1,17 +1,45 @@
|
|||
name: CompatHelper
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
- cron: 0 0 * * *
|
||||
workflow_dispatch:
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
jobs:
|
||||
CompatHelper:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Add CompatHelper
|
||||
run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")'
|
||||
- name: Run CompatHelper
|
||||
- name: Check if Julia is already available in the PATH
|
||||
id: julia_in_path
|
||||
run: which julia
|
||||
continue-on-error: true
|
||||
- name: Install Julia, but only if it is not already available in the PATH
|
||||
uses: julia-actions/setup-julia@v1
|
||||
with:
|
||||
version: '1'
|
||||
arch: ${{ runner.arch }}
|
||||
if: steps.julia_in_path.outcome != 'success'
|
||||
- name: "Add the General registry via Git"
|
||||
run: |
|
||||
import Pkg
|
||||
ENV["JULIA_PKG_SERVER"] = ""
|
||||
Pkg.Registry.add("General")
|
||||
shell: julia --color=yes {0}
|
||||
- name: "Install CompatHelper"
|
||||
run: |
|
||||
import Pkg
|
||||
name = "CompatHelper"
|
||||
uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
|
||||
version = "3"
|
||||
Pkg.add(; name, uuid, version)
|
||||
shell: julia --color=yes {0}
|
||||
- name: "Run CompatHelper"
|
||||
run: |
|
||||
import CompatHelper
|
||||
CompatHelper.main()
|
||||
shell: julia --color=yes {0}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
|
||||
run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")'
|
||||
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
|
||||
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
|
||||
|
|
21
.github/workflows/Documentation.yml
vendored
21
.github/workflows/Documentation.yml
vendored
|
@ -1,5 +1,4 @@
|
|||
name: Build Documentation
|
||||
|
||||
name: Documentation
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
|
@ -10,17 +9,15 @@ on:
|
|||
pull_request:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
Documenter:
|
||||
permissions:
|
||||
contents: write
|
||||
name: Documentation
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: julia-actions/setup-julia@v1
|
||||
with:
|
||||
version: '1'
|
||||
- name: Install Dependencies
|
||||
run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
|
||||
- name: Build and Deploy
|
||||
- uses: julia-actions/julia-buildpkg@v1
|
||||
- uses: julia-actions/julia-docdeploy@v1
|
||||
env:
|
||||
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
|
||||
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
|
||||
run: julia --color=yes --project=docs/ docs/make.jl
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
|
||||
|
|
16
.github/workflows/TagBot.yml
vendored
16
.github/workflows/TagBot.yml
vendored
|
@ -1,12 +1,22 @@
|
|||
name: TagBot
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
issue_comment:
|
||||
types:
|
||||
- created
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
lookback:
|
||||
default: 3
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
TagBot:
|
||||
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: JuliaRegistries/TagBot@v1
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ssh: ${{ secrets.TAGBOT_KEY }}
|
||||
ssh: ${{ secrets.DOCUMENTER_KEY }}
|
||||
|
|
8
.github/workflows/UnitTests.yml
vendored
8
.github/workflows/UnitTests.yml
vendored
|
@ -6,16 +6,20 @@ on:
|
|||
|
||||
jobs:
|
||||
test:
|
||||
name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
continue-on-error: ${{ matrix.experimental }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
julia-version:
|
||||
- '1.0' # LTS
|
||||
- '1.6' # LTS
|
||||
- '1'
|
||||
julia-arch: [x64, x86]
|
||||
os: [ubuntu-latest, windows-latest, macOS-latest]
|
||||
exclude:
|
||||
- os: macOS-latest
|
||||
julia-arch: x86
|
||||
experimental: [false]
|
||||
include:
|
||||
- julia-version: nightly
|
||||
|
@ -31,7 +35,7 @@ jobs:
|
|||
with:
|
||||
version: ${{ matrix.julia-version }}
|
||||
- name: Run Tests
|
||||
uses: julia-actions/julia-runtest@latest
|
||||
uses: julia-actions/julia-runtest@v1
|
||||
- name: Create CodeCov
|
||||
uses: julia-actions/julia-processcoverage@v1
|
||||
- name: Upload CodeCov
|
||||
|
|
27
CHANGELOG.md
Normal file
27
CHANGELOG.md
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Changelog
|
||||
|
||||
All notable changes to HapLink.jl will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.3.0]
|
||||
|
||||
## Added
|
||||
|
||||
- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29))
|
||||
- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43))
|
||||
|
||||
### Changed
|
||||
|
||||
- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21))
|
||||
- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48))
|
||||
|
||||
### Fixed
|
||||
|
||||
- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
|
||||
|
||||
[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD
|
||||
[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0
|
11
Project.toml
11
Project.toml
|
@ -1,7 +1,7 @@
|
|||
name = "XAM"
|
||||
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
||||
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
||||
version = "0.2.8"
|
||||
version = "0.3.0"
|
||||
|
||||
[deps]
|
||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||
|
@ -17,18 +17,19 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
|||
[compat]
|
||||
Automa = "0.7, 0.8"
|
||||
BGZFStreams = "0.3.1"
|
||||
BioAlignments = "2"
|
||||
BioAlignments = "2.2"
|
||||
BioGenerics = "0.1"
|
||||
BioSequences = "2.0.4"
|
||||
BioSequences = "3"
|
||||
FormatSpecimens = "1.1"
|
||||
GenomicFeatures = "2"
|
||||
Indexes = "0.1"
|
||||
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
||||
julia = "1"
|
||||
julia = "1.6"
|
||||
|
||||
[extras]
|
||||
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
||||
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[targets]
|
||||
test = ["FormatSpecimens", "Test"]
|
||||
test = ["Documenter", "FormatSpecimens", "Test"]
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
||||
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
||||
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
||||
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
|
||||
|
||||
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original
|
||||
blog post").
|
||||
|
@ -66,4 +65,4 @@ Your logo will show up here with a link to your website.
|
|||
|
||||
|
||||
## Questions?
|
||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||
|
|
|
@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
|||
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||
|
||||
[compat]
|
||||
Documenter = "0.24"
|
||||
Documenter = "0.27"
|
||||
|
|
|
@ -2,6 +2,8 @@ using Pkg
|
|||
using Documenter, XAM
|
||||
|
||||
makedocs(
|
||||
checkdocs = :all,
|
||||
linkcheck = true,
|
||||
format = Documenter.HTML(
|
||||
edit_link = "develop"
|
||||
),
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
||||
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
||||
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
||||
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
|
||||
|
||||
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).
|
||||
|
||||
|
@ -65,4 +64,4 @@ Your logo will show up here with a link to your website.
|
|||
|
||||
|
||||
## Questions?
|
||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||
|
|
21
src/XAM.jl
21
src/XAM.jl
|
@ -4,6 +4,27 @@ export
|
|||
SAM,
|
||||
BAM
|
||||
|
||||
"""
|
||||
flag(record::Union{SAM.Record, BAM.Record})::UInt16
|
||||
|
||||
Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag
|
||||
being OR'd together. The possible flags are:
|
||||
|
||||
0x0001 template having multiple segments in sequencing
|
||||
0x0002 each segment properly aligned according to the aligner
|
||||
0x0004 segment unmapped
|
||||
0x0008 next segment in the template unmapped
|
||||
0x0010 SEQ being reverse complemented
|
||||
0x0020 SEQ of the next segment in the template being reverse complemented
|
||||
0x0040 the first segment in the template
|
||||
0x0080 the last segment in the template
|
||||
0x0100 secondary alignment
|
||||
0x0200 not passing filters, such as platform/vendor quality controls
|
||||
0x0400 PCR or optical duplicate
|
||||
0x0800 supplementary alignment
|
||||
"""
|
||||
function flag end
|
||||
|
||||
include("sam/sam.jl")
|
||||
include("bam/bam.jl")
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ module BAM
|
|||
using BioGenerics
|
||||
using GenomicFeatures
|
||||
using XAM.SAM
|
||||
import ..XAM: flag
|
||||
|
||||
import BGZFStreams
|
||||
import BioAlignments
|
||||
|
|
|
@ -50,12 +50,15 @@ function Base.iterate(iter::OverlapIterator)
|
|||
if refindex === nothing
|
||||
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
||||
end
|
||||
@assert iter.reader.index !== nothing
|
||||
|
||||
@assert iter.reader.index !== nothing "Reader index cannot be nothing."
|
||||
|
||||
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
||||
if !isempty(chunks)
|
||||
seek(iter.reader, first(chunks).start)
|
||||
if isempty(chunks)
|
||||
return nothing
|
||||
end
|
||||
state = OverlapIteratorState(refindex, chunks, 1, Record())
|
||||
seek(iter.reader, state.chunks[state.chunkid].start)
|
||||
return iterate(iter, state)
|
||||
end
|
||||
|
||||
|
@ -65,7 +68,7 @@ function Base.iterate(iter::OverlapIterator, state)
|
|||
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
||||
read!(iter.reader, state.record)
|
||||
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
||||
if c == 0
|
||||
if c == 0 # overlapping
|
||||
return copy(state.record), state
|
||||
end
|
||||
if c > 0
|
||||
|
@ -78,6 +81,7 @@ function Base.iterate(iter::OverlapIterator, state)
|
|||
seek(iter.reader, state.chunks[state.chunkid].start)
|
||||
end
|
||||
end
|
||||
# no more overlapping records
|
||||
return nothing
|
||||
end
|
||||
|
||||
|
@ -89,7 +93,7 @@ function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
|
|||
return -1
|
||||
end
|
||||
|
||||
if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
|
||||
if rid > interval[1] || (rid == interval[1] && leftposition(record) > last(interval[2]))
|
||||
# strictly right
|
||||
return +1
|
||||
end
|
||||
|
|
|
@ -11,18 +11,21 @@ mutable struct Record
|
|||
block_size::Int32
|
||||
refid::Int32
|
||||
pos::Int32
|
||||
bin_mq_nl::UInt32
|
||||
flag_nc::UInt32
|
||||
l_read_name::UInt8
|
||||
mapq::UInt8
|
||||
bin::UInt16
|
||||
n_cigar_op::UInt16
|
||||
flag::UInt16
|
||||
l_seq::Int32
|
||||
next_refid::Int32
|
||||
next_pos::Int32
|
||||
tlen::Int32
|
||||
# variable length data
|
||||
data::Vector{UInt8}
|
||||
reader::Reader
|
||||
reader::Union{Reader, Nothing}
|
||||
|
||||
function Record()
|
||||
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
|
||||
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
|
|||
return a.block_size == b.block_size &&
|
||||
a.refid == b.refid &&
|
||||
a.pos == b.pos &&
|
||||
a.bin_mq_nl == b.bin_mq_nl &&
|
||||
a.flag_nc == b.flag_nc &&
|
||||
a.l_read_name == b.l_read_name &&
|
||||
a.mapq == b.mapq &&
|
||||
a.bin == b.bin &&
|
||||
a.n_cigar_op == b.n_cigar_op &&
|
||||
a.flag == b.flag &&
|
||||
a.l_seq == b.l_seq &&
|
||||
a.next_refid == b.next_refid &&
|
||||
a.next_pos == b.next_pos &&
|
||||
|
@ -60,19 +66,13 @@ end
|
|||
|
||||
function Base.copy(record::Record)
|
||||
copy = Record()
|
||||
copy.block_size = record.block_size
|
||||
copy.refid = record.refid
|
||||
copy.pos = record.pos
|
||||
copy.bin_mq_nl = record.bin_mq_nl
|
||||
copy.flag_nc = record.flag_nc
|
||||
copy.l_seq = record.l_seq
|
||||
copy.next_refid = record.next_refid
|
||||
copy.next_pos = record.next_pos
|
||||
copy.tlen = record.tlen
|
||||
copy.data = record.data[1:data_size(record)]
|
||||
if isdefined(record, :reader)
|
||||
copy.reader = record.reader
|
||||
GC.@preserve copy record begin
|
||||
dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
|
||||
src_pointer = Ptr{UInt8}(pointer_from_objref(record))
|
||||
unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
|
||||
end
|
||||
copy.data = record.data[1:data_size(record)]
|
||||
copy.reader = record.reader
|
||||
return copy
|
||||
end
|
||||
|
||||
|
@ -80,15 +80,18 @@ function Base.empty!(record::Record)
|
|||
record.block_size = 0
|
||||
record.refid = 0
|
||||
record.pos = 0
|
||||
record.bin_mq_nl = 0
|
||||
record.flag_nc = 0
|
||||
record.l_read_name = 0
|
||||
record.mapq = 0
|
||||
record.bin = 0
|
||||
record.flag = 0
|
||||
record.n_cigar_op = 0
|
||||
record.l_seq = 0
|
||||
record.next_refid = 0
|
||||
record.next_pos = 0
|
||||
record.tlen = 0
|
||||
|
||||
#Note: data will be overwritten and indexed using data_size.
|
||||
|
||||
|
||||
return record
|
||||
end
|
||||
|
||||
|
@ -125,14 +128,9 @@ end
|
|||
# Accessor Fuctions
|
||||
# -----------------
|
||||
|
||||
"""
|
||||
flag(record::Record)::UInt16
|
||||
|
||||
Get the bitwise flag of `record`.
|
||||
"""
|
||||
function flag(record::Record)::UInt16
|
||||
checkfilled(record)
|
||||
return UInt16(record.flag_nc >> 16)
|
||||
return record.flag
|
||||
end
|
||||
|
||||
function hasflag(record::Record)
|
||||
|
@ -318,7 +316,7 @@ end
|
|||
Get the mapping quality of `record`.
|
||||
"""
|
||||
function mappingquality(record::Record)::UInt8
|
||||
return UInt8((record.bin_mq_nl >> 8) & 0xff)
|
||||
return record.mapq
|
||||
end
|
||||
|
||||
function hasmappingquality(record::Record)
|
||||
|
@ -397,7 +395,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
|
|||
end
|
||||
|
||||
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
|
||||
cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF
|
||||
cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
|
||||
if !checkCG
|
||||
return cigaridx, nops
|
||||
end
|
||||
|
@ -507,13 +505,16 @@ function hastemplength(record::Record)
|
|||
end
|
||||
|
||||
"""
|
||||
sequence(record::Record)::BioSequences.LongDNASeq
|
||||
sequence(record::Record)::BioSequences.LongDNA{4}
|
||||
|
||||
Get the segment sequence of `record`.
|
||||
"""
|
||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||
function sequence(record::Record)
|
||||
checkfilled(record)
|
||||
seqlen = seqlength(record)
|
||||
if seqlen == 0
|
||||
return nothing
|
||||
end
|
||||
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
||||
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
|
||||
for i in 1:lastindex(data)
|
||||
|
@ -521,7 +522,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq
|
|||
x = unsafe_load(src, i)
|
||||
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
||||
end
|
||||
return BioSequences.LongDNASeq(data, 1:seqlen, false)
|
||||
return BioSequences.LongDNA{4}(data, UInt(seqlen))
|
||||
end
|
||||
|
||||
function hassequence(record::Record)
|
||||
|
@ -543,15 +544,15 @@ function hasseqlength(record::Record)
|
|||
end
|
||||
|
||||
"""
|
||||
quality(record::Record)::Vector{UInt8}
|
||||
quality(record::Record)
|
||||
|
||||
Get the base quality of `record`.
|
||||
Get the base quality of `record`.
|
||||
"""
|
||||
function quality(record::Record)::Vector{UInt8}
|
||||
function quality(record::Record)
|
||||
checkfilled(record)
|
||||
seqlen = seqlength(record)
|
||||
offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2)
|
||||
return [reinterpret(Int8, record.data[i+offset]) for i in 1:seqlen]
|
||||
return record.data[(1+offset):(seqlen+offset)]
|
||||
end
|
||||
|
||||
function hasquality(record::Record)
|
||||
|
@ -661,5 +662,5 @@ end
|
|||
|
||||
# Return the length of the read name.
|
||||
function seqname_length(record::Record)
|
||||
return record.bin_mq_nl & 0xff
|
||||
return record.l_read_name
|
||||
end
|
||||
|
|
|
@ -37,7 +37,7 @@ function Base.write(writer::Writer, record::Record)
|
|||
end
|
||||
|
||||
function write_header(stream, header, refseqnames, refseqlens)
|
||||
@assert length(refseqnames) == length(refseqlens)
|
||||
@assert length(refseqnames) == length(refseqlens) "Lengths of refseq names and lengths must match."
|
||||
n = 0
|
||||
|
||||
# magic bytes
|
||||
|
|
|
@ -16,11 +16,14 @@ for (name, bits, doc) in [
|
|||
(:QCFAIL, UInt16(0x200), "QC failure" ),
|
||||
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
|
||||
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
|
||||
@assert bits isa UInt16
|
||||
@assert bits isa UInt16 "The bits must be of type UInt16."
|
||||
sym = Symbol("FLAG_", name)
|
||||
doc = string(@sprintf("0x%04x: ", bits), doc)
|
||||
docstring = """ $sym
|
||||
SAM/BAM flag: $doc
|
||||
|
||||
See also: [`flag`](@ref)
|
||||
"""
|
||||
@eval begin
|
||||
@doc $(doc) const $(sym) = $(bits)
|
||||
@doc $(docstring) const $(sym) = $(bits)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ function Base.iterate(header::Header, i=1)
|
|||
end
|
||||
|
||||
"""
|
||||
find(header::Header, key::AbstractString)::Vector{MetaInfo}
|
||||
findall(header::Header, key::AbstractString)::Vector{MetaInfo}
|
||||
|
||||
Find metainfo objects satisfying `SAM.tag(metainfo) == key`.
|
||||
"""
|
||||
|
|
|
@ -10,12 +10,16 @@ function Reader(state::State{S}) where {S <: TranscodingStream}
|
|||
|
||||
rdr = Reader(state, Header())
|
||||
|
||||
cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
|
||||
cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
|
||||
|
||||
rdr.state.state = sam_machine_body.start_state
|
||||
rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body.
|
||||
rdr.state.linenum = ln
|
||||
rdr.state.filled = false
|
||||
|
||||
if cs != -1 && cs != 0 #Note: the header is finished when the state machine fails to transition after a new line (state 1).
|
||||
throw(ArgumentError("Malformed SAM file header at line $(ln). Machine failed to transition from state $(cs)."))
|
||||
end
|
||||
|
||||
return rdr
|
||||
end
|
||||
|
||||
|
@ -65,18 +69,18 @@ end
|
|||
|
||||
function index!(record::MetaInfo)
|
||||
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
||||
found = index!(stream, record)
|
||||
if !found
|
||||
throw(ArgumentError("invalid SAM metadata"))
|
||||
cs = index!(stream, record)
|
||||
if cs != 0
|
||||
throw(ArgumentError("Invalid SAM metadata. Machine failed to transition from state $(cs)."))
|
||||
end
|
||||
return record
|
||||
end
|
||||
|
||||
function index!(record::Record)
|
||||
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
||||
found = index!(stream, record)
|
||||
if !found
|
||||
throw(ArgumentError("invalid SAM record"))
|
||||
cs = index!(stream, record)
|
||||
if cs != 0
|
||||
throw(ArgumentError("Invalid SAM record. Machine failed to transition from state $(cs)."))
|
||||
end
|
||||
return record
|
||||
end
|
||||
|
@ -110,5 +114,6 @@ function Base.read!(rdr::Reader, record::Record)
|
|||
throw(EOFError())
|
||||
end
|
||||
|
||||
throw(ArgumentError("malformed SAM file"))
|
||||
throw(ArgumentError("Malformed SAM file record at line $(ln). Machine failed to transition from state $(cs)."))
|
||||
|
||||
end
|
||||
|
|
|
@ -41,7 +41,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
|
|||
co.actions[:enter] = [:pos1]
|
||||
co.actions[:exit] = [:metainfo_tag]
|
||||
|
||||
comment = re"[^\r\n]*"
|
||||
comment = re"[^\r\n]*" # Note: Only single line comments are allowed.
|
||||
comment.actions[:enter] = [:pos1]
|
||||
comment.actions[:exit] = [:metainfo_val]
|
||||
|
||||
|
@ -137,7 +137,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
|
|||
header = rep(cat(metainfo, newline))
|
||||
header.actions[:exit] = [:header]
|
||||
|
||||
body = record * rep(newline * record) * opt(newline)
|
||||
body = rep(cat(record, newline))
|
||||
body.actions[:exit] = [:body]
|
||||
|
||||
sam = cat(header, body)
|
||||
|
@ -168,13 +168,6 @@ function appendfrom!(dst, dpos, src, spos, n)
|
|||
return dst
|
||||
end
|
||||
|
||||
const action_metainfo = quote
|
||||
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
|
||||
metainfo.filled = 1:(p-@markpos)
|
||||
|
||||
found_metainfo = true
|
||||
end
|
||||
|
||||
const sam_actions_metainfo = Dict(
|
||||
:mark => :(@mark),
|
||||
:pos1 => :(pos1 = @relpos(p)),
|
||||
|
@ -183,7 +176,10 @@ const sam_actions_metainfo = Dict(
|
|||
:metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
|
||||
:metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
|
||||
:metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
|
||||
:metainfo => action_metainfo
|
||||
:metainfo => quote
|
||||
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
|
||||
metainfo.filled = 1:(p-@markpos)
|
||||
end
|
||||
)
|
||||
|
||||
const sam_actions_header = merge(
|
||||
|
@ -191,16 +187,11 @@ const sam_actions_header = merge(
|
|||
Dict(
|
||||
:countline => :(linenum += 1),
|
||||
:metainfo => quote
|
||||
$(action_metainfo)
|
||||
$(sam_actions_metainfo[:metainfo])
|
||||
push!(header, metainfo)
|
||||
metainfo = MetaInfo()
|
||||
end,
|
||||
:header => quote
|
||||
|
||||
finish_header = true
|
||||
|
||||
@escape
|
||||
end
|
||||
:header => :(@escape)
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -222,9 +213,6 @@ const sam_actions_record = Dict(
|
|||
:record => quote
|
||||
appendfrom!(record.data, 1, data, @markpos, p-@markpos)
|
||||
record.filled = 1:(p-@markpos)
|
||||
|
||||
found_record = true
|
||||
@escape
|
||||
end
|
||||
)
|
||||
|
||||
|
@ -232,18 +220,15 @@ const sam_actions_body = merge(
|
|||
sam_actions_record,
|
||||
Dict(
|
||||
:countline => :(linenum += 1),
|
||||
:body => quote
|
||||
finish_body = true
|
||||
:record => quote
|
||||
found_record = true
|
||||
$(sam_actions_record[:record])
|
||||
@escape
|
||||
end
|
||||
end,
|
||||
:body => :(@escape)
|
||||
)
|
||||
)
|
||||
|
||||
# const sam_actions = merge(
|
||||
# sam_actions_header,
|
||||
# sam_actions_body
|
||||
# )
|
||||
|
||||
const sam_context = Automa.CodeGenContext(
|
||||
generator = :goto,
|
||||
checkbounds = false,
|
||||
|
@ -253,43 +238,24 @@ const sam_context = Automa.CodeGenContext(
|
|||
const sam_initcode_metainfo = quote
|
||||
pos1 = 0
|
||||
pos2 = 0
|
||||
found_metainfo = false
|
||||
end
|
||||
|
||||
const sam_initcode_record = quote
|
||||
pos = 0
|
||||
found_record = false
|
||||
end
|
||||
|
||||
const sam_initcode_header = quote
|
||||
$(sam_initcode_metainfo)
|
||||
metainfo = MetaInfo()
|
||||
finish_header = false
|
||||
cs, linenum = state
|
||||
end
|
||||
|
||||
const sam_initcode_record = quote
|
||||
pos = 0
|
||||
end
|
||||
|
||||
const sam_initcode_body = quote
|
||||
$(sam_initcode_record)
|
||||
finish_body = false
|
||||
found_record = false
|
||||
cs, linenum = state
|
||||
end
|
||||
|
||||
const sam_loopcode_metainfo = quote
|
||||
|
||||
if cs < 0
|
||||
throw(ArgumentError("malformed metainfo at pos $(p)"))
|
||||
end
|
||||
|
||||
if found_metainfo
|
||||
@goto __return__
|
||||
end
|
||||
end
|
||||
|
||||
const sam_returncode_metainfo = quote
|
||||
return found_metainfo
|
||||
end
|
||||
|
||||
|
||||
Automa.Stream.generate_reader(
|
||||
:index!,
|
||||
sam_machine_metainfo,
|
||||
|
@ -297,19 +263,10 @@ Automa.Stream.generate_reader(
|
|||
actions = sam_actions_metainfo,
|
||||
context = sam_context,
|
||||
initcode = sam_initcode_metainfo,
|
||||
loopcode = sam_loopcode_metainfo,
|
||||
returncode = sam_returncode_metainfo
|
||||
) |> eval
|
||||
|
||||
const sam_loopcode_header = quote
|
||||
|
||||
if finish_header
|
||||
@goto __return__
|
||||
end
|
||||
end
|
||||
|
||||
const sam_returncode_header = quote
|
||||
return cs, linenum, finish_header
|
||||
return cs, linenum
|
||||
end
|
||||
|
||||
Automa.Stream.generate_reader(
|
||||
|
@ -319,38 +276,13 @@ Automa.Stream.generate_reader(
|
|||
actions = sam_actions_header,
|
||||
context = sam_context,
|
||||
initcode = sam_initcode_header,
|
||||
loopcode = sam_loopcode_header,
|
||||
returncode = sam_returncode_header
|
||||
) |> eval
|
||||
|
||||
|
||||
const sam_loopcode_record = quote
|
||||
|
||||
if cs < 0
|
||||
throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)"))
|
||||
end
|
||||
|
||||
# # if cs != 0
|
||||
# # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
|
||||
# # end
|
||||
|
||||
const sam_loopcode_body = quote
|
||||
if found_record
|
||||
@goto __return__
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
const sam_loopcode_body = quote
|
||||
|
||||
$(sam_loopcode_record)
|
||||
|
||||
if finish_body
|
||||
@goto __return__
|
||||
end
|
||||
end
|
||||
|
||||
const sam_returncode_record = quote
|
||||
return found_record
|
||||
end
|
||||
|
||||
Automa.Stream.generate_reader(
|
||||
|
@ -360,12 +292,9 @@ Automa.Stream.generate_reader(
|
|||
actions = sam_actions_record,
|
||||
context = sam_context,
|
||||
initcode = sam_initcode_record,
|
||||
loopcode = sam_loopcode_record,
|
||||
returncode = sam_returncode_record
|
||||
) |> eval
|
||||
|
||||
|
||||
|
||||
const sam_returncode_body = quote
|
||||
return cs, linenum, found_record
|
||||
end
|
||||
|
|
|
@ -2,10 +2,11 @@
|
|||
# ==========
|
||||
|
||||
mutable struct Record
|
||||
# data and filled range
|
||||
# Data and filled range.
|
||||
data::Vector{UInt8}
|
||||
filled::UnitRange{Int}
|
||||
# indexes
|
||||
filled::UnitRange{Int} # Note: Specifies the data in use.
|
||||
|
||||
# Mandatory fields.
|
||||
qname::UnitRange{Int}
|
||||
flag::UnitRange{Int}
|
||||
rname::UnitRange{Int}
|
||||
|
@ -17,6 +18,8 @@ mutable struct Record
|
|||
tlen::UnitRange{Int}
|
||||
seq::UnitRange{Int}
|
||||
qual::UnitRange{Int}
|
||||
|
||||
# Auxiliary fields.
|
||||
fields::Vector{UnitRange{Int}}
|
||||
end
|
||||
|
||||
|
@ -147,11 +150,6 @@ end
|
|||
# Accessor Functions
|
||||
# ------------------
|
||||
|
||||
"""
|
||||
flag(record::Record)::UInt16
|
||||
|
||||
Get the bitwise flag of `record`.
|
||||
"""
|
||||
function flag(record::Record)::UInt16
|
||||
checkfilled(record)
|
||||
return unsafe_parse_decimal(UInt16, record.data, record.flag)
|
||||
|
@ -206,9 +204,9 @@ Get the 1-based leftmost mapping position of `record`.
|
|||
function position(record::Record)::Int
|
||||
checkfilled(record)
|
||||
pos = unsafe_parse_decimal(Int, record.data, record.pos)
|
||||
if pos == 0
|
||||
missingerror(:position)
|
||||
end
|
||||
# if pos == 0
|
||||
# missingerror(:position)
|
||||
# end
|
||||
return pos
|
||||
end
|
||||
|
||||
|
@ -263,9 +261,9 @@ Get the position of the mate/next read of `record`.
|
|||
function nextposition(record::Record)::Int
|
||||
checkfilled(record)
|
||||
pos = unsafe_parse_decimal(Int, record.data, record.pnext)
|
||||
if pos == 0
|
||||
missingerror(:nextposition)
|
||||
end
|
||||
# if pos == 0
|
||||
# missingerror(:nextposition)
|
||||
# end
|
||||
return pos
|
||||
end
|
||||
|
||||
|
@ -299,7 +297,8 @@ Get the CIGAR string of `record`.
|
|||
function cigar(record::Record)::String
|
||||
checkfilled(record)
|
||||
if ismissing(record, record.cigar)
|
||||
missingerror(:cigar)
|
||||
# missingerror(:cigar)
|
||||
return ""
|
||||
end
|
||||
return String(record.data[record.cigar])
|
||||
end
|
||||
|
@ -377,9 +376,9 @@ Get the template length of `record`.
|
|||
function templength(record::Record)::Int
|
||||
checkfilled(record)
|
||||
len = unsafe_parse_decimal(Int, record.data, record.tlen)
|
||||
if len == 0
|
||||
missingerror(:tlen)
|
||||
end
|
||||
# if len == 0
|
||||
# missingerror(:tlen)
|
||||
# end
|
||||
return len
|
||||
end
|
||||
|
||||
|
@ -388,17 +387,18 @@ function hastemplength(record::Record)
|
|||
end
|
||||
|
||||
"""
|
||||
sequence(record::Record)::BioSequences.LongDNASeq
|
||||
sequence(record::Record)::BioSequences.LongDNA{4}
|
||||
|
||||
Get the segment sequence of `record`.
|
||||
"""
|
||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||
function sequence(record::Record)
|
||||
checkfilled(record)
|
||||
if ismissing(record, record.seq)
|
||||
missingerror(:sequence)
|
||||
# missingerror(:sequence)
|
||||
return nothing
|
||||
end
|
||||
seqlen = length(record.seq)
|
||||
ret = BioSequences.LongDNASeq(seqlen)
|
||||
ret = BioSequences.LongDNA{4}(undef, seqlen)
|
||||
copyto!(ret, 1, record.data, first(record.seq), seqlen)
|
||||
return ret
|
||||
end
|
||||
|
@ -494,7 +494,7 @@ function Base.getindex(record::Record, tag::AbstractString)
|
|||
end
|
||||
|
||||
if typ == UInt8('A')
|
||||
@assert lo == hi
|
||||
@assert lo == hi "Values lo and hi must be equivalent."
|
||||
return Char(record.data[lo])
|
||||
end
|
||||
if typ == UInt8('i')
|
||||
|
|
|
@ -11,6 +11,7 @@ import BioGenerics.Exceptions: missingerror
|
|||
import BioGenerics.Automa: State
|
||||
import BioSequences
|
||||
import TranscodingStreams: TranscodingStreams, TranscodingStream
|
||||
import ..XAM: flag
|
||||
|
||||
using Printf: @sprintf
|
||||
|
||||
|
@ -46,20 +47,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I
|
|||
return sign * x
|
||||
end
|
||||
|
||||
# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams).
|
||||
# import BufferedStreams: BufferedStreams, BufferedInputStream
|
||||
# # A type keeping track of a ragel-based parser's state.
|
||||
# mutable struct State{T<:BufferedInputStream}
|
||||
# stream::T # input stream
|
||||
# cs::Int # current DFA state of Ragel
|
||||
# linenum::Int # line number: parser is responsible for updating this
|
||||
# finished::Bool # true if finished (regardless of where in the stream we are)
|
||||
# end
|
||||
|
||||
# function State(initstate::Int, input::BufferedInputStream)
|
||||
# return State(input, initstate, 1, false)
|
||||
# end
|
||||
|
||||
|
||||
include("flags.jl")
|
||||
include("metainfo.jl")
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Test
|
||||
using Documenter
|
||||
|
||||
using BioGenerics
|
||||
using FormatSpecimens
|
||||
|
@ -23,6 +24,13 @@ function randrange(range)
|
|||
end
|
||||
|
||||
|
||||
include("test_sam.jl")
|
||||
include("test_bam.jl")
|
||||
include("test_issues.jl")
|
||||
@testset "XAM" begin
|
||||
include("test_sam.jl")
|
||||
include("test_bam.jl")
|
||||
include("test_issues.jl")
|
||||
include("test_crosscheck.jl")
|
||||
|
||||
# Include doctests.
|
||||
DocMeta.setdocmeta!(XAM, :DocTestSetup, :(using XAM); recursive=true)
|
||||
doctest(XAM; manual = false)
|
||||
end
|
||||
|
|
|
@ -104,9 +104,12 @@
|
|||
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
|
||||
end
|
||||
new_record = convert(BAM.Record, array)
|
||||
@test record.bin_mq_nl == new_record.bin_mq_nl
|
||||
@test record.l_read_name == new_record.l_read_name
|
||||
@test record.mapq == new_record.mapq
|
||||
@test record.bin == new_record.bin
|
||||
@test record.block_size == new_record.block_size
|
||||
@test record.flag_nc == new_record.flag_nc
|
||||
@test record.flag == new_record.flag
|
||||
@test record.n_cigar_op == new_record.n_cigar_op
|
||||
@test record.l_seq == new_record.l_seq
|
||||
@test record.next_refid == new_record.next_refid
|
||||
@test record.next_pos == new_record.next_pos
|
||||
|
@ -162,18 +165,21 @@
|
|||
if length(xs) != length(ys)
|
||||
return false
|
||||
end
|
||||
for (x, y) in zip(xs, ys)
|
||||
for (a, b) in zip(xs, ys)
|
||||
if !(
|
||||
x.block_size == y.block_size &&
|
||||
x.refid == y.refid &&
|
||||
x.pos == y.pos &&
|
||||
x.bin_mq_nl == y.bin_mq_nl &&
|
||||
x.flag_nc == y.flag_nc &&
|
||||
x.l_seq == y.l_seq &&
|
||||
x.next_refid == y.next_refid &&
|
||||
x.next_pos == y.next_pos &&
|
||||
x.tlen == y.tlen &&
|
||||
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
|
||||
a.block_size == b.block_size &&
|
||||
a.refid == b.refid &&
|
||||
a.pos == b.pos &&
|
||||
a.l_read_name == b.l_read_name &&
|
||||
a.mapq == b.mapq &&
|
||||
a.bin == b.bin &&
|
||||
a.n_cigar_op == b.n_cigar_op &&
|
||||
a.flag == b.flag &&
|
||||
a.l_seq == b.l_seq &&
|
||||
a.next_refid == b.next_refid &&
|
||||
a.next_pos == b.next_pos &&
|
||||
a.tlen == b.tlen &&
|
||||
a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
|
||||
return false
|
||||
end
|
||||
end
|
||||
|
|
54
test/test_crosscheck.jl
Normal file
54
test/test_crosscheck.jl
Normal file
|
@ -0,0 +1,54 @@
|
|||
@testset "Cross Check Properties" begin
|
||||
|
||||
Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl.
|
||||
Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl
|
||||
|
||||
function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol)
|
||||
|
||||
bam_property = getproperty(XAM.BAM, property)
|
||||
sam_property = getproperty(XAM.SAM, property)
|
||||
|
||||
if bam_property(bam) != sam_property(sam)
|
||||
@warn "$property result is not the same" bam_property(bam) sam_property(sam)
|
||||
return false
|
||||
end
|
||||
|
||||
return true
|
||||
end
|
||||
|
||||
samdir = path_of_format("SAM")
|
||||
bamdir = path_of_format("BAM")
|
||||
|
||||
filenames = [
|
||||
"ce#1",
|
||||
"ce#2",
|
||||
"ce#5",
|
||||
"ce#5b",
|
||||
"ce#unmap",
|
||||
"ce#unmap1",
|
||||
"ce#unmap2",
|
||||
]
|
||||
|
||||
properties = [
|
||||
:position,# POS
|
||||
:tempname,# QNAME
|
||||
:mappingquality,# MAPQ
|
||||
:cigar, # CIGAR
|
||||
:flag, # FLAG
|
||||
:sequence, # SEQ
|
||||
:nextposition, # PNEXT
|
||||
:templength, # TLEN
|
||||
]
|
||||
|
||||
for filename in filenames
|
||||
|
||||
records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam")))
|
||||
records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam")))
|
||||
|
||||
for (bam, sam) in zip(records_bam, records_sam)
|
||||
@test all(crosscheck.(bam, sam, properties)) == true
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end # testset Crosscheck
|
|
@ -199,6 +199,6 @@
|
|||
|
||||
records = open(collect, SAM.Reader, file_sam)
|
||||
@test records == []
|
||||
|
||||
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue