mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-11-23 02:09:55 +00:00
commit
946e77a734
25 changed files with 323 additions and 237 deletions
44
.github/workflows/CompatHelper.yml
vendored
44
.github/workflows/CompatHelper.yml
vendored
|
@ -1,17 +1,45 @@
|
||||||
name: CompatHelper
|
name: CompatHelper
|
||||||
|
|
||||||
on:
|
on:
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 0 * * *'
|
- cron: 0 0 * * *
|
||||||
|
workflow_dispatch:
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
jobs:
|
jobs:
|
||||||
CompatHelper:
|
CompatHelper:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Add CompatHelper
|
- name: Check if Julia is already available in the PATH
|
||||||
run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")'
|
id: julia_in_path
|
||||||
- name: Run CompatHelper
|
run: which julia
|
||||||
|
continue-on-error: true
|
||||||
|
- name: Install Julia, but only if it is not already available in the PATH
|
||||||
|
uses: julia-actions/setup-julia@v1
|
||||||
|
with:
|
||||||
|
version: '1'
|
||||||
|
arch: ${{ runner.arch }}
|
||||||
|
if: steps.julia_in_path.outcome != 'success'
|
||||||
|
- name: "Add the General registry via Git"
|
||||||
|
run: |
|
||||||
|
import Pkg
|
||||||
|
ENV["JULIA_PKG_SERVER"] = ""
|
||||||
|
Pkg.Registry.add("General")
|
||||||
|
shell: julia --color=yes {0}
|
||||||
|
- name: "Install CompatHelper"
|
||||||
|
run: |
|
||||||
|
import Pkg
|
||||||
|
name = "CompatHelper"
|
||||||
|
uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
|
||||||
|
version = "3"
|
||||||
|
Pkg.add(; name, uuid, version)
|
||||||
|
shell: julia --color=yes {0}
|
||||||
|
- name: "Run CompatHelper"
|
||||||
|
run: |
|
||||||
|
import CompatHelper
|
||||||
|
CompatHelper.main()
|
||||||
|
shell: julia --color=yes {0}
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
|
COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
|
||||||
run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")'
|
# COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
|
||||||
|
|
21
.github/workflows/Documentation.yml
vendored
21
.github/workflows/Documentation.yml
vendored
|
@ -1,5 +1,4 @@
|
||||||
name: Build Documentation
|
name: Documentation
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
|
@ -10,17 +9,15 @@ on:
|
||||||
pull_request:
|
pull_request:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
Documenter:
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
name: Documentation
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- uses: julia-actions/setup-julia@v1
|
- uses: julia-actions/julia-buildpkg@v1
|
||||||
with:
|
- uses: julia-actions/julia-docdeploy@v1
|
||||||
version: '1'
|
|
||||||
- name: Install Dependencies
|
|
||||||
run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
|
|
||||||
- name: Build and Deploy
|
|
||||||
env:
|
env:
|
||||||
# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
|
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
|
||||||
run: julia --color=yes --project=docs/ docs/make.jl
|
|
||||||
|
|
16
.github/workflows/TagBot.yml
vendored
16
.github/workflows/TagBot.yml
vendored
|
@ -1,12 +1,22 @@
|
||||||
name: TagBot
|
name: TagBot
|
||||||
|
|
||||||
on:
|
on:
|
||||||
schedule:
|
issue_comment:
|
||||||
- cron: '0 0 * * *'
|
types:
|
||||||
|
- created
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
lookback:
|
||||||
|
default: 3
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
TagBot:
|
TagBot:
|
||||||
|
if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: JuliaRegistries/TagBot@v1
|
- uses: JuliaRegistries/TagBot@v1
|
||||||
with:
|
with:
|
||||||
token: ${{ secrets.GITHUB_TOKEN }}
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
ssh: ${{ secrets.TAGBOT_KEY }}
|
ssh: ${{ secrets.DOCUMENTER_KEY }}
|
||||||
|
|
8
.github/workflows/UnitTests.yml
vendored
8
.github/workflows/UnitTests.yml
vendored
|
@ -6,16 +6,20 @@ on:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
|
name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }}
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
continue-on-error: ${{ matrix.experimental }}
|
continue-on-error: ${{ matrix.experimental }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
julia-version:
|
julia-version:
|
||||||
- '1.0' # LTS
|
- '1.6' # LTS
|
||||||
- '1'
|
- '1'
|
||||||
julia-arch: [x64, x86]
|
julia-arch: [x64, x86]
|
||||||
os: [ubuntu-latest, windows-latest, macOS-latest]
|
os: [ubuntu-latest, windows-latest, macOS-latest]
|
||||||
|
exclude:
|
||||||
|
- os: macOS-latest
|
||||||
|
julia-arch: x86
|
||||||
experimental: [false]
|
experimental: [false]
|
||||||
include:
|
include:
|
||||||
- julia-version: nightly
|
- julia-version: nightly
|
||||||
|
@ -31,7 +35,7 @@ jobs:
|
||||||
with:
|
with:
|
||||||
version: ${{ matrix.julia-version }}
|
version: ${{ matrix.julia-version }}
|
||||||
- name: Run Tests
|
- name: Run Tests
|
||||||
uses: julia-actions/julia-runtest@latest
|
uses: julia-actions/julia-runtest@v1
|
||||||
- name: Create CodeCov
|
- name: Create CodeCov
|
||||||
uses: julia-actions/julia-processcoverage@v1
|
uses: julia-actions/julia-processcoverage@v1
|
||||||
- name: Upload CodeCov
|
- name: Upload CodeCov
|
||||||
|
|
27
CHANGELOG.md
Normal file
27
CHANGELOG.md
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
# Changelog
|
||||||
|
|
||||||
|
All notable changes to HapLink.jl will be documented in this file.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [0.3.0]
|
||||||
|
|
||||||
|
## Added
|
||||||
|
|
||||||
|
- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29))
|
||||||
|
- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43))
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21))
|
||||||
|
- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48))
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
|
||||||
|
|
||||||
|
[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD
|
||||||
|
[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0
|
11
Project.toml
11
Project.toml
|
@ -1,7 +1,7 @@
|
||||||
name = "XAM"
|
name = "XAM"
|
||||||
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
||||||
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
||||||
version = "0.2.8"
|
version = "0.3.0"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||||
|
@ -17,18 +17,19 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
||||||
[compat]
|
[compat]
|
||||||
Automa = "0.7, 0.8"
|
Automa = "0.7, 0.8"
|
||||||
BGZFStreams = "0.3.1"
|
BGZFStreams = "0.3.1"
|
||||||
BioAlignments = "2"
|
BioAlignments = "2.2"
|
||||||
BioGenerics = "0.1"
|
BioGenerics = "0.1"
|
||||||
BioSequences = "2.0.4"
|
BioSequences = "3"
|
||||||
FormatSpecimens = "1.1"
|
FormatSpecimens = "1.1"
|
||||||
GenomicFeatures = "2"
|
GenomicFeatures = "2"
|
||||||
Indexes = "0.1"
|
Indexes = "0.1"
|
||||||
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
|
||||||
julia = "1"
|
julia = "1.6"
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
|
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
||||||
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
|
FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
|
||||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||||
|
|
||||||
[targets]
|
[targets]
|
||||||
test = ["FormatSpecimens", "Test"]
|
test = ["Documenter", "FormatSpecimens", "Test"]
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
||||||
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
||||||
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
||||||
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
|
|
||||||
|
|
||||||
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original
|
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original
|
||||||
blog post").
|
blog post").
|
||||||
|
@ -66,4 +65,4 @@ Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
|
|
||||||
## Questions?
|
## Questions?
|
||||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||||
|
|
|
@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
||||||
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
|
||||||
|
|
||||||
[compat]
|
[compat]
|
||||||
Documenter = "0.24"
|
Documenter = "0.27"
|
||||||
|
|
|
@ -2,6 +2,8 @@ using Pkg
|
||||||
using Documenter, XAM
|
using Documenter, XAM
|
||||||
|
|
||||||
makedocs(
|
makedocs(
|
||||||
|
checkdocs = :all,
|
||||||
|
linkcheck = true,
|
||||||
format = Documenter.HTML(
|
format = Documenter.HTML(
|
||||||
edit_link = "develop"
|
edit_link = "develop"
|
||||||
),
|
),
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
||||||
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
||||||
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
||||||
[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)
|
|
||||||
|
|
||||||
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).
|
> This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).
|
||||||
|
|
||||||
|
@ -65,4 +64,4 @@ Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
|
|
||||||
## Questions?
|
## Questions?
|
||||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||||
|
|
21
src/XAM.jl
21
src/XAM.jl
|
@ -4,6 +4,27 @@ export
|
||||||
SAM,
|
SAM,
|
||||||
BAM
|
BAM
|
||||||
|
|
||||||
|
"""
|
||||||
|
flag(record::Union{SAM.Record, BAM.Record})::UInt16
|
||||||
|
|
||||||
|
Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag
|
||||||
|
being OR'd together. The possible flags are:
|
||||||
|
|
||||||
|
0x0001 template having multiple segments in sequencing
|
||||||
|
0x0002 each segment properly aligned according to the aligner
|
||||||
|
0x0004 segment unmapped
|
||||||
|
0x0008 next segment in the template unmapped
|
||||||
|
0x0010 SEQ being reverse complemented
|
||||||
|
0x0020 SEQ of the next segment in the template being reverse complemented
|
||||||
|
0x0040 the first segment in the template
|
||||||
|
0x0080 the last segment in the template
|
||||||
|
0x0100 secondary alignment
|
||||||
|
0x0200 not passing filters, such as platform/vendor quality controls
|
||||||
|
0x0400 PCR or optical duplicate
|
||||||
|
0x0800 supplementary alignment
|
||||||
|
"""
|
||||||
|
function flag end
|
||||||
|
|
||||||
include("sam/sam.jl")
|
include("sam/sam.jl")
|
||||||
include("bam/bam.jl")
|
include("bam/bam.jl")
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ module BAM
|
||||||
using BioGenerics
|
using BioGenerics
|
||||||
using GenomicFeatures
|
using GenomicFeatures
|
||||||
using XAM.SAM
|
using XAM.SAM
|
||||||
|
import ..XAM: flag
|
||||||
|
|
||||||
import BGZFStreams
|
import BGZFStreams
|
||||||
import BioAlignments
|
import BioAlignments
|
||||||
|
|
|
@ -50,12 +50,15 @@ function Base.iterate(iter::OverlapIterator)
|
||||||
if refindex === nothing
|
if refindex === nothing
|
||||||
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
||||||
end
|
end
|
||||||
@assert iter.reader.index !== nothing
|
|
||||||
|
@assert iter.reader.index !== nothing "Reader index cannot be nothing."
|
||||||
|
|
||||||
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
||||||
if !isempty(chunks)
|
if isempty(chunks)
|
||||||
seek(iter.reader, first(chunks).start)
|
return nothing
|
||||||
end
|
end
|
||||||
state = OverlapIteratorState(refindex, chunks, 1, Record())
|
state = OverlapIteratorState(refindex, chunks, 1, Record())
|
||||||
|
seek(iter.reader, state.chunks[state.chunkid].start)
|
||||||
return iterate(iter, state)
|
return iterate(iter, state)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -65,7 +68,7 @@ function Base.iterate(iter::OverlapIterator, state)
|
||||||
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
|
||||||
read!(iter.reader, state.record)
|
read!(iter.reader, state.record)
|
||||||
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
c = compare_intervals(state.record, (state.refindex, iter.interval))
|
||||||
if c == 0
|
if c == 0 # overlapping
|
||||||
return copy(state.record), state
|
return copy(state.record), state
|
||||||
end
|
end
|
||||||
if c > 0
|
if c > 0
|
||||||
|
@ -78,6 +81,7 @@ function Base.iterate(iter::OverlapIterator, state)
|
||||||
seek(iter.reader, state.chunks[state.chunkid].start)
|
seek(iter.reader, state.chunks[state.chunkid].start)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
# no more overlapping records
|
||||||
return nothing
|
return nothing
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -89,7 +93,7 @@ function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
|
||||||
return -1
|
return -1
|
||||||
end
|
end
|
||||||
|
|
||||||
if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
|
if rid > interval[1] || (rid == interval[1] && leftposition(record) > last(interval[2]))
|
||||||
# strictly right
|
# strictly right
|
||||||
return +1
|
return +1
|
||||||
end
|
end
|
||||||
|
|
|
@ -11,18 +11,21 @@ mutable struct Record
|
||||||
block_size::Int32
|
block_size::Int32
|
||||||
refid::Int32
|
refid::Int32
|
||||||
pos::Int32
|
pos::Int32
|
||||||
bin_mq_nl::UInt32
|
l_read_name::UInt8
|
||||||
flag_nc::UInt32
|
mapq::UInt8
|
||||||
|
bin::UInt16
|
||||||
|
n_cigar_op::UInt16
|
||||||
|
flag::UInt16
|
||||||
l_seq::Int32
|
l_seq::Int32
|
||||||
next_refid::Int32
|
next_refid::Int32
|
||||||
next_pos::Int32
|
next_pos::Int32
|
||||||
tlen::Int32
|
tlen::Int32
|
||||||
# variable length data
|
# variable length data
|
||||||
data::Vector{UInt8}
|
data::Vector{UInt8}
|
||||||
reader::Reader
|
reader::Union{Reader, Nothing}
|
||||||
|
|
||||||
function Record()
|
function Record()
|
||||||
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
|
return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
|
||||||
return a.block_size == b.block_size &&
|
return a.block_size == b.block_size &&
|
||||||
a.refid == b.refid &&
|
a.refid == b.refid &&
|
||||||
a.pos == b.pos &&
|
a.pos == b.pos &&
|
||||||
a.bin_mq_nl == b.bin_mq_nl &&
|
a.l_read_name == b.l_read_name &&
|
||||||
a.flag_nc == b.flag_nc &&
|
a.mapq == b.mapq &&
|
||||||
|
a.bin == b.bin &&
|
||||||
|
a.n_cigar_op == b.n_cigar_op &&
|
||||||
|
a.flag == b.flag &&
|
||||||
a.l_seq == b.l_seq &&
|
a.l_seq == b.l_seq &&
|
||||||
a.next_refid == b.next_refid &&
|
a.next_refid == b.next_refid &&
|
||||||
a.next_pos == b.next_pos &&
|
a.next_pos == b.next_pos &&
|
||||||
|
@ -60,19 +66,13 @@ end
|
||||||
|
|
||||||
function Base.copy(record::Record)
|
function Base.copy(record::Record)
|
||||||
copy = Record()
|
copy = Record()
|
||||||
copy.block_size = record.block_size
|
GC.@preserve copy record begin
|
||||||
copy.refid = record.refid
|
dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
|
||||||
copy.pos = record.pos
|
src_pointer = Ptr{UInt8}(pointer_from_objref(record))
|
||||||
copy.bin_mq_nl = record.bin_mq_nl
|
unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
|
||||||
copy.flag_nc = record.flag_nc
|
|
||||||
copy.l_seq = record.l_seq
|
|
||||||
copy.next_refid = record.next_refid
|
|
||||||
copy.next_pos = record.next_pos
|
|
||||||
copy.tlen = record.tlen
|
|
||||||
copy.data = record.data[1:data_size(record)]
|
|
||||||
if isdefined(record, :reader)
|
|
||||||
copy.reader = record.reader
|
|
||||||
end
|
end
|
||||||
|
copy.data = record.data[1:data_size(record)]
|
||||||
|
copy.reader = record.reader
|
||||||
return copy
|
return copy
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -80,8 +80,11 @@ function Base.empty!(record::Record)
|
||||||
record.block_size = 0
|
record.block_size = 0
|
||||||
record.refid = 0
|
record.refid = 0
|
||||||
record.pos = 0
|
record.pos = 0
|
||||||
record.bin_mq_nl = 0
|
record.l_read_name = 0
|
||||||
record.flag_nc = 0
|
record.mapq = 0
|
||||||
|
record.bin = 0
|
||||||
|
record.flag = 0
|
||||||
|
record.n_cigar_op = 0
|
||||||
record.l_seq = 0
|
record.l_seq = 0
|
||||||
record.next_refid = 0
|
record.next_refid = 0
|
||||||
record.next_pos = 0
|
record.next_pos = 0
|
||||||
|
@ -125,14 +128,9 @@ end
|
||||||
# Accessor Fuctions
|
# Accessor Fuctions
|
||||||
# -----------------
|
# -----------------
|
||||||
|
|
||||||
"""
|
|
||||||
flag(record::Record)::UInt16
|
|
||||||
|
|
||||||
Get the bitwise flag of `record`.
|
|
||||||
"""
|
|
||||||
function flag(record::Record)::UInt16
|
function flag(record::Record)::UInt16
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
return UInt16(record.flag_nc >> 16)
|
return record.flag
|
||||||
end
|
end
|
||||||
|
|
||||||
function hasflag(record::Record)
|
function hasflag(record::Record)
|
||||||
|
@ -318,7 +316,7 @@ end
|
||||||
Get the mapping quality of `record`.
|
Get the mapping quality of `record`.
|
||||||
"""
|
"""
|
||||||
function mappingquality(record::Record)::UInt8
|
function mappingquality(record::Record)::UInt8
|
||||||
return UInt8((record.bin_mq_nl >> 8) & 0xff)
|
return record.mapq
|
||||||
end
|
end
|
||||||
|
|
||||||
function hasmappingquality(record::Record)
|
function hasmappingquality(record::Record)
|
||||||
|
@ -397,7 +395,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
|
||||||
end
|
end
|
||||||
|
|
||||||
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
|
function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
|
||||||
cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF
|
cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
|
||||||
if !checkCG
|
if !checkCG
|
||||||
return cigaridx, nops
|
return cigaridx, nops
|
||||||
end
|
end
|
||||||
|
@ -507,13 +505,16 @@ function hastemplength(record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
sequence(record::Record)::BioSequences.LongDNASeq
|
sequence(record::Record)::BioSequences.LongDNA{4}
|
||||||
|
|
||||||
Get the segment sequence of `record`.
|
Get the segment sequence of `record`.
|
||||||
"""
|
"""
|
||||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
function sequence(record::Record)
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
seqlen = seqlength(record)
|
seqlen = seqlength(record)
|
||||||
|
if seqlen == 0
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
||||||
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
|
src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
|
||||||
for i in 1:lastindex(data)
|
for i in 1:lastindex(data)
|
||||||
|
@ -521,7 +522,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq
|
||||||
x = unsafe_load(src, i)
|
x = unsafe_load(src, i)
|
||||||
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
||||||
end
|
end
|
||||||
return BioSequences.LongDNASeq(data, 1:seqlen, false)
|
return BioSequences.LongDNA{4}(data, UInt(seqlen))
|
||||||
end
|
end
|
||||||
|
|
||||||
function hassequence(record::Record)
|
function hassequence(record::Record)
|
||||||
|
@ -543,15 +544,15 @@ function hasseqlength(record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
quality(record::Record)::Vector{UInt8}
|
quality(record::Record)
|
||||||
|
|
||||||
Get the base quality of `record`.
|
Get the base quality of `record`.
|
||||||
"""
|
"""
|
||||||
function quality(record::Record)::Vector{UInt8}
|
function quality(record::Record)
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
seqlen = seqlength(record)
|
seqlen = seqlength(record)
|
||||||
offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2)
|
offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2)
|
||||||
return [reinterpret(Int8, record.data[i+offset]) for i in 1:seqlen]
|
return record.data[(1+offset):(seqlen+offset)]
|
||||||
end
|
end
|
||||||
|
|
||||||
function hasquality(record::Record)
|
function hasquality(record::Record)
|
||||||
|
@ -661,5 +662,5 @@ end
|
||||||
|
|
||||||
# Return the length of the read name.
|
# Return the length of the read name.
|
||||||
function seqname_length(record::Record)
|
function seqname_length(record::Record)
|
||||||
return record.bin_mq_nl & 0xff
|
return record.l_read_name
|
||||||
end
|
end
|
||||||
|
|
|
@ -37,7 +37,7 @@ function Base.write(writer::Writer, record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
function write_header(stream, header, refseqnames, refseqlens)
|
function write_header(stream, header, refseqnames, refseqlens)
|
||||||
@assert length(refseqnames) == length(refseqlens)
|
@assert length(refseqnames) == length(refseqlens) "Lengths of refseq names and lengths must match."
|
||||||
n = 0
|
n = 0
|
||||||
|
|
||||||
# magic bytes
|
# magic bytes
|
||||||
|
|
|
@ -16,11 +16,14 @@ for (name, bits, doc) in [
|
||||||
(:QCFAIL, UInt16(0x200), "QC failure" ),
|
(:QCFAIL, UInt16(0x200), "QC failure" ),
|
||||||
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
|
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
|
||||||
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
|
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
|
||||||
@assert bits isa UInt16
|
@assert bits isa UInt16 "The bits must be of type UInt16."
|
||||||
sym = Symbol("FLAG_", name)
|
sym = Symbol("FLAG_", name)
|
||||||
doc = string(@sprintf("0x%04x: ", bits), doc)
|
docstring = """ $sym
|
||||||
@eval begin
|
SAM/BAM flag: $doc
|
||||||
@doc $(doc) const $(sym) = $(bits)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
|
See also: [`flag`](@ref)
|
||||||
|
"""
|
||||||
|
@eval begin
|
||||||
|
@doc $(docstring) const $(sym) = $(bits)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
|
@ -38,7 +38,7 @@ function Base.iterate(header::Header, i=1)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
find(header::Header, key::AbstractString)::Vector{MetaInfo}
|
findall(header::Header, key::AbstractString)::Vector{MetaInfo}
|
||||||
|
|
||||||
Find metainfo objects satisfying `SAM.tag(metainfo) == key`.
|
Find metainfo objects satisfying `SAM.tag(metainfo) == key`.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -10,12 +10,16 @@ function Reader(state::State{S}) where {S <: TranscodingStream}
|
||||||
|
|
||||||
rdr = Reader(state, Header())
|
rdr = Reader(state, Header())
|
||||||
|
|
||||||
cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
|
cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
|
||||||
|
|
||||||
rdr.state.state = sam_machine_body.start_state
|
rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body.
|
||||||
rdr.state.linenum = ln
|
rdr.state.linenum = ln
|
||||||
rdr.state.filled = false
|
rdr.state.filled = false
|
||||||
|
|
||||||
|
if cs != -1 && cs != 0 #Note: the header is finished when the state machine fails to transition after a new line (state 1).
|
||||||
|
throw(ArgumentError("Malformed SAM file header at line $(ln). Machine failed to transition from state $(cs)."))
|
||||||
|
end
|
||||||
|
|
||||||
return rdr
|
return rdr
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -65,18 +69,18 @@ end
|
||||||
|
|
||||||
function index!(record::MetaInfo)
|
function index!(record::MetaInfo)
|
||||||
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
||||||
found = index!(stream, record)
|
cs = index!(stream, record)
|
||||||
if !found
|
if cs != 0
|
||||||
throw(ArgumentError("invalid SAM metadata"))
|
throw(ArgumentError("Invalid SAM metadata. Machine failed to transition from state $(cs)."))
|
||||||
end
|
end
|
||||||
return record
|
return record
|
||||||
end
|
end
|
||||||
|
|
||||||
function index!(record::Record)
|
function index!(record::Record)
|
||||||
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
|
||||||
found = index!(stream, record)
|
cs = index!(stream, record)
|
||||||
if !found
|
if cs != 0
|
||||||
throw(ArgumentError("invalid SAM record"))
|
throw(ArgumentError("Invalid SAM record. Machine failed to transition from state $(cs)."))
|
||||||
end
|
end
|
||||||
return record
|
return record
|
||||||
end
|
end
|
||||||
|
@ -110,5 +114,6 @@ function Base.read!(rdr::Reader, record::Record)
|
||||||
throw(EOFError())
|
throw(EOFError())
|
||||||
end
|
end
|
||||||
|
|
||||||
throw(ArgumentError("malformed SAM file"))
|
throw(ArgumentError("Malformed SAM file record at line $(ln). Machine failed to transition from state $(cs)."))
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -41,7 +41,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
|
||||||
co.actions[:enter] = [:pos1]
|
co.actions[:enter] = [:pos1]
|
||||||
co.actions[:exit] = [:metainfo_tag]
|
co.actions[:exit] = [:metainfo_tag]
|
||||||
|
|
||||||
comment = re"[^\r\n]*"
|
comment = re"[^\r\n]*" # Note: Only single line comments are allowed.
|
||||||
comment.actions[:enter] = [:pos1]
|
comment.actions[:enter] = [:pos1]
|
||||||
comment.actions[:exit] = [:metainfo_val]
|
comment.actions[:exit] = [:metainfo_val]
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
|
||||||
header = rep(cat(metainfo, newline))
|
header = rep(cat(metainfo, newline))
|
||||||
header.actions[:exit] = [:header]
|
header.actions[:exit] = [:header]
|
||||||
|
|
||||||
body = record * rep(newline * record) * opt(newline)
|
body = rep(cat(record, newline))
|
||||||
body.actions[:exit] = [:body]
|
body.actions[:exit] = [:body]
|
||||||
|
|
||||||
sam = cat(header, body)
|
sam = cat(header, body)
|
||||||
|
@ -168,13 +168,6 @@ function appendfrom!(dst, dpos, src, spos, n)
|
||||||
return dst
|
return dst
|
||||||
end
|
end
|
||||||
|
|
||||||
const action_metainfo = quote
|
|
||||||
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
|
|
||||||
metainfo.filled = 1:(p-@markpos)
|
|
||||||
|
|
||||||
found_metainfo = true
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_actions_metainfo = Dict(
|
const sam_actions_metainfo = Dict(
|
||||||
:mark => :(@mark),
|
:mark => :(@mark),
|
||||||
:pos1 => :(pos1 = @relpos(p)),
|
:pos1 => :(pos1 = @relpos(p)),
|
||||||
|
@ -183,7 +176,10 @@ const sam_actions_metainfo = Dict(
|
||||||
:metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
|
:metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
|
||||||
:metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
|
:metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
|
||||||
:metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
|
:metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
|
||||||
:metainfo => action_metainfo
|
:metainfo => quote
|
||||||
|
appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
|
||||||
|
metainfo.filled = 1:(p-@markpos)
|
||||||
|
end
|
||||||
)
|
)
|
||||||
|
|
||||||
const sam_actions_header = merge(
|
const sam_actions_header = merge(
|
||||||
|
@ -191,16 +187,11 @@ const sam_actions_header = merge(
|
||||||
Dict(
|
Dict(
|
||||||
:countline => :(linenum += 1),
|
:countline => :(linenum += 1),
|
||||||
:metainfo => quote
|
:metainfo => quote
|
||||||
$(action_metainfo)
|
$(sam_actions_metainfo[:metainfo])
|
||||||
push!(header, metainfo)
|
push!(header, metainfo)
|
||||||
metainfo = MetaInfo()
|
metainfo = MetaInfo()
|
||||||
end,
|
end,
|
||||||
:header => quote
|
:header => :(@escape)
|
||||||
|
|
||||||
finish_header = true
|
|
||||||
|
|
||||||
@escape
|
|
||||||
end
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -222,9 +213,6 @@ const sam_actions_record = Dict(
|
||||||
:record => quote
|
:record => quote
|
||||||
appendfrom!(record.data, 1, data, @markpos, p-@markpos)
|
appendfrom!(record.data, 1, data, @markpos, p-@markpos)
|
||||||
record.filled = 1:(p-@markpos)
|
record.filled = 1:(p-@markpos)
|
||||||
|
|
||||||
found_record = true
|
|
||||||
@escape
|
|
||||||
end
|
end
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -232,18 +220,15 @@ const sam_actions_body = merge(
|
||||||
sam_actions_record,
|
sam_actions_record,
|
||||||
Dict(
|
Dict(
|
||||||
:countline => :(linenum += 1),
|
:countline => :(linenum += 1),
|
||||||
:body => quote
|
:record => quote
|
||||||
finish_body = true
|
found_record = true
|
||||||
|
$(sam_actions_record[:record])
|
||||||
@escape
|
@escape
|
||||||
end
|
end,
|
||||||
|
:body => :(@escape)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# const sam_actions = merge(
|
|
||||||
# sam_actions_header,
|
|
||||||
# sam_actions_body
|
|
||||||
# )
|
|
||||||
|
|
||||||
const sam_context = Automa.CodeGenContext(
|
const sam_context = Automa.CodeGenContext(
|
||||||
generator = :goto,
|
generator = :goto,
|
||||||
checkbounds = false,
|
checkbounds = false,
|
||||||
|
@ -253,43 +238,24 @@ const sam_context = Automa.CodeGenContext(
|
||||||
const sam_initcode_metainfo = quote
|
const sam_initcode_metainfo = quote
|
||||||
pos1 = 0
|
pos1 = 0
|
||||||
pos2 = 0
|
pos2 = 0
|
||||||
found_metainfo = false
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_initcode_record = quote
|
|
||||||
pos = 0
|
|
||||||
found_record = false
|
|
||||||
end
|
end
|
||||||
|
|
||||||
const sam_initcode_header = quote
|
const sam_initcode_header = quote
|
||||||
$(sam_initcode_metainfo)
|
$(sam_initcode_metainfo)
|
||||||
metainfo = MetaInfo()
|
metainfo = MetaInfo()
|
||||||
finish_header = false
|
|
||||||
cs, linenum = state
|
cs, linenum = state
|
||||||
end
|
end
|
||||||
|
|
||||||
|
const sam_initcode_record = quote
|
||||||
|
pos = 0
|
||||||
|
end
|
||||||
|
|
||||||
const sam_initcode_body = quote
|
const sam_initcode_body = quote
|
||||||
$(sam_initcode_record)
|
$(sam_initcode_record)
|
||||||
finish_body = false
|
found_record = false
|
||||||
cs, linenum = state
|
cs, linenum = state
|
||||||
end
|
end
|
||||||
|
|
||||||
const sam_loopcode_metainfo = quote
|
|
||||||
|
|
||||||
if cs < 0
|
|
||||||
throw(ArgumentError("malformed metainfo at pos $(p)"))
|
|
||||||
end
|
|
||||||
|
|
||||||
if found_metainfo
|
|
||||||
@goto __return__
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_returncode_metainfo = quote
|
|
||||||
return found_metainfo
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
Automa.Stream.generate_reader(
|
Automa.Stream.generate_reader(
|
||||||
:index!,
|
:index!,
|
||||||
sam_machine_metainfo,
|
sam_machine_metainfo,
|
||||||
|
@ -297,19 +263,10 @@ Automa.Stream.generate_reader(
|
||||||
actions = sam_actions_metainfo,
|
actions = sam_actions_metainfo,
|
||||||
context = sam_context,
|
context = sam_context,
|
||||||
initcode = sam_initcode_metainfo,
|
initcode = sam_initcode_metainfo,
|
||||||
loopcode = sam_loopcode_metainfo,
|
|
||||||
returncode = sam_returncode_metainfo
|
|
||||||
) |> eval
|
) |> eval
|
||||||
|
|
||||||
const sam_loopcode_header = quote
|
|
||||||
|
|
||||||
if finish_header
|
|
||||||
@goto __return__
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_returncode_header = quote
|
const sam_returncode_header = quote
|
||||||
return cs, linenum, finish_header
|
return cs, linenum
|
||||||
end
|
end
|
||||||
|
|
||||||
Automa.Stream.generate_reader(
|
Automa.Stream.generate_reader(
|
||||||
|
@ -319,38 +276,13 @@ Automa.Stream.generate_reader(
|
||||||
actions = sam_actions_header,
|
actions = sam_actions_header,
|
||||||
context = sam_context,
|
context = sam_context,
|
||||||
initcode = sam_initcode_header,
|
initcode = sam_initcode_header,
|
||||||
loopcode = sam_loopcode_header,
|
|
||||||
returncode = sam_returncode_header
|
returncode = sam_returncode_header
|
||||||
) |> eval
|
) |> eval
|
||||||
|
|
||||||
|
const sam_loopcode_body = quote
|
||||||
const sam_loopcode_record = quote
|
|
||||||
|
|
||||||
if cs < 0
|
|
||||||
throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)"))
|
|
||||||
end
|
|
||||||
|
|
||||||
# # if cs != 0
|
|
||||||
# # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
|
|
||||||
# # end
|
|
||||||
|
|
||||||
if found_record
|
if found_record
|
||||||
@goto __return__
|
@goto __return__
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_loopcode_body = quote
|
|
||||||
|
|
||||||
$(sam_loopcode_record)
|
|
||||||
|
|
||||||
if finish_body
|
|
||||||
@goto __return__
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
const sam_returncode_record = quote
|
|
||||||
return found_record
|
|
||||||
end
|
end
|
||||||
|
|
||||||
Automa.Stream.generate_reader(
|
Automa.Stream.generate_reader(
|
||||||
|
@ -360,12 +292,9 @@ Automa.Stream.generate_reader(
|
||||||
actions = sam_actions_record,
|
actions = sam_actions_record,
|
||||||
context = sam_context,
|
context = sam_context,
|
||||||
initcode = sam_initcode_record,
|
initcode = sam_initcode_record,
|
||||||
loopcode = sam_loopcode_record,
|
|
||||||
returncode = sam_returncode_record
|
|
||||||
) |> eval
|
) |> eval
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
const sam_returncode_body = quote
|
const sam_returncode_body = quote
|
||||||
return cs, linenum, found_record
|
return cs, linenum, found_record
|
||||||
end
|
end
|
||||||
|
|
|
@ -2,10 +2,11 @@
|
||||||
# ==========
|
# ==========
|
||||||
|
|
||||||
mutable struct Record
|
mutable struct Record
|
||||||
# data and filled range
|
# Data and filled range.
|
||||||
data::Vector{UInt8}
|
data::Vector{UInt8}
|
||||||
filled::UnitRange{Int}
|
filled::UnitRange{Int} # Note: Specifies the data in use.
|
||||||
# indexes
|
|
||||||
|
# Mandatory fields.
|
||||||
qname::UnitRange{Int}
|
qname::UnitRange{Int}
|
||||||
flag::UnitRange{Int}
|
flag::UnitRange{Int}
|
||||||
rname::UnitRange{Int}
|
rname::UnitRange{Int}
|
||||||
|
@ -17,6 +18,8 @@ mutable struct Record
|
||||||
tlen::UnitRange{Int}
|
tlen::UnitRange{Int}
|
||||||
seq::UnitRange{Int}
|
seq::UnitRange{Int}
|
||||||
qual::UnitRange{Int}
|
qual::UnitRange{Int}
|
||||||
|
|
||||||
|
# Auxiliary fields.
|
||||||
fields::Vector{UnitRange{Int}}
|
fields::Vector{UnitRange{Int}}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -147,11 +150,6 @@ end
|
||||||
# Accessor Functions
|
# Accessor Functions
|
||||||
# ------------------
|
# ------------------
|
||||||
|
|
||||||
"""
|
|
||||||
flag(record::Record)::UInt16
|
|
||||||
|
|
||||||
Get the bitwise flag of `record`.
|
|
||||||
"""
|
|
||||||
function flag(record::Record)::UInt16
|
function flag(record::Record)::UInt16
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
return unsafe_parse_decimal(UInt16, record.data, record.flag)
|
return unsafe_parse_decimal(UInt16, record.data, record.flag)
|
||||||
|
@ -206,9 +204,9 @@ Get the 1-based leftmost mapping position of `record`.
|
||||||
function position(record::Record)::Int
|
function position(record::Record)::Int
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
pos = unsafe_parse_decimal(Int, record.data, record.pos)
|
pos = unsafe_parse_decimal(Int, record.data, record.pos)
|
||||||
if pos == 0
|
# if pos == 0
|
||||||
missingerror(:position)
|
# missingerror(:position)
|
||||||
end
|
# end
|
||||||
return pos
|
return pos
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -263,9 +261,9 @@ Get the position of the mate/next read of `record`.
|
||||||
function nextposition(record::Record)::Int
|
function nextposition(record::Record)::Int
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
pos = unsafe_parse_decimal(Int, record.data, record.pnext)
|
pos = unsafe_parse_decimal(Int, record.data, record.pnext)
|
||||||
if pos == 0
|
# if pos == 0
|
||||||
missingerror(:nextposition)
|
# missingerror(:nextposition)
|
||||||
end
|
# end
|
||||||
return pos
|
return pos
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -299,7 +297,8 @@ Get the CIGAR string of `record`.
|
||||||
function cigar(record::Record)::String
|
function cigar(record::Record)::String
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
if ismissing(record, record.cigar)
|
if ismissing(record, record.cigar)
|
||||||
missingerror(:cigar)
|
# missingerror(:cigar)
|
||||||
|
return ""
|
||||||
end
|
end
|
||||||
return String(record.data[record.cigar])
|
return String(record.data[record.cigar])
|
||||||
end
|
end
|
||||||
|
@ -377,9 +376,9 @@ Get the template length of `record`.
|
||||||
function templength(record::Record)::Int
|
function templength(record::Record)::Int
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
len = unsafe_parse_decimal(Int, record.data, record.tlen)
|
len = unsafe_parse_decimal(Int, record.data, record.tlen)
|
||||||
if len == 0
|
# if len == 0
|
||||||
missingerror(:tlen)
|
# missingerror(:tlen)
|
||||||
end
|
# end
|
||||||
return len
|
return len
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -388,17 +387,18 @@ function hastemplength(record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
sequence(record::Record)::BioSequences.LongDNASeq
|
sequence(record::Record)::BioSequences.LongDNA{4}
|
||||||
|
|
||||||
Get the segment sequence of `record`.
|
Get the segment sequence of `record`.
|
||||||
"""
|
"""
|
||||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
function sequence(record::Record)
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
if ismissing(record, record.seq)
|
if ismissing(record, record.seq)
|
||||||
missingerror(:sequence)
|
# missingerror(:sequence)
|
||||||
|
return nothing
|
||||||
end
|
end
|
||||||
seqlen = length(record.seq)
|
seqlen = length(record.seq)
|
||||||
ret = BioSequences.LongDNASeq(seqlen)
|
ret = BioSequences.LongDNA{4}(undef, seqlen)
|
||||||
copyto!(ret, 1, record.data, first(record.seq), seqlen)
|
copyto!(ret, 1, record.data, first(record.seq), seqlen)
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
|
@ -494,7 +494,7 @@ function Base.getindex(record::Record, tag::AbstractString)
|
||||||
end
|
end
|
||||||
|
|
||||||
if typ == UInt8('A')
|
if typ == UInt8('A')
|
||||||
@assert lo == hi
|
@assert lo == hi "Values lo and hi must be equivalent."
|
||||||
return Char(record.data[lo])
|
return Char(record.data[lo])
|
||||||
end
|
end
|
||||||
if typ == UInt8('i')
|
if typ == UInt8('i')
|
||||||
|
|
|
@ -11,6 +11,7 @@ import BioGenerics.Exceptions: missingerror
|
||||||
import BioGenerics.Automa: State
|
import BioGenerics.Automa: State
|
||||||
import BioSequences
|
import BioSequences
|
||||||
import TranscodingStreams: TranscodingStreams, TranscodingStream
|
import TranscodingStreams: TranscodingStreams, TranscodingStream
|
||||||
|
import ..XAM: flag
|
||||||
|
|
||||||
using Printf: @sprintf
|
using Printf: @sprintf
|
||||||
|
|
||||||
|
@ -46,20 +47,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I
|
||||||
return sign * x
|
return sign * x
|
||||||
end
|
end
|
||||||
|
|
||||||
# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams).
|
|
||||||
# import BufferedStreams: BufferedStreams, BufferedInputStream
|
|
||||||
# # A type keeping track of a ragel-based parser's state.
|
|
||||||
# mutable struct State{T<:BufferedInputStream}
|
|
||||||
# stream::T # input stream
|
|
||||||
# cs::Int # current DFA state of Ragel
|
|
||||||
# linenum::Int # line number: parser is responsible for updating this
|
|
||||||
# finished::Bool # true if finished (regardless of where in the stream we are)
|
|
||||||
# end
|
|
||||||
|
|
||||||
# function State(initstate::Int, input::BufferedInputStream)
|
|
||||||
# return State(input, initstate, 1, false)
|
|
||||||
# end
|
|
||||||
|
|
||||||
|
|
||||||
include("flags.jl")
|
include("flags.jl")
|
||||||
include("metainfo.jl")
|
include("metainfo.jl")
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
using Test
|
using Test
|
||||||
|
using Documenter
|
||||||
|
|
||||||
using BioGenerics
|
using BioGenerics
|
||||||
using FormatSpecimens
|
using FormatSpecimens
|
||||||
|
@ -23,6 +24,13 @@ function randrange(range)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@testset "XAM" begin
|
||||||
include("test_sam.jl")
|
include("test_sam.jl")
|
||||||
include("test_bam.jl")
|
include("test_bam.jl")
|
||||||
include("test_issues.jl")
|
include("test_issues.jl")
|
||||||
|
include("test_crosscheck.jl")
|
||||||
|
|
||||||
|
# Include doctests.
|
||||||
|
DocMeta.setdocmeta!(XAM, :DocTestSetup, :(using XAM); recursive=true)
|
||||||
|
doctest(XAM; manual = false)
|
||||||
|
end
|
||||||
|
|
|
@ -104,9 +104,12 @@
|
||||||
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
|
unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
|
||||||
end
|
end
|
||||||
new_record = convert(BAM.Record, array)
|
new_record = convert(BAM.Record, array)
|
||||||
@test record.bin_mq_nl == new_record.bin_mq_nl
|
@test record.l_read_name == new_record.l_read_name
|
||||||
|
@test record.mapq == new_record.mapq
|
||||||
|
@test record.bin == new_record.bin
|
||||||
@test record.block_size == new_record.block_size
|
@test record.block_size == new_record.block_size
|
||||||
@test record.flag_nc == new_record.flag_nc
|
@test record.flag == new_record.flag
|
||||||
|
@test record.n_cigar_op == new_record.n_cigar_op
|
||||||
@test record.l_seq == new_record.l_seq
|
@test record.l_seq == new_record.l_seq
|
||||||
@test record.next_refid == new_record.next_refid
|
@test record.next_refid == new_record.next_refid
|
||||||
@test record.next_pos == new_record.next_pos
|
@test record.next_pos == new_record.next_pos
|
||||||
|
@ -162,18 +165,21 @@
|
||||||
if length(xs) != length(ys)
|
if length(xs) != length(ys)
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
for (x, y) in zip(xs, ys)
|
for (a, b) in zip(xs, ys)
|
||||||
if !(
|
if !(
|
||||||
x.block_size == y.block_size &&
|
a.block_size == b.block_size &&
|
||||||
x.refid == y.refid &&
|
a.refid == b.refid &&
|
||||||
x.pos == y.pos &&
|
a.pos == b.pos &&
|
||||||
x.bin_mq_nl == y.bin_mq_nl &&
|
a.l_read_name == b.l_read_name &&
|
||||||
x.flag_nc == y.flag_nc &&
|
a.mapq == b.mapq &&
|
||||||
x.l_seq == y.l_seq &&
|
a.bin == b.bin &&
|
||||||
x.next_refid == y.next_refid &&
|
a.n_cigar_op == b.n_cigar_op &&
|
||||||
x.next_pos == y.next_pos &&
|
a.flag == b.flag &&
|
||||||
x.tlen == y.tlen &&
|
a.l_seq == b.l_seq &&
|
||||||
x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
|
a.next_refid == b.next_refid &&
|
||||||
|
a.next_pos == b.next_pos &&
|
||||||
|
a.tlen == b.tlen &&
|
||||||
|
a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
|
||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
54
test/test_crosscheck.jl
Normal file
54
test/test_crosscheck.jl
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
@testset "Cross Check Properties" begin
|
||||||
|
|
||||||
|
Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl.
|
||||||
|
Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl
|
||||||
|
|
||||||
|
function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol)
|
||||||
|
|
||||||
|
bam_property = getproperty(XAM.BAM, property)
|
||||||
|
sam_property = getproperty(XAM.SAM, property)
|
||||||
|
|
||||||
|
if bam_property(bam) != sam_property(sam)
|
||||||
|
@warn "$property result is not the same" bam_property(bam) sam_property(sam)
|
||||||
|
return false
|
||||||
|
end
|
||||||
|
|
||||||
|
return true
|
||||||
|
end
|
||||||
|
|
||||||
|
samdir = path_of_format("SAM")
|
||||||
|
bamdir = path_of_format("BAM")
|
||||||
|
|
||||||
|
filenames = [
|
||||||
|
"ce#1",
|
||||||
|
"ce#2",
|
||||||
|
"ce#5",
|
||||||
|
"ce#5b",
|
||||||
|
"ce#unmap",
|
||||||
|
"ce#unmap1",
|
||||||
|
"ce#unmap2",
|
||||||
|
]
|
||||||
|
|
||||||
|
properties = [
|
||||||
|
:position,# POS
|
||||||
|
:tempname,# QNAME
|
||||||
|
:mappingquality,# MAPQ
|
||||||
|
:cigar, # CIGAR
|
||||||
|
:flag, # FLAG
|
||||||
|
:sequence, # SEQ
|
||||||
|
:nextposition, # PNEXT
|
||||||
|
:templength, # TLEN
|
||||||
|
]
|
||||||
|
|
||||||
|
for filename in filenames
|
||||||
|
|
||||||
|
records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam")))
|
||||||
|
records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam")))
|
||||||
|
|
||||||
|
for (bam, sam) in zip(records_bam, records_sam)
|
||||||
|
@test all(crosscheck.(bam, sam, properties)) == true
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end # testset Crosscheck
|
Loading…
Reference in a new issue