mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-11-23 02:09:55 +00:00
Compare commits
No commits in common. "560a5cd8dfb5d6c732c4e37ecc6c2c30f9986a0d" and "946e77a7343bff28da35360820655e9960236b0f" have entirely different histories.
560a5cd8df
...
946e77a734
17 changed files with 167 additions and 309 deletions
21
CHANGELOG.md
21
CHANGELOG.md
|
@ -7,23 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
## [0.3.0]
|
||||||
- Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56/files))
|
|
||||||
- Added doi badge
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Subtype from XAMReader and XAMWriter from common abstract types.
|
|
||||||
- Subtype from XAMRecord.
|
|
||||||
- Unified flag queries.
|
|
||||||
|
|
||||||
## [0.3.1]
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Upgraded to BioAlignments v3 ([#55](https://github.com/BioJulia/XAM.jl/pull/55))
|
|
||||||
|
|
||||||
## [0.3.0] - 2022-10-10
|
|
||||||
|
|
||||||
## Added
|
## Added
|
||||||
|
|
||||||
|
@ -39,6 +23,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
|
- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
|
||||||
|
|
||||||
[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.1...HEAD
|
[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD
|
||||||
[0.3.1]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...v0.3.1
|
|
||||||
[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0
|
[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
name = "XAM"
|
name = "XAM"
|
||||||
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
||||||
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
||||||
version = "0.3.1"
|
version = "0.3.0"
|
||||||
|
|
||||||
[deps]
|
[deps]
|
||||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||||
|
@ -17,7 +17,7 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
||||||
[compat]
|
[compat]
|
||||||
Automa = "0.7, 0.8"
|
Automa = "0.7, 0.8"
|
||||||
BGZFStreams = "0.3.1"
|
BGZFStreams = "0.3.1"
|
||||||
BioAlignments = "3"
|
BioAlignments = "2.2"
|
||||||
BioGenerics = "0.1"
|
BioGenerics = "0.1"
|
||||||
BioSequences = "3"
|
BioSequences = "3"
|
||||||
FormatSpecimens = "1.1"
|
FormatSpecimens = "1.1"
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
|
[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
|
||||||
[![Latest Release](https://img.shields.io/github/release/BioJulia/XAM.jl.svg)](https://github.com/BioJulia/XAM.jl/releases/latest)
|
[![Latest Release](https://img.shields.io/github/release/BioJulia/XAM.jl.svg)](https://github.com/BioJulia/XAM.jl/releases/latest)
|
||||||
[![DOI](https://zenodo.org/badge/201858041.svg)](https://zenodo.org/badge/latestdoi/201858041)
|
|
||||||
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
[![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
|
||||||
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
[![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
|
||||||
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
[![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
|
||||||
|
@ -66,4 +65,4 @@ Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
|
|
||||||
## Questions?
|
## Questions?
|
||||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.slack.com/channels/biology), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||||
|
|
|
@ -64,4 +64,4 @@ Your logo will show up here with a link to your website.
|
||||||
|
|
||||||
|
|
||||||
## Questions?
|
## Questions?
|
||||||
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.slack.com/channels/biology), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
|
||||||
|
|
26
src/XAM.jl
26
src/XAM.jl
|
@ -1,17 +1,29 @@
|
||||||
module XAM
|
module XAM
|
||||||
|
|
||||||
using BioGenerics
|
|
||||||
import BioGenerics: isfilled #Note: used by `ismapped`.
|
|
||||||
|
|
||||||
export
|
export
|
||||||
SAM,
|
SAM,
|
||||||
BAM
|
BAM
|
||||||
|
|
||||||
abstract type XAMRecord end
|
"""
|
||||||
abstract type XAMReader <: BioGenerics.IO.AbstractReader end
|
flag(record::Union{SAM.Record, BAM.Record})::UInt16
|
||||||
abstract type XAMWriter <: BioGenerics.IO.AbstractWriter end
|
|
||||||
|
|
||||||
include("flags.jl")
|
Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag
|
||||||
|
being OR'd together. The possible flags are:
|
||||||
|
|
||||||
|
0x0001 template having multiple segments in sequencing
|
||||||
|
0x0002 each segment properly aligned according to the aligner
|
||||||
|
0x0004 segment unmapped
|
||||||
|
0x0008 next segment in the template unmapped
|
||||||
|
0x0010 SEQ being reverse complemented
|
||||||
|
0x0020 SEQ of the next segment in the template being reverse complemented
|
||||||
|
0x0040 the first segment in the template
|
||||||
|
0x0080 the last segment in the template
|
||||||
|
0x0100 secondary alignment
|
||||||
|
0x0200 not passing filters, such as platform/vendor quality controls
|
||||||
|
0x0400 PCR or optical duplicate
|
||||||
|
0x0800 supplementary alignment
|
||||||
|
"""
|
||||||
|
function flag end
|
||||||
|
|
||||||
include("sam/sam.jl")
|
include("sam/sam.jl")
|
||||||
include("bam/bam.jl")
|
include("bam/bam.jl")
|
||||||
|
|
|
@ -6,8 +6,7 @@ module BAM
|
||||||
using BioGenerics
|
using BioGenerics
|
||||||
using GenomicFeatures
|
using GenomicFeatures
|
||||||
using XAM.SAM
|
using XAM.SAM
|
||||||
import ..XAM: flag, XAMRecord, XAMReader, XAMWriter,
|
import ..XAM: flag
|
||||||
ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API.
|
|
||||||
|
|
||||||
import BGZFStreams
|
import BGZFStreams
|
||||||
import BioAlignments
|
import BioAlignments
|
||||||
|
|
|
@ -8,9 +8,9 @@ Create a data reader of the BAM file format.
|
||||||
|
|
||||||
# Arguments
|
# Arguments
|
||||||
* `input`: data source
|
* `input`: data source
|
||||||
* `index=nothing`: filepath to a random access index (currently *bai* is supported) or BAI object
|
* `index=nothing`: filepath to a random access index (currently *bai* is supported)
|
||||||
"""
|
"""
|
||||||
mutable struct Reader{T} <: XAMReader
|
mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
|
||||||
stream::BGZFStreams.BGZFStream{T}
|
stream::BGZFStreams.BGZFStream{T}
|
||||||
header::SAM.Header
|
header::SAM.Header
|
||||||
start_offset::BGZFStreams.VirtualOffset
|
start_offset::BGZFStreams.VirtualOffset
|
||||||
|
@ -28,8 +28,13 @@ function BioGenerics.IO.stream(reader::Reader)
|
||||||
end
|
end
|
||||||
|
|
||||||
function Reader(input::IO; index=nothing)
|
function Reader(input::IO; index=nothing)
|
||||||
|
if isa(index, AbstractString)
|
||||||
|
index = BAI(index)
|
||||||
|
elseif index != nothing
|
||||||
|
error("unrecognizable index argument")
|
||||||
|
end
|
||||||
reader = init_bam_reader(input)
|
reader = init_bam_reader(input)
|
||||||
reader.index = init_bam_index(index)
|
reader.index = index
|
||||||
return reader
|
return reader
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -120,11 +125,6 @@ function init_bam_reader(input::IO)
|
||||||
return init_bam_reader(BGZFStreams.BGZFStream(input))
|
return init_bam_reader(BGZFStreams.BGZFStream(input))
|
||||||
end
|
end
|
||||||
|
|
||||||
init_bam_index(index::AbstractString) = BAI(index)
|
|
||||||
init_bam_index(index::BAI) = index
|
|
||||||
init_bam_index(index::Nothing) = nothing
|
|
||||||
init_bam_index(index) = error("unrecognizable index argument")
|
|
||||||
|
|
||||||
function _read!(reader::Reader, record)
|
function _read!(reader::Reader, record)
|
||||||
unsafe_read(
|
unsafe_read(
|
||||||
reader.stream,
|
reader.stream,
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
Create an unfilled BAM record.
|
Create an unfilled BAM record.
|
||||||
"""
|
"""
|
||||||
mutable struct Record <: XAMRecord
|
mutable struct Record
|
||||||
# fixed-length fields (see BMA specs for the details)
|
# fixed-length fields (see BMA specs for the details)
|
||||||
block_size::Int32
|
block_size::Int32
|
||||||
refid::Int32
|
refid::Int32
|
||||||
|
@ -137,6 +137,37 @@ function hasflag(record::Record)
|
||||||
return isfilled(record)
|
return isfilled(record)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
ismapped(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if `record` is mapped.
|
||||||
|
"""
|
||||||
|
function ismapped(record::Record)::Bool
|
||||||
|
return flag(record) & SAM.FLAG_UNMAP == 0
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
isprimary(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if `record` is a primary line of the read.
|
||||||
|
|
||||||
|
This is equivalent to `flag(record) & 0x900 == 0`.
|
||||||
|
"""
|
||||||
|
function isprimary(record::Record)::Bool
|
||||||
|
return flag(record) & 0x900 == 0
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
ispositivestrand(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if `record` is aligned to the positive strand.
|
||||||
|
|
||||||
|
This is equivalent to `flag(record) & 0x10 == 0`.
|
||||||
|
"""
|
||||||
|
function ispositivestrand(record::Record)::Bool
|
||||||
|
flag(record) & 0x10 == 0
|
||||||
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
refid(record::Record)::Int
|
refid(record::Record)::Int
|
||||||
|
|
||||||
|
@ -222,6 +253,15 @@ function hasrightposition(record::Record)
|
||||||
return isfilled(record) && ismapped(record)
|
return isfilled(record) && ismapped(record)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
isnextmapped(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if the mate/next read of `record` is mapped.
|
||||||
|
"""
|
||||||
|
function isnextmapped(record::Record)::Bool
|
||||||
|
return isfilled(record) && (flag(record) & SAM.FLAG_MUNMAP == 0)
|
||||||
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
nextrefid(record::Record)::Int
|
nextrefid(record::Record)::Int
|
||||||
|
|
||||||
|
@ -390,12 +430,28 @@ end
|
||||||
Get the alignment of `record`.
|
Get the alignment of `record`.
|
||||||
"""
|
"""
|
||||||
function alignment(record::Record)::BioAlignments.Alignment
|
function alignment(record::Record)::BioAlignments.Alignment
|
||||||
if ismapped(record)
|
checkfilled(record)
|
||||||
return BioAlignments.Alignment(cigar(record), 1, position(record))
|
if !ismapped(record)
|
||||||
end
|
|
||||||
|
|
||||||
return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[])
|
return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[])
|
||||||
end
|
end
|
||||||
|
seqpos = 0
|
||||||
|
refpos = position(record) - 1
|
||||||
|
anchors = [BioAlignments.AlignmentAnchor(seqpos, refpos, BioAlignments.OP_START)]
|
||||||
|
for (op, len) in zip(cigar_rle(record)...)
|
||||||
|
if BioAlignments.ismatchop(op)
|
||||||
|
seqpos += len
|
||||||
|
refpos += len
|
||||||
|
elseif BioAlignments.isinsertop(op)
|
||||||
|
seqpos += len
|
||||||
|
elseif BioAlignments.isdeleteop(op)
|
||||||
|
refpos += len
|
||||||
|
else
|
||||||
|
error("operation $(op) is not supported")
|
||||||
|
end
|
||||||
|
push!(anchors, BioAlignments.AlignmentAnchor(seqpos, refpos, op))
|
||||||
|
end
|
||||||
|
return BioAlignments.Alignment(anchors)
|
||||||
|
end
|
||||||
|
|
||||||
function hasalignment(record::Record)
|
function hasalignment(record::Record)
|
||||||
return ismapped(record)
|
return ismapped(record)
|
||||||
|
|
|
@ -10,7 +10,7 @@ Create a data writer of the BAM file format.
|
||||||
* `output`: data sink
|
* `output`: data sink
|
||||||
* `header`: SAM header object
|
* `header`: SAM header object
|
||||||
"""
|
"""
|
||||||
mutable struct Writer <: XAMWriter
|
mutable struct Writer <: BioGenerics.IO.AbstractWriter
|
||||||
stream::BGZFStreams.BGZFStream
|
stream::BGZFStreams.BGZFStream
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
226
src/flags.jl
226
src/flags.jl
|
@ -1,226 +0,0 @@
|
||||||
# Flags
|
|
||||||
# =========
|
|
||||||
#
|
|
||||||
|
|
||||||
"""
|
|
||||||
flag(record::XAMRecord})::UInt16
|
|
||||||
|
|
||||||
Get the bitwise flags of `record`.
|
|
||||||
The returned value is a `UInt16` of each flag being OR'd together.
|
|
||||||
The possible flags are:
|
|
||||||
|
|
||||||
0x0001 template having multiple segments in sequencing
|
|
||||||
0x0002 each segment properly aligned according to the aligner
|
|
||||||
0x0004 segment unmapped
|
|
||||||
0x0008 next segment in the template unmapped
|
|
||||||
0x0010 SEQ being reverse complemented
|
|
||||||
0x0020 SEQ of the next segment in the template being reverse complemented
|
|
||||||
0x0040 the first segment in the template
|
|
||||||
0x0080 the last segment in the template
|
|
||||||
0x0100 secondary alignment
|
|
||||||
0x0200 not passing filters, such as platform/vendor quality controls
|
|
||||||
0x0400 PCR or optical duplicate
|
|
||||||
0x0800 supplementary alignment
|
|
||||||
"""
|
|
||||||
|
|
||||||
function flag end
|
|
||||||
|
|
||||||
# Bitwise flags (or FLAG).
|
|
||||||
for (name, bits, doc) in [
|
|
||||||
(:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"),
|
|
||||||
(:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ),
|
|
||||||
(:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with FLAG_PROPER_PAIR" ),
|
|
||||||
(:MUNMAP, UInt16(0x008), "the mate is unmapped" ),
|
|
||||||
(:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ),
|
|
||||||
(:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ),
|
|
||||||
(:READ1, UInt16(0x040), "this is read1" ),
|
|
||||||
(:READ2, UInt16(0x080), "this is read2" ),
|
|
||||||
(:SECONDARY, UInt16(0x100), "not primary alignment" ),
|
|
||||||
(:QCFAIL, UInt16(0x200), "QC failure" ),
|
|
||||||
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
|
|
||||||
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
|
|
||||||
@assert bits isa UInt16 "The bits must be of type UInt16."
|
|
||||||
sym = Symbol("FLAG_", name)
|
|
||||||
docstring = """ $sym
|
|
||||||
SAM/BAM flag: $doc
|
|
||||||
|
|
||||||
See also: [`flag`](@ref)
|
|
||||||
"""
|
|
||||||
@eval begin
|
|
||||||
@doc $(docstring) const $(sym) = $(bits)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ispaired(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record`'s template has multiple segments in sequencing.
|
|
||||||
"""
|
|
||||||
function ispaired(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_PAIRED == FLAG_PAIRED
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isproperpair(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether each segment of the `record`'s template properly aligned according to the aligner.
|
|
||||||
"""
|
|
||||||
function isproperpair(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & PROPER_PAIR == PROPER_PAIR
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isunmapped(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is unmapped.
|
|
||||||
"""
|
|
||||||
function isunmapped(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_UNMAP == FLAG_UNMAP
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ismapped(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is mapped.
|
|
||||||
"""
|
|
||||||
function ismapped(record::XAMRecord)::Bool
|
|
||||||
# return flag(record) & FLAG_UNMAP == 0
|
|
||||||
return isfilled(record) && (flag(record) & FLAG_UNMAP == 0)
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ismateunmapped(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record`'s mate is unmapped.
|
|
||||||
"""
|
|
||||||
function ismateunmapped(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_MUNMAP == FLAG_MUNMAP
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isnextmapped(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Test if the mate/next read of `record` is mapped.
|
|
||||||
"""
|
|
||||||
function isnextmapped(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_MUNMAP == 0
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isreverse(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is mapped to the reverse strand.
|
|
||||||
"""
|
|
||||||
function isreverse(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_REVERSE == FLAG_REVERSE
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isforward(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is mapped to the forward strand.
|
|
||||||
"""
|
|
||||||
function isforward(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_REVERSE == 0
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ispositivestrand(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether `record` is aligned to the positive strand.
|
|
||||||
"""
|
|
||||||
function ispositivestrand(record::XAMRecord)::Bool
|
|
||||||
return isforward(record)
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ispositivestrand(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether `record` is aligned to the negative strand.
|
|
||||||
"""
|
|
||||||
function isnegativestrand(record::XAMRecord)::Bool
|
|
||||||
return isreverse(record)
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
ismatereverse(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record`'s mate is mapped to the reverse strand.
|
|
||||||
"""
|
|
||||||
function ismatereverse(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_MREVERSE == FLAG_MREVERSE
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isread1(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is read1.
|
|
||||||
"""
|
|
||||||
function isread1(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_READ1 == FLAG_READ1
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isread2(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is read2.
|
|
||||||
"""
|
|
||||||
function isread2(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_READ2 == FLAG_READ2
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
issecondaryalignment(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is a secondary alignment.
|
|
||||||
"""
|
|
||||||
function issecondaryalignment(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_SECONDARY == FLAG_SECONDARY
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isprimaryalignment(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is the primary alignment.
|
|
||||||
"""
|
|
||||||
function isprimaryalignment(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_SECONDARY == 0
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isqcfail(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` did not pass filters, such as platform/vendor quality controls.
|
|
||||||
"""
|
|
||||||
function isqcfail(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_QCFAIL == FLAG_QCFAIL
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isduplicate(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is a PCR or optical duplicate.
|
|
||||||
"""
|
|
||||||
function isduplicate(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_DUP == FLAG_DUP
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
issupplementaryalignment(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether the `record` is a supplementary alignment.
|
|
||||||
"""
|
|
||||||
function issupplementaryalignment(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY
|
|
||||||
end
|
|
||||||
|
|
||||||
"""
|
|
||||||
isprimary(record::XAMRecord)::Bool
|
|
||||||
|
|
||||||
Query whether `record` is a primary line of the read.
|
|
||||||
|
|
||||||
This is equivalent to `flag(record) & 0x900 == 0`.
|
|
||||||
"""
|
|
||||||
function isprimary(record::XAMRecord)::Bool
|
|
||||||
return flag(record) & 0x900 == 0
|
|
||||||
end
|
|
29
src/sam/flags.jl
Normal file
29
src/sam/flags.jl
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
# SAM Flags
|
||||||
|
# =========
|
||||||
|
#
|
||||||
|
# Bitwise flags (or FLAG).
|
||||||
|
|
||||||
|
for (name, bits, doc) in [
|
||||||
|
(:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"),
|
||||||
|
(:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ),
|
||||||
|
(:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with SAM.FLAG_PROPER_PAIR" ),
|
||||||
|
(:MUNMAP, UInt16(0x008), "the mate is unmapped" ),
|
||||||
|
(:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ),
|
||||||
|
(:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ),
|
||||||
|
(:READ1, UInt16(0x040), "this is read1" ),
|
||||||
|
(:READ2, UInt16(0x080), "this is read2" ),
|
||||||
|
(:SECONDARY, UInt16(0x100), "not primary alignment" ),
|
||||||
|
(:QCFAIL, UInt16(0x200), "QC failure" ),
|
||||||
|
(:DUP, UInt16(0x400), "optical or PCR duplicate" ),
|
||||||
|
(:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),]
|
||||||
|
@assert bits isa UInt16 "The bits must be of type UInt16."
|
||||||
|
sym = Symbol("FLAG_", name)
|
||||||
|
docstring = """ $sym
|
||||||
|
SAM/BAM flag: $doc
|
||||||
|
|
||||||
|
See also: [`flag`](@ref)
|
||||||
|
"""
|
||||||
|
@eval begin
|
||||||
|
@doc $(docstring) const $(sym) = $(bits)
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,7 +1,7 @@
|
||||||
# SAM Reader
|
# SAM Reader
|
||||||
# =========
|
# =========
|
||||||
|
|
||||||
mutable struct Reader{S <: TranscodingStream} <: XAMReader
|
mutable struct Reader{S <: TranscodingStream} <: BioGenerics.IO.AbstractReader
|
||||||
state::State{S}
|
state::State{S}
|
||||||
header::Header
|
header::Header
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# SAM Record
|
# SAM Record
|
||||||
# ==========
|
# ==========
|
||||||
|
|
||||||
mutable struct Record <: XAMRecord
|
mutable struct Record
|
||||||
# Data and filled range.
|
# Data and filled range.
|
||||||
data::Vector{UInt8}
|
data::Vector{UInt8}
|
||||||
filled::UnitRange{Int} # Note: Specifies the data in use.
|
filled::UnitRange{Int} # Note: Specifies the data in use.
|
||||||
|
@ -159,6 +159,26 @@ function hasflag(record::Record)
|
||||||
return isfilled(record)
|
return isfilled(record)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
ismapped(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if `record` is mapped.
|
||||||
|
"""
|
||||||
|
function ismapped(record::Record)::Bool
|
||||||
|
return isfilled(record) && (flag(record) & FLAG_UNMAP == 0)
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
isprimary(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if `record` is a primary line of the read.
|
||||||
|
|
||||||
|
This is equivalent to `flag(record) & 0x900 == 0`.
|
||||||
|
"""
|
||||||
|
function isprimary(record::Record)::Bool
|
||||||
|
return flag(record) & 0x900 == 0
|
||||||
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
refname(record::Record)::String
|
refname(record::Record)::String
|
||||||
|
|
||||||
|
@ -207,6 +227,15 @@ function hasrightposition(record::Record)
|
||||||
return hasposition(record) && hasalignment(record)
|
return hasposition(record) && hasalignment(record)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
isnextmapped(record::Record)::Bool
|
||||||
|
|
||||||
|
Test if the mate/next read of `record` is mapped.
|
||||||
|
"""
|
||||||
|
function isnextmapped(record::Record)::Bool
|
||||||
|
return isfilled(record) && (flag(record) & FLAG_MUNMAP == 0)
|
||||||
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
nextrefname(record::Record)::String
|
nextrefname(record::Record)::String
|
||||||
|
|
||||||
|
|
|
@ -11,8 +11,7 @@ import BioGenerics.Exceptions: missingerror
|
||||||
import BioGenerics.Automa: State
|
import BioGenerics.Automa: State
|
||||||
import BioSequences
|
import BioSequences
|
||||||
import TranscodingStreams: TranscodingStreams, TranscodingStream
|
import TranscodingStreams: TranscodingStreams, TranscodingStream
|
||||||
import ..XAM: flag, XAMRecord, XAMReader, XAMWriter,
|
import ..XAM: flag
|
||||||
ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API.
|
|
||||||
|
|
||||||
using Printf: @sprintf
|
using Printf: @sprintf
|
||||||
|
|
||||||
|
@ -49,6 +48,7 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
include("flags.jl")
|
||||||
include("metainfo.jl")
|
include("metainfo.jl")
|
||||||
include("record.jl")
|
include("record.jl")
|
||||||
include("header.jl")
|
include("header.jl")
|
||||||
|
|
|
@ -10,7 +10,7 @@ Create a data writer of the SAM file format.
|
||||||
* `output`: data sink
|
* `output`: data sink
|
||||||
* `header=Header()`: SAM header object
|
* `header=Header()`: SAM header object
|
||||||
"""
|
"""
|
||||||
mutable struct Writer <: XAMWriter
|
mutable struct Writer <: BioGenerics.IO.AbstractWriter
|
||||||
stream::IO
|
stream::IO
|
||||||
|
|
||||||
function Writer(output::IO, header::Header=Header())
|
function Writer(output::IO, header::Header=Header())
|
||||||
|
|
|
@ -79,10 +79,10 @@
|
||||||
@test BAM.flag(record) === UInt16(16)
|
@test BAM.flag(record) === UInt16(16)
|
||||||
@test BAM.cigar(record) == "27M1D73M"
|
@test BAM.cigar(record) == "27M1D73M"
|
||||||
@test BAM.alignment(record) == Alignment([
|
@test BAM.alignment(record) == Alignment([
|
||||||
AlignmentAnchor( 0, 1, 0, OP_START),
|
AlignmentAnchor( 0, 1, OP_START),
|
||||||
AlignmentAnchor( 27, 28, 27, OP_MATCH),
|
AlignmentAnchor( 27, 28, OP_MATCH),
|
||||||
AlignmentAnchor( 27, 29, 28, OP_DELETE),
|
AlignmentAnchor( 27, 29, OP_DELETE),
|
||||||
AlignmentAnchor(100, 102, 101, OP_MATCH)])
|
AlignmentAnchor(100, 102, OP_MATCH)])
|
||||||
@test record["XG"] == 1
|
@test record["XG"] == 1
|
||||||
@test record["XM"] == 5
|
@test record["XM"] == 5
|
||||||
@test record["XN"] == 0
|
@test record["XN"] == 0
|
||||||
|
@ -209,14 +209,6 @@
|
||||||
close(reader)
|
close(reader)
|
||||||
close(writer)
|
close(writer)
|
||||||
|
|
||||||
|
|
||||||
# Check that EOF_BLOCK gets written.
|
|
||||||
nbytes = filesize(path)
|
|
||||||
@test BAM.BGZFStreams.EOF_BLOCK == open(path) do io
|
|
||||||
seek(io, nbytes - length(BAM.BGZFStreams.EOF_BLOCK))
|
|
||||||
read(io)
|
|
||||||
end
|
|
||||||
|
|
||||||
reader = open(BAM.Reader, path)
|
reader = open(BAM.Reader, path)
|
||||||
|
|
||||||
@test header(reader) == header_original
|
@test header(reader) == header_original
|
||||||
|
@ -249,21 +241,6 @@
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "BAI" begin
|
|
||||||
|
|
||||||
filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
|
|
||||||
|
|
||||||
index = BAM.BAI(filepath * ".bai")
|
|
||||||
reader = open(BAM.Reader, filepath, index=index)
|
|
||||||
|
|
||||||
@test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator)
|
|
||||||
|
|
||||||
close(reader)
|
|
||||||
|
|
||||||
@test_throws ErrorException open(BAM.Reader, filepath, index=1234)
|
|
||||||
|
|
||||||
end
|
|
||||||
|
|
||||||
@testset "Random access" begin
|
@testset "Random access" begin
|
||||||
filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
|
filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam")
|
||||||
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
|
reader = open(BAM.Reader, filepath, index=filepath * ".bai")
|
||||||
|
|
|
@ -104,10 +104,10 @@
|
||||||
@test SAM.flag(record) == 16
|
@test SAM.flag(record) == 16
|
||||||
@test SAM.cigar(record) == "27M1D73M"
|
@test SAM.cigar(record) == "27M1D73M"
|
||||||
@test SAM.alignment(record) == Alignment([
|
@test SAM.alignment(record) == Alignment([
|
||||||
AlignmentAnchor( 0, 1, 0, OP_START),
|
AlignmentAnchor( 0, 1, OP_START),
|
||||||
AlignmentAnchor( 27, 28, 27, OP_MATCH),
|
AlignmentAnchor( 27, 28, OP_MATCH),
|
||||||
AlignmentAnchor( 27, 29, 28, OP_DELETE),
|
AlignmentAnchor( 27, 29, OP_DELETE),
|
||||||
AlignmentAnchor(100, 102, 101, OP_MATCH)])
|
AlignmentAnchor(100, 102, OP_MATCH)])
|
||||||
@test record["XG"] == 1
|
@test record["XG"] == 1
|
||||||
@test record["XM"] == 5
|
@test record["XM"] == 5
|
||||||
@test record["XN"] == 0
|
@test record["XN"] == 0
|
||||||
|
|
Loading…
Reference in a new issue