mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-11-14 22:33:14 +00:00
Minimal code adjustments for working separation
- Update to use GenomicFeatures v2. - BioAlignments v2. - BioSequences v2. - Indexes v0.1.
This commit is contained in:
parent
7a56931b90
commit
1a3c986152
11 changed files with 76 additions and 31 deletions
29
Project.toml
29
Project.toml
|
@ -2,3 +2,32 @@ name = "XAM"
|
||||||
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
||||||
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
||||||
|
[deps]
|
||||||
|
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||||
|
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
|
||||||
|
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||||
|
BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
|
||||||
|
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||||
|
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
|
||||||
|
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||||
|
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
|
||||||
|
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||||
|
|
||||||
|
[compat]
|
||||||
|
Automa = "0.7, 0.8"
|
||||||
|
BGZFStreams = "0.3"
|
||||||
|
BioAlignments = "2"
|
||||||
|
BioCore = "2"
|
||||||
|
BioSequences = "2"
|
||||||
|
BufferedStreams = "1"
|
||||||
|
GenomicFeatures = "2"
|
||||||
|
Indexes = "0.1"
|
||||||
|
julia = "1.1"
|
||||||
|
|
||||||
|
[extras]
|
||||||
|
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||||
|
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
|
||||||
|
|
||||||
|
[targets]
|
||||||
|
test = ["Test", "YAML"]
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
module XAM
|
module XAM
|
||||||
|
|
||||||
export
|
export
|
||||||
BAM,
|
SAM,
|
||||||
SAM
|
BAM
|
||||||
|
|
||||||
include("sam/sam.jl")
|
include("sam/sam.jl")
|
||||||
include("bam/bam.jl")
|
include("bam/bam.jl")
|
||||||
|
|
||||||
|
using .SAM
|
||||||
|
using .BAM
|
||||||
|
|
||||||
end # module
|
end # module
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
# An index type for the BAM file format.
|
# An index type for the BAM file format.
|
||||||
struct BAI
|
struct BAI
|
||||||
# BGZF file index
|
# BGZF file index
|
||||||
index::GenomicFeatures.Indexes.BGZFIndex
|
index::Indexes.BGZFIndex
|
||||||
|
|
||||||
# number of unmapped reads
|
# number of unmapped reads
|
||||||
n_no_coors::Union{Nothing, Int}
|
n_no_coors::Union{Nothing, Int}
|
||||||
|
@ -44,7 +44,7 @@ function read_bai(input::IO)
|
||||||
|
|
||||||
# read contents
|
# read contents
|
||||||
n_refs = read(input, Int32)
|
n_refs = read(input, Int32)
|
||||||
index = GenomicFeatures.Indexes.read_bgzfindex(input, n_refs)
|
index = Indexes.read_bgzfindex(input, n_refs)
|
||||||
if !eof(input)
|
if !eof(input)
|
||||||
n_no_coors = read(input, UInt64)
|
n_no_coors = read(input, UInt64)
|
||||||
else
|
else
|
||||||
|
|
|
@ -3,11 +3,18 @@
|
||||||
|
|
||||||
module BAM
|
module BAM
|
||||||
|
|
||||||
|
using BioCore
|
||||||
|
using GenomicFeatures
|
||||||
|
using XAM.SAM
|
||||||
|
|
||||||
import BGZFStreams
|
import BGZFStreams
|
||||||
import BioAlignments: BioAlignments, SAM
|
import BioAlignments
|
||||||
import GenomicFeatures: GenomicFeatures, Interval
|
import Indexes
|
||||||
import BioSequences
|
import BioSequences
|
||||||
import BioCore: BioCore, isfilled
|
import BioCore: isfilled, header
|
||||||
|
|
||||||
|
import GenomicFeatures: eachoverlap
|
||||||
|
|
||||||
|
|
||||||
include("bai.jl")
|
include("bai.jl")
|
||||||
include("auxdata.jl")
|
include("auxdata.jl")
|
||||||
|
|
|
@ -36,7 +36,7 @@ mutable struct OverlapIteratorState
|
||||||
refindex::Int
|
refindex::Int
|
||||||
|
|
||||||
# possibly overlapping chunks
|
# possibly overlapping chunks
|
||||||
chunks::Vector{GenomicFeatures.Indexes.Chunk}
|
chunks::Vector{Indexes.Chunk}
|
||||||
|
|
||||||
# current chunk index
|
# current chunk index
|
||||||
chunkid::Int
|
chunkid::Int
|
||||||
|
@ -51,7 +51,7 @@ function Base.iterate(iter::OverlapIterator)
|
||||||
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
||||||
end
|
end
|
||||||
@assert iter.reader.index !== nothing
|
@assert iter.reader.index !== nothing
|
||||||
chunks = GenomicFeatures.Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
||||||
if !isempty(chunks)
|
if !isempty(chunks)
|
||||||
seek(iter.reader, first(chunks).start)
|
seek(iter.reader, first(chunks).start)
|
||||||
end
|
end
|
||||||
|
|
|
@ -66,10 +66,6 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
|
||||||
return header
|
return header
|
||||||
end
|
end
|
||||||
|
|
||||||
function BioCore.header(reader::Reader)
|
|
||||||
return header(reader)
|
|
||||||
end
|
|
||||||
|
|
||||||
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
|
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
|
||||||
seek(reader.stream, voffset)
|
seek(reader.stream, voffset)
|
||||||
end
|
end
|
||||||
|
|
|
@ -477,11 +477,11 @@ function hastemplength(record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
sequence(record::Record)::BioSequences.DNASequence
|
sequence(record::Record)::BioSequences.LongDNASeq
|
||||||
|
|
||||||
Get the segment sequence of `record`.
|
Get the segment sequence of `record`.
|
||||||
"""
|
"""
|
||||||
function sequence(record::Record)::BioSequences.DNASequence
|
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
seqlen = seqlength(record)
|
seqlen = seqlength(record)
|
||||||
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
||||||
|
@ -491,7 +491,7 @@ function sequence(record::Record)::BioSequences.DNASequence
|
||||||
x = unsafe_load(src, i)
|
x = unsafe_load(src, i)
|
||||||
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
||||||
end
|
end
|
||||||
return BioSequences.DNASequence(data, 1:seqlen, false)
|
return BioSequences.LongDNASeq(data, 1:seqlen, false)
|
||||||
end
|
end
|
||||||
|
|
||||||
function hassequence(record::Record)
|
function hassequence(record::Record)
|
||||||
|
|
|
@ -38,10 +38,6 @@ function header(reader::Reader)::Header
|
||||||
return reader.header
|
return reader.header
|
||||||
end
|
end
|
||||||
|
|
||||||
function BioCore.header(reader::Reader)
|
|
||||||
return header(reader)
|
|
||||||
end
|
|
||||||
|
|
||||||
function Base.eltype(::Type{Reader})
|
function Base.eltype(::Type{Reader})
|
||||||
return Record
|
return Record
|
||||||
end
|
end
|
||||||
|
|
|
@ -370,17 +370,17 @@ function hastemplength(record::Record)
|
||||||
end
|
end
|
||||||
|
|
||||||
"""
|
"""
|
||||||
sequence(record::Record)::BioSequences.DNASequence
|
sequence(record::Record)::BioSequences.LongDNASeq
|
||||||
|
|
||||||
Get the segment sequence of `record`.
|
Get the segment sequence of `record`.
|
||||||
"""
|
"""
|
||||||
function sequence(record::Record)::BioSequences.DNASequence
|
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||||
checkfilled(record)
|
checkfilled(record)
|
||||||
if ismissing(record, record.seq)
|
if ismissing(record, record.seq)
|
||||||
missingerror(:sequence)
|
missingerror(:sequence)
|
||||||
end
|
end
|
||||||
seqlen = length(record.seq)
|
seqlen = length(record.seq)
|
||||||
ret = BioSequences.DNASequence(seqlen)
|
ret = BioSequences.LongDNASeq(seqlen)
|
||||||
BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
|
BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,12 +3,14 @@
|
||||||
|
|
||||||
module SAM
|
module SAM
|
||||||
|
|
||||||
|
using BioCore
|
||||||
|
|
||||||
import Automa
|
import Automa
|
||||||
import Automa.RegExp: @re_str
|
import Automa.RegExp: @re_str
|
||||||
import BioAlignments
|
import BioAlignments
|
||||||
import BioCore.Exceptions: missingerror
|
import BioCore.Exceptions: missingerror
|
||||||
import BioCore.RecordHelper: unsafe_parse_decimal
|
import BioCore.RecordHelper: unsafe_parse_decimal
|
||||||
import BioCore: BioCore, isfilled
|
import BioCore: isfilled, header
|
||||||
import BioSequences
|
import BioSequences
|
||||||
import BufferedStreams
|
import BufferedStreams
|
||||||
using Printf: @sprintf
|
using Printf: @sprintf
|
||||||
|
|
|
@ -1,13 +1,25 @@
|
||||||
using Test
|
using Test
|
||||||
using BioAlignments
|
using GenomicFeatures
|
||||||
using BioSymbols
|
using XAM
|
||||||
|
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
|
||||||
import BGZFStreams: BGZFStream
|
import BGZFStreams: BGZFStream
|
||||||
import BioCore.Exceptions: MissingFieldException
|
import BioCore.Exceptions: MissingFieldException
|
||||||
import BioCore.Testing.get_bio_fmt_specimens
|
import BioCore.Testing.get_bio_fmt_specimens
|
||||||
import BioSequences: @dna_str, @aa_str
|
import BioSequences: @dna_str, @aa_str
|
||||||
import GenomicFeatures
|
|
||||||
import YAML
|
import YAML
|
||||||
|
|
||||||
|
import BioCore:
|
||||||
|
header,
|
||||||
|
isfilled,
|
||||||
|
seqname,
|
||||||
|
hasseqname,
|
||||||
|
sequence,
|
||||||
|
hassequence,
|
||||||
|
leftposition,
|
||||||
|
rightposition,
|
||||||
|
hasleftposition,
|
||||||
|
hasrightposition
|
||||||
|
|
||||||
# Generate a random range within `range`.
|
# Generate a random range within `range`.
|
||||||
function randrange(range)
|
function randrange(range)
|
||||||
x = rand(range)
|
x = rand(range)
|
||||||
|
@ -68,12 +80,12 @@ end
|
||||||
record = SAM.Record()
|
record = SAM.Record()
|
||||||
@test !isfilled(record)
|
@test !isfilled(record)
|
||||||
@test !SAM.ismapped(record)
|
@test !SAM.ismapped(record)
|
||||||
@test repr(record) == "BioAlignments.SAM.Record: <not filled>"
|
@test repr(record) == "XAM.SAM.Record: <not filled>"
|
||||||
@test_throws ArgumentError SAM.flag(record)
|
@test_throws ArgumentError SAM.flag(record)
|
||||||
|
|
||||||
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
|
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
|
||||||
@test isfilled(record)
|
@test isfilled(record)
|
||||||
@test occursin(r"^BioAlignments.SAM.Record:\n", repr(record))
|
@test occursin(r"^XAM.SAM.Record:\n", repr(record))
|
||||||
@test SAM.ismapped(record)
|
@test SAM.ismapped(record)
|
||||||
@test SAM.isprimary(record)
|
@test SAM.isprimary(record)
|
||||||
@test SAM.hastempname(record)
|
@test SAM.hastempname(record)
|
||||||
|
@ -217,7 +229,7 @@ end
|
||||||
@testset "Record" begin
|
@testset "Record" begin
|
||||||
record = BAM.Record()
|
record = BAM.Record()
|
||||||
@test !isfilled(record)
|
@test !isfilled(record)
|
||||||
@test repr(record) == "BioAlignments.BAM.Record: <not filled>"
|
@test repr(record) == "XAM.BAM.Record: <not filled>"
|
||||||
@test_throws ArgumentError BAM.flag(record)
|
@test_throws ArgumentError BAM.flag(record)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -225,7 +237,7 @@ end
|
||||||
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
||||||
@test isa(reader, BAM.Reader)
|
@test isa(reader, BAM.Reader)
|
||||||
@test eltype(reader) === BAM.Record
|
@test eltype(reader) === BAM.Record
|
||||||
@test startswith(repr(reader), "BioAlignments.BAM.Reader{IOStream}:")
|
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
|
||||||
|
|
||||||
# header
|
# header
|
||||||
h = header(reader)
|
h = header(reader)
|
||||||
|
|
Loading…
Reference in a new issue