mirror of
https://github.com/MillironX/XAM.jl.git
synced 2024-11-14 22:33:14 +00:00
Minimal code adjustments for working separation
- Update to use GenomicFeatures v2. - BioAlignments v2. - BioSequences v2. - Indexes v0.1.
This commit is contained in:
parent
7a56931b90
commit
1a3c986152
11 changed files with 76 additions and 31 deletions
29
Project.toml
29
Project.toml
|
@ -2,3 +2,32 @@ name = "XAM"
|
|||
uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
|
||||
authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
|
||||
version = "0.1.0"
|
||||
|
||||
[deps]
|
||||
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
|
||||
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
|
||||
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
|
||||
BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
|
||||
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
|
||||
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
|
||||
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
|
||||
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
|
||||
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
|
||||
|
||||
[compat]
|
||||
Automa = "0.7, 0.8"
|
||||
BGZFStreams = "0.3"
|
||||
BioAlignments = "2"
|
||||
BioCore = "2"
|
||||
BioSequences = "2"
|
||||
BufferedStreams = "1"
|
||||
GenomicFeatures = "2"
|
||||
Indexes = "0.1"
|
||||
julia = "1.1"
|
||||
|
||||
[extras]
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
|
||||
|
||||
[targets]
|
||||
test = ["Test", "YAML"]
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
module XAM
|
||||
|
||||
export
|
||||
BAM,
|
||||
SAM
|
||||
SAM,
|
||||
BAM
|
||||
|
||||
include("sam/sam.jl")
|
||||
include("bam/bam.jl")
|
||||
|
||||
using .SAM
|
||||
using .BAM
|
||||
|
||||
end # module
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
# An index type for the BAM file format.
|
||||
struct BAI
|
||||
# BGZF file index
|
||||
index::GenomicFeatures.Indexes.BGZFIndex
|
||||
index::Indexes.BGZFIndex
|
||||
|
||||
# number of unmapped reads
|
||||
n_no_coors::Union{Nothing, Int}
|
||||
|
@ -44,7 +44,7 @@ function read_bai(input::IO)
|
|||
|
||||
# read contents
|
||||
n_refs = read(input, Int32)
|
||||
index = GenomicFeatures.Indexes.read_bgzfindex(input, n_refs)
|
||||
index = Indexes.read_bgzfindex(input, n_refs)
|
||||
if !eof(input)
|
||||
n_no_coors = read(input, UInt64)
|
||||
else
|
||||
|
|
|
@ -3,11 +3,18 @@
|
|||
|
||||
module BAM
|
||||
|
||||
using BioCore
|
||||
using GenomicFeatures
|
||||
using XAM.SAM
|
||||
|
||||
import BGZFStreams
|
||||
import BioAlignments: BioAlignments, SAM
|
||||
import GenomicFeatures: GenomicFeatures, Interval
|
||||
import BioAlignments
|
||||
import Indexes
|
||||
import BioSequences
|
||||
import BioCore: BioCore, isfilled
|
||||
import BioCore: isfilled, header
|
||||
|
||||
import GenomicFeatures: eachoverlap
|
||||
|
||||
|
||||
include("bai.jl")
|
||||
include("auxdata.jl")
|
||||
|
|
|
@ -36,7 +36,7 @@ mutable struct OverlapIteratorState
|
|||
refindex::Int
|
||||
|
||||
# possibly overlapping chunks
|
||||
chunks::Vector{GenomicFeatures.Indexes.Chunk}
|
||||
chunks::Vector{Indexes.Chunk}
|
||||
|
||||
# current chunk index
|
||||
chunkid::Int
|
||||
|
@ -51,7 +51,7 @@ function Base.iterate(iter::OverlapIterator)
|
|||
throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
|
||||
end
|
||||
@assert iter.reader.index !== nothing
|
||||
chunks = GenomicFeatures.Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
||||
chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
|
||||
if !isempty(chunks)
|
||||
seek(iter.reader, first(chunks).start)
|
||||
end
|
||||
|
|
|
@ -66,10 +66,6 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header
|
|||
return header
|
||||
end
|
||||
|
||||
function BioCore.header(reader::Reader)
|
||||
return header(reader)
|
||||
end
|
||||
|
||||
function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset)
|
||||
seek(reader.stream, voffset)
|
||||
end
|
||||
|
|
|
@ -477,11 +477,11 @@ function hastemplength(record::Record)
|
|||
end
|
||||
|
||||
"""
|
||||
sequence(record::Record)::BioSequences.DNASequence
|
||||
sequence(record::Record)::BioSequences.LongDNASeq
|
||||
|
||||
Get the segment sequence of `record`.
|
||||
"""
|
||||
function sequence(record::Record)::BioSequences.DNASequence
|
||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||
checkfilled(record)
|
||||
seqlen = seqlength(record)
|
||||
data = Vector{UInt64}(undef, cld(seqlen, 16))
|
||||
|
@ -491,7 +491,7 @@ function sequence(record::Record)::BioSequences.DNASequence
|
|||
x = unsafe_load(src, i)
|
||||
data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
|
||||
end
|
||||
return BioSequences.DNASequence(data, 1:seqlen, false)
|
||||
return BioSequences.LongDNASeq(data, 1:seqlen, false)
|
||||
end
|
||||
|
||||
function hassequence(record::Record)
|
||||
|
|
|
@ -38,10 +38,6 @@ function header(reader::Reader)::Header
|
|||
return reader.header
|
||||
end
|
||||
|
||||
function BioCore.header(reader::Reader)
|
||||
return header(reader)
|
||||
end
|
||||
|
||||
function Base.eltype(::Type{Reader})
|
||||
return Record
|
||||
end
|
||||
|
|
|
@ -370,17 +370,17 @@ function hastemplength(record::Record)
|
|||
end
|
||||
|
||||
"""
|
||||
sequence(record::Record)::BioSequences.DNASequence
|
||||
sequence(record::Record)::BioSequences.LongDNASeq
|
||||
|
||||
Get the segment sequence of `record`.
|
||||
"""
|
||||
function sequence(record::Record)::BioSequences.DNASequence
|
||||
function sequence(record::Record)::BioSequences.LongDNASeq
|
||||
checkfilled(record)
|
||||
if ismissing(record, record.seq)
|
||||
missingerror(:sequence)
|
||||
end
|
||||
seqlen = length(record.seq)
|
||||
ret = BioSequences.DNASequence(seqlen)
|
||||
ret = BioSequences.LongDNASeq(seqlen)
|
||||
BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen)
|
||||
return ret
|
||||
end
|
||||
|
|
|
@ -3,12 +3,14 @@
|
|||
|
||||
module SAM
|
||||
|
||||
using BioCore
|
||||
|
||||
import Automa
|
||||
import Automa.RegExp: @re_str
|
||||
import BioAlignments
|
||||
import BioCore.Exceptions: missingerror
|
||||
import BioCore.RecordHelper: unsafe_parse_decimal
|
||||
import BioCore: BioCore, isfilled
|
||||
import BioCore: isfilled, header
|
||||
import BioSequences
|
||||
import BufferedStreams
|
||||
using Printf: @sprintf
|
||||
|
|
|
@ -1,13 +1,25 @@
|
|||
using Test
|
||||
using BioAlignments
|
||||
using BioSymbols
|
||||
using GenomicFeatures
|
||||
using XAM
|
||||
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
|
||||
import BGZFStreams: BGZFStream
|
||||
import BioCore.Exceptions: MissingFieldException
|
||||
import BioCore.Testing.get_bio_fmt_specimens
|
||||
import BioSequences: @dna_str, @aa_str
|
||||
import GenomicFeatures
|
||||
import YAML
|
||||
|
||||
import BioCore:
|
||||
header,
|
||||
isfilled,
|
||||
seqname,
|
||||
hasseqname,
|
||||
sequence,
|
||||
hassequence,
|
||||
leftposition,
|
||||
rightposition,
|
||||
hasleftposition,
|
||||
hasrightposition
|
||||
|
||||
# Generate a random range within `range`.
|
||||
function randrange(range)
|
||||
x = rand(range)
|
||||
|
@ -68,12 +80,12 @@ end
|
|||
record = SAM.Record()
|
||||
@test !isfilled(record)
|
||||
@test !SAM.ismapped(record)
|
||||
@test repr(record) == "BioAlignments.SAM.Record: <not filled>"
|
||||
@test repr(record) == "XAM.SAM.Record: <not filled>"
|
||||
@test_throws ArgumentError SAM.flag(record)
|
||||
|
||||
record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*")
|
||||
@test isfilled(record)
|
||||
@test occursin(r"^BioAlignments.SAM.Record:\n", repr(record))
|
||||
@test occursin(r"^XAM.SAM.Record:\n", repr(record))
|
||||
@test SAM.ismapped(record)
|
||||
@test SAM.isprimary(record)
|
||||
@test SAM.hastempname(record)
|
||||
|
@ -217,7 +229,7 @@ end
|
|||
@testset "Record" begin
|
||||
record = BAM.Record()
|
||||
@test !isfilled(record)
|
||||
@test repr(record) == "BioAlignments.BAM.Record: <not filled>"
|
||||
@test repr(record) == "XAM.BAM.Record: <not filled>"
|
||||
@test_throws ArgumentError BAM.flag(record)
|
||||
end
|
||||
|
||||
|
@ -225,7 +237,7 @@ end
|
|||
reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam"))
|
||||
@test isa(reader, BAM.Reader)
|
||||
@test eltype(reader) === BAM.Record
|
||||
@test startswith(repr(reader), "BioAlignments.BAM.Reader{IOStream}:")
|
||||
@test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:")
|
||||
|
||||
# header
|
||||
h = header(reader)
|
||||
|
|
Loading…
Reference in a new issue