1
0
Fork 0
mirror of https://github.com/MillironX/XAM.jl.git synced 2024-12-25 06:08:19 +00:00

Merge branch 'feature/transcodingstreams' into develop

This commit is contained in:
Ciarán O'Mara 2020-03-16 21:56:24 +11:00
commit 2aaab4d9ab
11 changed files with 545 additions and 273 deletions

View file

@ -7,22 +7,22 @@ version = "0.1.1"
Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e"
BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81" BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446"
Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d" Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
[compat] [compat]
Automa = "0.7, 0.8" Automa = "0.7, 0.8"
BGZFStreams = "0.3" BGZFStreams = "0.3"
BioAlignments = "2" BioAlignments = "2"
BioCore = "2" BioGenerics = "0.1"
BioSequences = "2" BioSequences = "2"
BufferedStreams = "1"
GenomicFeatures = "2" GenomicFeatures = "2"
Indexes = "0.1" Indexes = "0.1"
TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
julia = "1.1" julia = "1.1"
[extras] [extras]

View file

@ -3,7 +3,7 @@
module BAM module BAM
using BioCore using BioGenerics
using GenomicFeatures using GenomicFeatures
using XAM.SAM using XAM.SAM
@ -11,7 +11,7 @@ import BGZFStreams
import BioAlignments import BioAlignments
import Indexes import Indexes
import BioSequences import BioSequences
import BioCore: isfilled, header import BioGenerics: isfilled, header
import GenomicFeatures: eachoverlap import GenomicFeatures: eachoverlap

View file

@ -10,7 +10,7 @@ Create a data reader of the BAM file format.
* `input`: data source * `input`: data source
* `index=nothing`: filepath to a random access index (currently *bai* is supported) * `index=nothing`: filepath to a random access index (currently *bai* is supported)
""" """
mutable struct Reader{T} <: BioCore.IO.AbstractReader mutable struct Reader{T} <: BioGenerics.IO.AbstractReader
stream::BGZFStreams.BGZFStream{T} stream::BGZFStreams.BGZFStream{T}
header::SAM.Header header::SAM.Header
start_offset::BGZFStreams.VirtualOffset start_offset::BGZFStreams.VirtualOffset
@ -23,7 +23,7 @@ function Base.eltype(::Type{Reader{T}}) where T
return Record return Record
end end
function BioCore.IO.stream(reader::Reader) function BioGenerics.IO.stream(reader::Reader)
return reader.stream return reader.stream
end end

View file

@ -566,42 +566,42 @@ function Base.values(record::Record)
end end
# BioCore Methods # BioGenerics Methods
# ----------- # -----------
function BioCore.isfilled(record::Record) function BioGenerics.isfilled(record::Record)
return record.block_size != 0 return record.block_size != 0
end end
function BioCore.seqname(record::Record) function BioGenerics.seqname(record::Record)
return tempname(record) return tempname(record)
end end
function BioCore.hasseqname(record::Record) function BioGenerics.hasseqname(record::Record)
return hastempname(record) return hastempname(record)
end end
function BioCore.sequence(record::Record) function BioGenerics.sequence(record::Record)
return sequence(record) return sequence(record)
end end
function BioCore.hassequence(record::Record) function BioGenerics.hassequence(record::Record)
return hassequence(record) return hassequence(record)
end end
function BioCore.leftposition(record::Record) function BioGenerics.leftposition(record::Record)
return position(record) return position(record)
end end
function BioCore.hasleftposition(record::Record) function BioGenerics.hasleftposition(record::Record)
return hasposition(record) return hasposition(record)
end end
function BioCore.rightposition(record::Record) function BioGenerics.rightposition(record::Record)
return rightposition(record) return rightposition(record)
end end
function BioCore.hasrightposition(record::Record) function BioGenerics.hasrightposition(record::Record)
return hasrightposition(record) return hasrightposition(record)
end end

View file

@ -10,7 +10,7 @@ Create a data writer of the BAM file format.
* `output`: data sink * `output`: data sink
* `header`: SAM header object * `header`: SAM header object
""" """
mutable struct Writer <: BioCore.IO.AbstractWriter mutable struct Writer <: BioGenerics.IO.AbstractWriter
stream::BGZFStreams.BGZFStream stream::BGZFStreams.BGZFStream
end end
@ -25,7 +25,7 @@ function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header)
return Writer(stream) return Writer(stream)
end end
function BioCore.IO.stream(writer::Writer) function BioGenerics.IO.stream(writer::Writer)
return writer.stream return writer.stream
end end

View file

@ -1,16 +1,22 @@
# SAM Reader # SAM Reader
# ========= # =========
mutable struct Reader <: BioCore.IO.AbstractReader mutable struct Reader{S <: TranscodingStream} <: BioGenerics.IO.AbstractReader
state::BioCore.Ragel.State state::State{S}
header::Header header::Header
end
function Reader(input::BufferedStreams.BufferedInputStream) function Reader(state::State{S}) where {S <: TranscodingStream}
reader = new(BioCore.Ragel.State(sam_header_machine.start_state, input), Header())
readheader!(reader) rdr = Reader(state, Header())
reader.state.cs = sam_body_machine.start_state
return reader cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
end
rdr.state.state = sam_machine_body.start_state
rdr.state.linenum = ln
rdr.state.filled = false
return rdr
end end
""" """
@ -22,10 +28,22 @@ Create a data reader of the SAM file format.
* `input`: data source * `input`: data source
""" """
function Reader(input::IO) function Reader(input::IO)
return Reader(BufferedStreams.BufferedInputStream(input))
if input isa TranscodingStream
return Reader(State(input, 1, 1, false))
end
stream = TranscodingStreams.NoopStream(input)
return Reader(State(stream, 1, 1, false))
end end
function BioCore.IO.stream(reader::Reader) function Base.eltype(::Type{<:Reader})
return Record
end
function BioGenerics.IO.stream(reader::Reader)
return reader.state.stream return reader.state.stream
end end
@ -38,224 +56,42 @@ function header(reader::Reader)::Header
return reader.header return reader.header
end end
function Base.eltype(::Type{Reader}) function Base.close(reader::Reader)
return Record if reader.state.stream isa IO
close(reader.state.stream)
end
return nothing
end end
# file = header . body function index!(record::MetaInfo)
# header = metainfo* stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
# body = record* found = index!(stream, record)
isinteractive() && info("compiling SAM") if !found
const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_machine = (function () throw(ArgumentError("invalid SAM metadata"))
cat = Automa.RegExp.cat
rep = Automa.RegExp.rep
alt = Automa.RegExp.alt
opt = Automa.RegExp.opt
any = Automa.RegExp.any
metainfo = let
tag = re"[A-Z][A-Z]" \ cat("CO")
tag.actions[:enter] = [:mark1]
tag.actions[:exit] = [:metainfo_tag]
dict = let
key = re"[A-Za-z][A-Za-z0-9]"
key.actions[:enter] = [:mark2]
key.actions[:exit] = [:metainfo_dict_key]
val = re"[ -~]+"
val.actions[:enter] = [:mark2]
val.actions[:exit] = [:metainfo_dict_val]
keyval = cat(key, ':', val)
cat(keyval, rep(cat('\t', keyval)))
end end
dict.actions[:enter] = [:mark1] return record
dict.actions[:exit] = [:metainfo_val] end
co = cat("CO") function index!(record::Record)
co.actions[:enter] = [:mark1] stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
co.actions[:exit] = [:metainfo_tag] found = index!(stream, record)
if !found
comment = re"[^\r\n]*" throw(ArgumentError("invalid SAM record"))
comment.actions[:enter] = [:mark1]
comment.actions[:exit] = [:metainfo_val]
cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment)))
end end
metainfo.actions[:enter] = [:anchor] return record
metainfo.actions[:exit] = [:metainfo] end
record = let function Base.read!(rdr::Reader, rec::Record)
qname = re"[!-?A-~]+"
qname.actions[:enter] = [:mark]
qname.actions[:exit] = [:record_qname]
flag = re"[0-9]+" cs, ln, f = readrecord!(rdr.state.stream, rec, (rdr.state.state, rdr.state.linenum))
flag.actions[:enter] = [:mark]
flag.actions[:exit] = [:record_flag]
rname = re"\*|[!-()+-<>-~][!-~]*" rdr.state.state = cs
rname.actions[:enter] = [:mark] rdr.state.linenum = ln
rname.actions[:exit] = [:record_rname] rdr.state.filled = f
pos = re"[0-9]+" if !f
pos.actions[:enter] = [:mark] cs == 0 && throw(EOFError())
pos.actions[:exit] = [:record_pos] throw(ArgumentError("malformed SAM file"))
mapq = re"[0-9]+"
mapq.actions[:enter] = [:mark]
mapq.actions[:exit] = [:record_mapq]
cigar = re"\*|([0-9]+[MIDNSHPX=])+"
cigar.actions[:enter] = [:mark]
cigar.actions[:exit] = [:record_cigar]
rnext = re"\*|=|[!-()+-<>-~][!-~]*"
rnext.actions[:enter] = [:mark]
rnext.actions[:exit] = [:record_rnext]
pnext = re"[0-9]+"
pnext.actions[:enter] = [:mark]
pnext.actions[:exit] = [:record_pnext]
tlen = re"[-+]?[0-9]+"
tlen.actions[:enter] = [:mark]
tlen.actions[:exit] = [:record_tlen]
seq = re"\*|[A-Za-z=.]+"
seq.actions[:enter] = [:mark]
seq.actions[:exit] = [:record_seq]
qual = re"[!-~]+"
qual.actions[:enter] = [:mark]
qual.actions[:exit] = [:record_qual]
field = let
tag = re"[A-Za-z][A-Za-z0-9]"
val = alt(
re"A:[!-~]",
re"i:[-+]?[0-9]+",
re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?",
re"Z:[ !-~]*",
re"H:([0-9A-F][0-9A-F])*",
re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+")
cat(tag, ':', val)
end end
field.actions[:enter] = [:mark] return rec
field.actions[:exit] = [:record_field] end
cat(
qname, '\t',
flag, '\t',
rname, '\t',
pos, '\t',
mapq, '\t',
cigar, '\t',
rnext, '\t',
pnext, '\t',
tlen, '\t',
seq, '\t',
qual,
rep(cat('\t', field)))
end
record.actions[:enter] = [:anchor]
record.actions[:exit] = [:record]
newline = let
lf = re"\n"
lf.actions[:enter] = [:countline]
cat(re"\r?", lf)
end
header = rep(cat(metainfo, newline))
header.actions[:exit] = [:header]
header = cat(header, opt(any() \ cat('@'))) # look ahead
body = rep(cat(record, newline))
return map(Automa.compile, (metainfo, record, header, body))
end)()
const sam_metainfo_actions = Dict(
:metainfo_tag => :(record.tag = (mark1:p-1) .- offset),
:metainfo_val => :(record.val = (mark1:p-1) .- offset),
:metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)),
:metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)),
:metainfo => quote
BioCore.ReaderHelper.resize_and_copy!(record.data, data, offset+1:p-1)
record.filled = (offset+1:p-1) .- offset
end,
:anchor => :(),
:mark1 => :(mark1 = p),
:mark2 => :(mark2 = p))
eval(
BioCore.ReaderHelper.generate_index_function(
MetaInfo,
sam_metainfo_machine,
:(mark1 = mark2 = offset = 0),
sam_metainfo_actions))
eval(
BioCore.ReaderHelper.generate_readheader_function(
Reader,
MetaInfo,
sam_header_machine,
:(mark1 = mark2 = offset = 0),
merge(sam_metainfo_actions, Dict(
:metainfo => quote
BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1)
record.filled = (offset+1:p-1) .- offset
@assert isfilled(record)
push!(reader.header.metainfo, record)
BioCore.ReaderHelper.ensure_margin!(stream)
record = MetaInfo()
end,
:header => :(finish_header = true; @escape),
:countline => :(linenum += 1),
:anchor => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1))),
quote
if !eof(stream)
stream.position -= 1 # cancel look-ahead
end
end))
const sam_record_actions = Dict(
:record_qname => :(record.qname = (mark:p-1) .- offset),
:record_flag => :(record.flag = (mark:p-1) .- offset),
:record_rname => :(record.rname = (mark:p-1) .- offset),
:record_pos => :(record.pos = (mark:p-1) .- offset),
:record_mapq => :(record.mapq = (mark:p-1) .- offset),
:record_cigar => :(record.cigar = (mark:p-1) .- offset),
:record_rnext => :(record.rnext = (mark:p-1) .- offset),
:record_pnext => :(record.pnext = (mark:p-1) .- offset),
:record_tlen => :(record.tlen = (mark:p-1) .- offset),
:record_seq => :(record.seq = (mark:p-1) .- offset),
:record_qual => :(record.qual = (mark:p-1) .- offset),
:record_field => :(push!(record.fields, (mark:p-1) .- offset)),
:record => quote
BioCore.ReaderHelper.resize_and_copy!(record.data, data, 1:p-1)
record.filled = (offset+1:p-1) .- offset
end,
:anchor => :(),
:mark => :(mark = p))
eval(
BioCore.ReaderHelper.generate_index_function(
Record,
sam_record_machine,
:(mark = offset = 0),
sam_record_actions))
eval(
BioCore.ReaderHelper.generate_read_function(
Reader,
sam_body_machine,
:(mark = offset = 0),
merge(sam_record_actions, Dict(
:record => quote
BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1)
record.filled = (offset+1:p-1) .- offset
found_record = true
@escape
end,
:countline => :(linenum += 1),
:anchor => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1)))))

395
src/sam/readrecord.jl Normal file
View file

@ -0,0 +1,395 @@
# Automa.jl generated readrecord! and readmetainfo! functions
# ========================================
# file = header . body
# header = metainfo*
# body = record*
const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = (function ()
isinteractive() && info("compiling SAM")
cat = Automa.RegExp.cat
rep = Automa.RegExp.rep
alt = Automa.RegExp.alt
opt = Automa.RegExp.opt
any = Automa.RegExp.any
metainfo = let
tag = re"[A-Z][A-Z]" \ cat("CO")
tag.actions[:enter] = [:pos1]
tag.actions[:exit] = [:metainfo_tag]
dict = let
key = re"[A-Za-z][A-Za-z0-9]"
key.actions[:enter] = [:pos2]
key.actions[:exit] = [:metainfo_dict_key]
val = re"[ -~]+"
val.actions[:enter] = [:pos2]
val.actions[:exit] = [:metainfo_dict_val]
keyval = cat(key, ':', val)
cat(keyval, rep(cat('\t', keyval)))
end
dict.actions[:enter] = [:pos1]
dict.actions[:exit] = [:metainfo_val]
co = cat("CO")
co.actions[:enter] = [:pos1]
co.actions[:exit] = [:metainfo_tag]
comment = re"[^\r\n]*"
comment.actions[:enter] = [:pos1]
comment.actions[:exit] = [:metainfo_val]
cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment)))
end
metainfo.actions[:enter] = [:mark]
metainfo.actions[:exit] = [:metainfo]
record = let
qname = re"[!-?A-~]+"
qname.actions[:enter] = [:pos]
qname.actions[:exit] = [:record_qname]
flag = re"[0-9]+"
flag.actions[:enter] = [:pos]
flag.actions[:exit] = [:record_flag]
rname = re"\*|[!-()+-<>-~][!-~]*"
rname.actions[:enter] = [:pos]
rname.actions[:exit] = [:record_rname]
pos = re"[0-9]+"
pos.actions[:enter] = [:pos]
pos.actions[:exit] = [:record_pos]
mapq = re"[0-9]+"
mapq.actions[:enter] = [:pos]
mapq.actions[:exit] = [:record_mapq]
cigar = re"\*|([0-9]+[MIDNSHPX=])+"
cigar.actions[:enter] = [:pos]
cigar.actions[:exit] = [:record_cigar]
rnext = re"\*|=|[!-()+-<>-~][!-~]*"
rnext.actions[:enter] = [:pos]
rnext.actions[:exit] = [:record_rnext]
pnext = re"[0-9]+"
pnext.actions[:enter] = [:pos]
pnext.actions[:exit] = [:record_pnext]
tlen = re"[-+]?[0-9]+"
tlen.actions[:enter] = [:pos]
tlen.actions[:exit] = [:record_tlen]
seq = re"\*|[A-Za-z=.]+"
seq.actions[:enter] = [:pos]
seq.actions[:exit] = [:record_seq]
qual = re"[!-~]+"
qual.actions[:enter] = [:pos]
qual.actions[:exit] = [:record_qual]
field = let
tag = re"[A-Za-z][A-Za-z0-9]"
val = alt(
re"A:[!-~]",
re"i:[-+]?[0-9]+",
re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?",
re"Z:[ !-~]*",
re"H:([0-9A-F][0-9A-F])*",
re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+")
cat(tag, ':', val)
end
field.actions[:enter] = [:pos]
field.actions[:exit] = [:record_field]
cat(
qname, '\t',
flag, '\t',
rname, '\t',
pos, '\t',
mapq, '\t',
cigar, '\t',
rnext, '\t',
pnext, '\t',
tlen, '\t',
seq, '\t',
qual,
rep(cat('\t', field)))
end
record.actions[:enter] = [:mark]
record.actions[:exit] = [:record]
newline = let
lf = re"\n"
lf.actions[:enter] = [:countline]
cat(re"\r?", lf)
end
header = rep(cat(metainfo, newline))
header.actions[:exit] = [:header]
header = cat(header, opt(any() \ cat('@'))) # look ahead
body = rep(cat(record, newline))
body.actions[:exit] = [:body]
sam = cat(header, body)
return map(Automa.compile, (metainfo, record, header, body, sam))
end)()
# write("sam_machine_metainfo.dot", Automa.machine2dot(sam_machine_metainfo))
# run(`dot -Tsvg -o sam_machine_metainfo.svg sam_machine_metainfo.dot`)
#
# write("sam_machine_record.dot", Automa.machine2dot(sam_machine_record))
# run(`dot -Tsvg -o sam_machine_record.svg sam_machine_record.dot`)
#
# write("sam_machine_header.dot", Automa.machine2dot(sam_machine_header))
# run(`dot -Tsvg -o sam_machine_header.svg sam_machine_header.dot`)
#
# write("sam_machine_body.dot", Automa.machine2dot(sam_machine_body))
# run(`dot -Tsvg -o sam_machine_body.svg sam_machine_body.dot`)
#
# write("sam_machine.dot", Automa.machine2dot(sam_machine))
# run(`dot -Tsvg -o sam_machine.svg sam_machine.dot`)
function appendfrom!(dst, dpos, src, spos, n)
if length(dst) < dpos + n - 1
resize!(dst, dpos + n - 1)
end
copyto!(dst, dpos, src, spos, n)
return dst
end
const sam_actions_metainfo = Dict(
:mark => :(@mark),
:pos1 => :(pos1 = @relpos(p)),
:pos2 => :(pos2 = @relpos(p)),
:metainfo_tag => :(metainfo.tag = pos1:@relpos(p-1)),
:metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
:metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
:metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
:metainfo => quote
let markpos = @markpos()
appendfrom!(metainfo.data, 1, data, markpos, length(markpos:p-1))
metainfo.filled = @relpos(markpos):@relpos(p-1)
found_metainfo = true
end
end
)
const sam_actions_header = merge(
sam_actions_metainfo,
Dict(
:countline => :(linenum += 1),
:header => quote
finish_header = true
if !eof(stream)
p -= 1 # cancel look-ahead
end
@escape
end
)
)
const sam_actions_record = Dict(
:mark => :(@mark),
:pos => :(pos = @relpos(p)),
:record_qname => :(record.qname = pos:@relpos(p-1)),
:record_flag => :(record.flag = pos:@relpos(p-1)),
:record_rname => :(record.rname = pos:@relpos(p-1)),
:record_pos => :(record.pos = pos:@relpos(p-1)),
:record_mapq => :(record.mapq = pos:@relpos(p-1)),
:record_cigar => :(record.cigar = pos:@relpos(p-1)),
:record_rnext => :(record.rnext = pos:@relpos(p-1)),
:record_pnext => :(record.pnext = pos:@relpos(p-1)),
:record_tlen => :(record.tlen = pos:@relpos(p-1)),
:record_seq => :(record.seq = pos:@relpos(p-1)),
:record_qual => :(record.qual = pos:@relpos(p-1)),
:record_field => :(push!(record.fields, pos:@relpos(p-1))),
:record => quote
let markpos = @markpos()
appendfrom!(record.data, 1, data, markpos, length(markpos:p-1))
record.filled = @relpos(markpos):@relpos(p-1)
found_record = true
@escape
end
end
)
const sam_actions_body = merge(
sam_actions_record,
Dict(
:countline => :(linenum += 1),
:body => quote
finish_body = true
@escape
end
)
)
# const sam_actions = merge(
# sam_actions_header,
# sam_actions_body
# )
const sam_context = Automa.CodeGenContext(
generator = :goto,
checkbounds = false,
loopunroll = 0
)
const sam_initcode_metainfo = quote
pos1 = 0
pos2 = 0
found_metainfo = false
end
const sam_initcode_record = quote
pos = 0
found_record = false
end
const sam_initcode_header = quote
$(sam_initcode_metainfo)
metainfo = MetaInfo()
finish_header = false
cs, linenum = state
end
const sam_initcode_body = quote
$(sam_initcode_record)
finish_body = false
cs, linenum = state
end
const sam_loopcode_metainfo = quote
if cs < 0
throw(ArgumentError("malformed metainfo at pos $(p)"))
end
if found_metainfo
@goto __return__
end
end
const sam_returncode_metainfo = quote
return found_metainfo
end
Automa.Stream.generate_reader(
:index!,
sam_machine_metainfo,
arguments = (:(metainfo::MetaInfo),),
actions = sam_actions_metainfo,
context = sam_context,
initcode = sam_initcode_metainfo,
loopcode = sam_loopcode_metainfo,
returncode = sam_returncode_metainfo
) |> eval
const sam_loopcode_header = quote
if cs < 0
throw(ArgumentError("malformed metainfo at line $(linenum)"))
end
if found_metainfo
push!(header, metainfo)
found_metainfo = false
end
metainfo = MetaInfo()
if finish_header
@goto __return__
end
end
const sam_returncode_header = quote
return cs, linenum, finish_header
end
Automa.Stream.generate_reader(
:readheader!,
sam_machine_header,
arguments = (:(header::SAM.Header), :(state::Tuple{Int,Int})),
actions = sam_actions_header,
context = sam_context,
initcode = sam_initcode_header,
loopcode = sam_loopcode_header,
returncode = sam_returncode_header
) |> eval
const sam_loopcode_record = quote
if cs < 0
throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)"))
end
# # if cs != 0
# # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
# # end
if found_record
@goto __return__
end
end
const sam_loopcode_body = quote
$(sam_loopcode_record)
if finish_body
@goto __return__
end
end
const sam_returncode_record = quote
return found_record
end
Automa.Stream.generate_reader(
:index!,
sam_machine_record,
arguments = (:(record::Record),),
actions = sam_actions_record,
context = sam_context,
initcode = sam_initcode_record,
loopcode = sam_loopcode_record,
returncode = sam_returncode_record
) |> eval
const sam_returncode_body = quote
return cs, linenum, found_record
end
Automa.Stream.generate_reader(
:readrecord!,
sam_machine_body,
arguments = (:(record::Record), :(state::Tuple{Int,Int})),
actions = sam_actions_body,
context = sam_context,
initcode = sam_initcode_body,
loopcode = sam_loopcode_body,
returncode = sam_returncode_body
) |> eval

View file

@ -514,39 +514,39 @@ end
# Bio Methods # Bio Methods
# ----------- # -----------
function BioCore.isfilled(record::Record) function BioGenerics.isfilled(record::Record)
return !isempty(record.filled) return !isempty(record.filled)
end end
function BioCore.seqname(record::Record) function BioGenerics.seqname(record::Record)
return tempname(record) return tempname(record)
end end
function BioCore.hasseqname(record::Record) function BioGenerics.hasseqname(record::Record)
return hastempname(record) return hastempname(record)
end end
function BioCore.sequence(record::Record) function BioGenerics.sequence(record::Record)
return sequence(record) return sequence(record)
end end
function BioCore.hassequence(record::Record) function BioGenerics.hassequence(record::Record)
return hassequence(record) return hassequence(record)
end end
function BioCore.rightposition(record::Record) function BioGenerics.rightposition(record::Record)
return rightposition(record) return rightposition(record)
end end
function BioCore.hasrightposition(record::Record) function BioGenerics.hasrightposition(record::Record)
return hasrightposition(record) return hasrightposition(record)
end end
function BioCore.leftposition(record::Record) function BioGenerics.leftposition(record::Record)
return position(record) return position(record)
end end
function BioCore.hasleftposition(record::Record) function BioGenerics.hasleftposition(record::Record)
return hasposition(record) return hasposition(record)
end end

View file

@ -3,23 +3,73 @@
module SAM module SAM
using BioCore using BioGenerics
import Automa import Automa
import Automa.RegExp: @re_str import Automa.RegExp: @re_str
import Automa.Stream: @mark, @markpos, @relpos, @abspos
import BioAlignments import BioAlignments
import BioCore.Exceptions: missingerror import BioGenerics: BioGenerics, isfilled, header
import BioCore.RecordHelper: unsafe_parse_decimal import BioGenerics.Exceptions: missingerror
import BioCore: isfilled, header import BioGenerics.Automa: State
import BioSequences import BioSequences
import BufferedStreams import TranscodingStreams: TranscodingStreams, TranscodingStream
using Printf: @sprintf using Printf: @sprintf
#TODO: update import BioCore.RecordHelper: unsafe_parse_decimal
# r"[0-9]+" must match `data[range]`.
function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Unsigned}
x = zero(T)
@inbounds for i in range
x = Base.Checked.checked_mul(x, 10 % T)
x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T)
end
return x
end
# r"[-+]?[0-9]+" must match `data[range]`.
function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Signed}
lo = first(range)
if data[lo] == UInt8('-')
sign = T(-1)
lo += 1
elseif data[lo] == UInt8('+')
sign = T(+1)
lo += 1
else
sign = T(+1)
end
x = zero(T)
@inbounds for i in lo:last(range)
x = Base.Checked.checked_mul(x, 10 % T)
x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T)
end
return sign * x
end
# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams).
# import BufferedStreams: BufferedStreams, BufferedInputStream
# # A type keeping track of a ragel-based parser's state.
# mutable struct State{T<:BufferedInputStream}
# stream::T # input stream
# cs::Int # current DFA state of Ragel
# linenum::Int # line number: parser is responsible for updating this
# finished::Bool # true if finished (regardless of where in the stream we are)
# end
# function State(initstate::Int, input::BufferedInputStream)
# return State(input, initstate, 1, false)
# end
include("flags.jl") include("flags.jl")
include("metainfo.jl") include("metainfo.jl")
include("record.jl") include("record.jl")
include("header.jl") include("header.jl")
include("reader.jl") include("reader.jl")
include("readrecord.jl")
include("writer.jl") include("writer.jl")
end end

View file

@ -10,7 +10,7 @@ Create a data writer of the SAM file format.
* `output`: data sink * `output`: data sink
* `header=Header()`: SAM header object * `header=Header()`: SAM header object
""" """
mutable struct Writer <: BioCore.IO.AbstractWriter mutable struct Writer <: BioGenerics.IO.AbstractWriter
stream::IO stream::IO
function Writer(output::IO, header::Header=Header()) function Writer(output::IO, header::Header=Header())
@ -20,7 +20,7 @@ mutable struct Writer <: BioCore.IO.AbstractWriter
end end
end end
function BioCore.IO.stream(writer::Writer) function BioGenerics.IO.stream(writer::Writer)
return writer.stream return writer.stream
end end

View file

@ -1,25 +1,16 @@
using Test using Test
using BioGenerics
using FormatSpecimens
using GenomicFeatures using GenomicFeatures
using XAM using XAM
import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE
using FormatSpecimens
import BGZFStreams: BGZFStream import BGZFStreams: BGZFStream
import BioCore.Exceptions: MissingFieldException import BioGenerics.Exceptions: MissingFieldException
import BioSequences: @dna_str, @aa_str import BioSequences: @dna_str, @aa_str
import BioCore:
header,
isfilled,
seqname,
hasseqname,
sequence,
hassequence,
leftposition,
rightposition,
hasleftposition,
hasrightposition
# Generate a random range within `range`. # Generate a random range within `range`.
function randrange(range) function randrange(range)
x = rand(range) x = rand(range)