From ef99c703170e54e2e9873c13b47f8d46325fef25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 18 Jan 2020 12:00:39 +1100 Subject: [PATCH 01/14] Flatten returns --- src/bam/auxdata.jl | 66 ++++++++++++++++++++++++++++++--------------- src/bam/overlap.jl | 12 ++++++--- src/bam/reader.jl | 2 ++ src/bam/record.jl | 5 ++-- src/sam/record.jl | 67 ++++++++++++++++++++++++++++------------------ 5 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src/bam/auxdata.jl b/src/bam/auxdata.jl index 2bcfac2..3ce0050 100644 --- a/src/bam/auxdata.jl +++ b/src/bam/auxdata.jl @@ -72,12 +72,15 @@ function loadauxtype(data::Vector{UInt8}, p::Int) b == UInt8('Z') ? String : error("invalid type tag: '$(Char(b))'")) end + t = data[p] + if t == UInt8('B') return p + 2, Vector{auxtype(data[p+1])} - else - return p + 1, auxtype(t) end + + return p + 1, auxtype(t) + end function loadauxvalue(data::Vector{UInt8}, p::Int, ::Type{T}) where T @@ -105,14 +108,17 @@ end function findauxtag(data::Vector{UInt8}, start::Int, stop::Int, t1::UInt8, t2::UInt8) pos = start + while pos ≤ stop && !(data[pos] == t1 && data[pos+1] == t2) pos = next_tag_position(data, pos) end + if pos > stop return 0 - else - return pos end + + return pos + end # Find the starting position of a next tag in `data` after `p`. @@ -120,24 +126,40 @@ end function next_tag_position(data::Vector{UInt8}, p::Int) typ = Char(data[p+2]) p += 3 + if typ == 'A' - p += 1 - elseif typ == 'c' || typ == 'C' - p += 1 - elseif typ == 's' || typ == 'S' - p += 2 - elseif typ == 'i' || typ == 'I' - p += 4 - elseif typ == 'f' - p += 4 - elseif typ == 'd' - p += 8 - elseif typ == 'Z' || typ == 'H' + return p += 1 + end + + if typ == 'c' || typ == 'C' + return p += 1 + end + + if typ == 's' || typ == 'S' + return p += 2 + end + + if typ == 'i' || typ == 'I' + return p += 4 + end + + if typ == 'f' + return p += 4 + end + + if typ == 'd' + return p += 8 + end + + if typ == 'Z' || typ == 'H' while data[p] != 0x00 # NULL-terminalted string p += 1 end - p += 1 - elseif typ == 'B' + return p += 1 + + end + + if typ == 'B' eltyp = Char(data[p]) elsize = eltyp == 'c' || eltyp == 'C' ? 1 : eltyp == 's' || eltyp == 'S' ? 2 : @@ -145,9 +167,9 @@ function next_tag_position(data::Vector{UInt8}, p::Int) error("invalid type tag: '$(Char(eltyp))'") p += 1 n = unsafe_load(Ptr{Int32}(pointer(data, p))) - p += 4 + elsize * n - else - error("invalid type tag: '$(Char(typ))'") + return p += 4 + elsize * n end - return p + + error("invalid type tag: '$(Char(typ))'") + end diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index c3475ba..186ff59 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -82,14 +82,18 @@ end function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}}) rid = refid(record) + if rid < interval[1] || (rid == interval[1] && rightposition(record) < first(interval[2])) # strictly left return -1 - elseif rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) + end + + if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) # strictly right return +1 - else - # overlapping - return 0 end + + # overlapping + return 0 + end diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 906110a..301246c 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -89,6 +89,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) A = read(input, UInt8) M = read(input, UInt8) x = read(input, UInt8) + if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01 error("input was not a valid BAM file") end @@ -113,6 +114,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) voffset = isa(input.io, Base.AbstractPipe) ? BGZFStreams.VirtualOffset(0, 0) : BGZFStreams.virtualoffset(input) + return Reader( input, samreader.header, diff --git a/src/bam/record.jl b/src/bam/record.jl index 107f127..d0bde20 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -612,9 +612,10 @@ end function data_size(record::Record) if isfilled(record) return record.block_size - FIXED_FIELDS_BYTES + sizeof(record.block_size) - else - return 0 end + + return 0 + end function checkfilled(record::Record) diff --git a/src/sam/record.jl b/src/sam/record.jl index e627727..8989747 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -299,9 +299,10 @@ Get the alignment of `record`. function alignment(record::Record)::BioAlignments.Alignment if ismapped(record) return BioAlignments.Alignment(cigar(record), 1, position(record)) - else - return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[]) end + + return BioAlignments.Alignment(BioAlignments.AlignmentAnchor[]) + end function hasalignment(record::Record) @@ -474,23 +475,30 @@ function Base.getindex(record::Record, tag::AbstractString) else hi = first(record.fields[i+1]) - 2 end + if typ == UInt8('A') @assert lo == hi return Char(record.data[lo]) - elseif typ == UInt8('i') + end + if typ == UInt8('i') return unsafe_parse_decimal(Int, record.data, lo:hi) - elseif typ == UInt8('f') + end + if typ == UInt8('f') # TODO: Call a C function directly for speed? return parse(Float32, SubString(record.data[lo:hi])) - elseif typ == UInt8('Z') - return String(record.data[lo:hi]) - elseif typ == UInt8('H') - return parse_hexarray(record.data, lo:hi) - elseif typ == UInt8('B') - return parse_typedarray(record.data, lo:hi) - else - throw(ArgumentError("type code '$(Char(typ))' is not defined")) end + if typ == UInt8('Z') + return String(record.data[lo:hi]) + end + if typ == UInt8('H') + return parse_hexarray(record.data, lo:hi) + end + if typ == UInt8('B') + return parse_typedarray(record.data, lo:hi) + end + + throw(ArgumentError("type code '$(Char(typ))' is not defined")) + end function Base.keys(record::Record) @@ -602,21 +610,28 @@ function parse_typedarray(data::Vector{UInt8}, range::UnitRange{Int}) xs = split(String(data[first(range)+2:last(range)])) if t == UInt8('c') return [parse(Int8, x) for x in xs] - elseif t == UInt8('C') - return [parse(UInt8, x) for x in xs] - elseif t == UInt8('s') - return [parse(Int16, x) for x in xs] - elseif t == UInt8('S') - return [parse(UInt16, x) for x in xs] - elseif t == UInt8('i') - return [parse(Int32, x) for x in xs] - elseif t == UInt8('I') - return [parse(UInt32, x) for x in xs] - elseif t == UInt8('f') - return [parse(Float32, x) for x in xs] - else - throw(ArgumentError("type code '$(Char(t))' is not defined")) end + if t == UInt8('C') + return [parse(UInt8, x) for x in xs] + end + if t == UInt8('s') + return [parse(Int16, x) for x in xs] + end + if t == UInt8('S') + return [parse(UInt16, x) for x in xs] + end + if t == UInt8('i') + return [parse(Int32, x) for x in xs] + end + if t == UInt8('I') + return [parse(UInt32, x) for x in xs] + end + if t == UInt8('f') + return [parse(Float32, x) for x in xs] + end + + throw(ArgumentError("type code '$(Char(t))' is not defined")) + end function ismissing(record::Record, range::UnitRange{Int}) From 789cccbaff4c0a58fa6418c3ba0e90ce794114e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 2 Feb 2020 21:32:02 +1100 Subject: [PATCH 02/14] Flatten elseif --- src/bam/overlap.jl | 3 ++- src/bam/reader.jl | 6 ++---- src/bam/record.jl | 3 ++- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index 186ff59..0e184c9 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -67,7 +67,8 @@ function Base.iterate(iter::OverlapIterator, state) c = compare_intervals(state.record, (state.refindex, iter.interval)) if c == 0 return copy(state.record), state - elseif c > 0 + end + if c > 0 # no more overlapping records in this chunk since records are sorted break end diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 301246c..9ec5d0d 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -30,10 +30,8 @@ end function Reader(input::IO; index=nothing) if isa(index, AbstractString) index = BAI(index) - else - if index != nothing - error("unrecognizable index argument") - end + elseif index != nothing + error("unrecognizable index argument") end reader = init_bam_reader(input) reader.index = index diff --git a/src/bam/record.jl b/src/bam/record.jl index d0bde20..ad9c6f1 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -163,7 +163,8 @@ function checked_refid(record::Record) id = refid(record) if id == 0 throw(ArgumentError("record is not mapped")) - elseif !isdefined(record, :reader) + end + if !isdefined(record, :reader) throw(ArgumentError("reader is not defined")) end return id From b0069d6f27e19ab81f03024f56273c8897e1afdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 2 Feb 2020 21:29:20 +1100 Subject: [PATCH 03/14] Preallocate --- src/bam/reader.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 9ec5d0d..0fabb8f 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -87,7 +87,7 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) A = read(input, UInt8) M = read(input, UInt8) x = read(input, UInt8) - + if B != UInt8('B') || A != UInt8('A') || M != UInt8('M') || x != 0x01 error("input was not a valid BAM file") end @@ -97,16 +97,16 @@ function init_bam_reader(input::BGZFStreams.BGZFStream) samreader = SAM.Reader(IOBuffer(read(input, textlen))) # reference sequences - refseqnames = String[] - refseqlens = Int[] n_refs = read(input, Int32) - for _ in 1:n_refs + refseqnames = Vector{String}(undef, n_refs) + refseqlens = Vector{Int}(undef, n_refs) + @inbounds for i in 1:n_refs namelen = read(input, Int32) data = read(input, namelen) seqname = unsafe_string(pointer(data)) seqlen = read(input, Int32) - push!(refseqnames, seqname) - push!(refseqlens, seqlen) + refseqnames[i] = seqname + refseqlens[i] = seqlen end voffset = isa(input.io, Base.AbstractPipe) ? From e8ad60b086d7a355718d0f4cf6a33a3bb4fd9a19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 23 Feb 2020 06:49:19 +1100 Subject: [PATCH 04/14] Use accessor functions --- src/bam/overlap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index 0e184c9..bb7b3c1 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -16,7 +16,7 @@ function Base.eltype(::Type{OverlapIterator{T}}) where T end function GenomicFeatures.eachoverlap(reader::Reader, interval::Interval) - return GenomicFeatures.eachoverlap(reader, interval.seqname, interval.first:interval.last) + return GenomicFeatures.eachoverlap(reader, GenomicFeatures.seqname(interval), GenomicFeatures.leftposition(interval):GenomicFeatures.rightposition(interval)) end function GenomicFeatures.eachoverlap(reader::Reader, interval) From c4f3c4b0df70264cfba627263f9f7fb74c51666d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 2 Feb 2020 22:50:25 +1100 Subject: [PATCH 05/14] Migrate to BioGenerics Basic find and replace. --- Project.toml | 4 ++-- src/bam/bam.jl | 4 ++-- src/bam/reader.jl | 4 ++-- src/bam/record.jl | 20 ++++++++++---------- src/bam/writer.jl | 4 ++-- src/sam/reader.jl | 30 +++++++++++++++--------------- src/sam/record.jl | 18 +++++++++--------- src/sam/sam.jl | 8 ++++---- src/sam/writer.jl | 4 ++-- test/runtests.jl | 19 +++++-------------- 10 files changed, 53 insertions(+), 62 deletions(-) diff --git a/Project.toml b/Project.toml index 10f7cb4..850b570 100644 --- a/Project.toml +++ b/Project.toml @@ -7,7 +7,7 @@ version = "0.1.1" Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" -BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81" +BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" @@ -18,7 +18,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Automa = "0.7, 0.8" BGZFStreams = "0.3" BioAlignments = "2" -BioCore = "2" +BioGenerics = "0.1" BioSequences = "2" BufferedStreams = "1" GenomicFeatures = "2" diff --git a/src/bam/bam.jl b/src/bam/bam.jl index e055231..6fd1174 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -3,7 +3,7 @@ module BAM -using BioCore +using BioGenerics using GenomicFeatures using XAM.SAM @@ -11,7 +11,7 @@ import BGZFStreams import BioAlignments import Indexes import BioSequences -import BioCore: isfilled, header +import BioGenerics: isfilled, header import GenomicFeatures: eachoverlap diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 0fabb8f..953f5ca 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -10,7 +10,7 @@ Create a data reader of the BAM file format. * `input`: data source * `index=nothing`: filepath to a random access index (currently *bai* is supported) """ -mutable struct Reader{T} <: BioCore.IO.AbstractReader +mutable struct Reader{T} <: BioGenerics.IO.AbstractReader stream::BGZFStreams.BGZFStream{T} header::SAM.Header start_offset::BGZFStreams.VirtualOffset @@ -23,7 +23,7 @@ function Base.eltype(::Type{Reader{T}}) where T return Record end -function BioCore.IO.stream(reader::Reader) +function BioGenerics.IO.stream(reader::Reader) return reader.stream end diff --git a/src/bam/record.jl b/src/bam/record.jl index ad9c6f1..13c5608 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -566,42 +566,42 @@ function Base.values(record::Record) end -# BioCore Methods +# BioGenerics Methods # ----------- -function BioCore.isfilled(record::Record) +function BioGenerics.isfilled(record::Record) return record.block_size != 0 end -function BioCore.seqname(record::Record) +function BioGenerics.seqname(record::Record) return tempname(record) end -function BioCore.hasseqname(record::Record) +function BioGenerics.hasseqname(record::Record) return hastempname(record) end -function BioCore.sequence(record::Record) +function BioGenerics.sequence(record::Record) return sequence(record) end -function BioCore.hassequence(record::Record) +function BioGenerics.hassequence(record::Record) return hassequence(record) end -function BioCore.leftposition(record::Record) +function BioGenerics.leftposition(record::Record) return position(record) end -function BioCore.hasleftposition(record::Record) +function BioGenerics.hasleftposition(record::Record) return hasposition(record) end -function BioCore.rightposition(record::Record) +function BioGenerics.rightposition(record::Record) return rightposition(record) end -function BioCore.hasrightposition(record::Record) +function BioGenerics.hasrightposition(record::Record) return hasrightposition(record) end diff --git a/src/bam/writer.jl b/src/bam/writer.jl index f017424..2460b3f 100644 --- a/src/bam/writer.jl +++ b/src/bam/writer.jl @@ -10,7 +10,7 @@ Create a data writer of the BAM file format. * `output`: data sink * `header`: SAM header object """ -mutable struct Writer <: BioCore.IO.AbstractWriter +mutable struct Writer <: BioGenerics.IO.AbstractWriter stream::BGZFStreams.BGZFStream end @@ -25,7 +25,7 @@ function Writer(stream::BGZFStreams.BGZFStream, header::SAM.Header) return Writer(stream) end -function BioCore.IO.stream(writer::Writer) +function BioGenerics.IO.stream(writer::Writer) return writer.stream end diff --git a/src/sam/reader.jl b/src/sam/reader.jl index d26a840..5e4f0ea 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -1,12 +1,12 @@ # SAM Reader # ========= -mutable struct Reader <: BioCore.IO.AbstractReader - state::BioCore.Ragel.State +mutable struct Reader <: BioGenerics.IO.AbstractReader + state::BioGenerics.Ragel.State header::Header function Reader(input::BufferedStreams.BufferedInputStream) - reader = new(BioCore.Ragel.State(sam_header_machine.start_state, input), Header()) + reader = new(BioGenerics.Ragel.State(sam_header_machine.start_state, input), Header()) readheader!(reader) reader.state.cs = sam_body_machine.start_state return reader @@ -25,7 +25,7 @@ function Reader(input::IO) return Reader(BufferedStreams.BufferedInputStream(input)) end -function BioCore.IO.stream(reader::Reader) +function BioGenerics.IO.stream(reader::Reader) return reader.state.stream end @@ -184,36 +184,36 @@ const sam_metainfo_actions = Dict( :metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)), :metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)), :metainfo => quote - BioCore.ReaderHelper.resize_and_copy!(record.data, data, offset+1:p-1) + BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, offset+1:p-1) record.filled = (offset+1:p-1) .- offset end, :anchor => :(), :mark1 => :(mark1 = p), :mark2 => :(mark2 = p)) eval( - BioCore.ReaderHelper.generate_index_function( + BioGenerics.ReaderHelper.generate_index_function( MetaInfo, sam_metainfo_machine, :(mark1 = mark2 = offset = 0), sam_metainfo_actions)) eval( - BioCore.ReaderHelper.generate_readheader_function( + BioGenerics.ReaderHelper.generate_readheader_function( Reader, MetaInfo, sam_header_machine, :(mark1 = mark2 = offset = 0), merge(sam_metainfo_actions, Dict( :metainfo => quote - BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1) + BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, BioGenerics.ReaderHelper.upanchor!(stream):p-1) record.filled = (offset+1:p-1) .- offset @assert isfilled(record) push!(reader.header.metainfo, record) - BioCore.ReaderHelper.ensure_margin!(stream) + BioGenerics.ReaderHelper.ensure_margin!(stream) record = MetaInfo() end, :header => :(finish_header = true; @escape), :countline => :(linenum += 1), - :anchor => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1))), + :anchor => :(BioGenerics.ReaderHelper.anchor!(stream, p); offset = p - 1))), quote if !eof(stream) stream.position -= 1 # cancel look-ahead @@ -234,28 +234,28 @@ const sam_record_actions = Dict( :record_qual => :(record.qual = (mark:p-1) .- offset), :record_field => :(push!(record.fields, (mark:p-1) .- offset)), :record => quote - BioCore.ReaderHelper.resize_and_copy!(record.data, data, 1:p-1) + BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, 1:p-1) record.filled = (offset+1:p-1) .- offset end, :anchor => :(), :mark => :(mark = p)) eval( - BioCore.ReaderHelper.generate_index_function( + BioGenerics.ReaderHelper.generate_index_function( Record, sam_record_machine, :(mark = offset = 0), sam_record_actions)) eval( - BioCore.ReaderHelper.generate_read_function( + BioGenerics.ReaderHelper.generate_read_function( Reader, sam_body_machine, :(mark = offset = 0), merge(sam_record_actions, Dict( :record => quote - BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1) + BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, BioGenerics.ReaderHelper.upanchor!(stream):p-1) record.filled = (offset+1:p-1) .- offset found_record = true @escape end, :countline => :(linenum += 1), - :anchor => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1))))) + :anchor => :(BioGenerics.ReaderHelper.anchor!(stream, p); offset = p - 1))))) diff --git a/src/sam/record.jl b/src/sam/record.jl index 8989747..a61e0df 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -514,39 +514,39 @@ end # Bio Methods # ----------- -function BioCore.isfilled(record::Record) +function BioGenerics.isfilled(record::Record) return !isempty(record.filled) end -function BioCore.seqname(record::Record) +function BioGenerics.seqname(record::Record) return tempname(record) end -function BioCore.hasseqname(record::Record) +function BioGenerics.hasseqname(record::Record) return hastempname(record) end -function BioCore.sequence(record::Record) +function BioGenerics.sequence(record::Record) return sequence(record) end -function BioCore.hassequence(record::Record) +function BioGenerics.hassequence(record::Record) return hassequence(record) end -function BioCore.rightposition(record::Record) +function BioGenerics.rightposition(record::Record) return rightposition(record) end -function BioCore.hasrightposition(record::Record) +function BioGenerics.hasrightposition(record::Record) return hasrightposition(record) end -function BioCore.leftposition(record::Record) +function BioGenerics.leftposition(record::Record) return position(record) end -function BioCore.hasleftposition(record::Record) +function BioGenerics.hasleftposition(record::Record) return hasposition(record) end diff --git a/src/sam/sam.jl b/src/sam/sam.jl index b917f37..d0f006b 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -3,14 +3,14 @@ module SAM -using BioCore +using BioGenerics import Automa import Automa.RegExp: @re_str import BioAlignments -import BioCore.Exceptions: missingerror -import BioCore.RecordHelper: unsafe_parse_decimal -import BioCore: isfilled, header +import BioGenerics.Exceptions: missingerror +import BioGenerics.RecordHelper: unsafe_parse_decimal +import BioGenerics: isfilled, header import BioSequences import BufferedStreams using Printf: @sprintf diff --git a/src/sam/writer.jl b/src/sam/writer.jl index e7a5ddd..801ed68 100644 --- a/src/sam/writer.jl +++ b/src/sam/writer.jl @@ -10,7 +10,7 @@ Create a data writer of the SAM file format. * `output`: data sink * `header=Header()`: SAM header object """ -mutable struct Writer <: BioCore.IO.AbstractWriter +mutable struct Writer <: BioGenerics.IO.AbstractWriter stream::IO function Writer(output::IO, header::Header=Header()) @@ -20,7 +20,7 @@ mutable struct Writer <: BioCore.IO.AbstractWriter end end -function BioCore.IO.stream(writer::Writer) +function BioGenerics.IO.stream(writer::Writer) return writer.stream end diff --git a/test/runtests.jl b/test/runtests.jl index d76d99f..10ca2d0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,25 +1,16 @@ using Test + +using BioGenerics +using FormatSpecimens using GenomicFeatures using XAM + import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE -using FormatSpecimens import BGZFStreams: BGZFStream -import BioCore.Exceptions: MissingFieldException +import BioGenerics.Exceptions: MissingFieldException import BioSequences: @dna_str, @aa_str -import BioCore: - header, - isfilled, - seqname, - hasseqname, - sequence, - hassequence, - leftposition, - rightposition, - hasleftposition, - hasrightposition - # Generate a random range within `range`. function randrange(range) x = rand(range) From 2cad552f6308b28cd7a99aa3584da98716019bc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 2 Feb 2020 22:51:19 +1100 Subject: [PATCH 06/14] Quick fix for BioCore.RecordHelper: unsafe_parse_decimal --- src/sam/sam.jl | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/sam/sam.jl b/src/sam/sam.jl index d0f006b..f7b6c92 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -9,12 +9,43 @@ import Automa import Automa.RegExp: @re_str import BioAlignments import BioGenerics.Exceptions: missingerror -import BioGenerics.RecordHelper: unsafe_parse_decimal import BioGenerics: isfilled, header import BioSequences import BufferedStreams using Printf: @sprintf + +#TODO: update import BioCore.RecordHelper: unsafe_parse_decimal +# r"[0-9]+" must match `data[range]`. +function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Unsigned} + x = zero(T) + @inbounds for i in range + x = Base.Checked.checked_mul(x, 10 % T) + x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T) + end + return x +end + +# r"[-+]?[0-9]+" must match `data[range]`. +function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Signed} + lo = first(range) + if data[lo] == UInt8('-') + sign = T(-1) + lo += 1 + elseif data[lo] == UInt8('+') + sign = T(+1) + lo += 1 + else + sign = T(+1) + end + x = zero(T) + @inbounds for i in lo:last(range) + x = Base.Checked.checked_mul(x, 10 % T) + x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T) + end + return sign * x +end + include("flags.jl") include("metainfo.jl") include("record.jl") From c59a1576755f109e989545e2510a433487b4ec3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Tue, 27 Aug 2019 08:52:35 +1000 Subject: [PATCH 07/14] Quick fix for BioCore.Ragel.State This fix is likely to be replaced when migrating to TranscodingStreams. --- src/sam/reader.jl | 4 ++-- src/sam/sam.jl | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 5e4f0ea..1dc206b 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -2,11 +2,11 @@ # ========= mutable struct Reader <: BioGenerics.IO.AbstractReader - state::BioGenerics.Ragel.State + state::State header::Header function Reader(input::BufferedStreams.BufferedInputStream) - reader = new(BioGenerics.Ragel.State(sam_header_machine.start_state, input), Header()) + reader = new(State(sam_header_machine.start_state, input), Header()) readheader!(reader) reader.state.cs = sam_body_machine.start_state return reader diff --git a/src/sam/sam.jl b/src/sam/sam.jl index f7b6c92..628b085 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -46,6 +46,21 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I return sign * x end +#TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams). +import BufferedStreams: BufferedStreams, BufferedInputStream +# A type keeping track of a ragel-based parser's state. +mutable struct State{T<:BufferedInputStream} + stream::T # input stream + cs::Int # current DFA state of Ragel + linenum::Int # line number: parser is responsible for updating this + finished::Bool # true if finished (regardless of where in the stream we are) +end + +function State(initstate::Int, input::BufferedInputStream) + return State(input, initstate, 1, false) +end + + include("flags.jl") include("metainfo.jl") include("record.jl") From 365f7922fae84beea86a350e35887f3c54d8df18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 2 Feb 2020 23:16:46 +1100 Subject: [PATCH 08/14] Quick fix for BioGenerics.ReaderHelper functions This fix is likely to be replaced when migrating to TranscodingStreams. --- src/sam/reader.jl | 191 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 180 insertions(+), 11 deletions(-) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 1dc206b..ceb7a86 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -1,6 +1,175 @@ # SAM Reader # ========= +@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true) + stream.anchor = p + stream.immobilized = immobilize + return stream +end + +@inline function upanchor!(stream::BufferedStreams.BufferedInputStream) + @assert stream.anchor != 0 "upanchor! called with no anchor set" + anchor = stream.anchor + stream.anchor = 0 + stream.immobilized = false + return anchor +end + +function ensure_margin!(stream::BufferedStreams.BufferedInputStream) + if stream.position * 20 > length(stream.buffer) * 19 + BufferedStreams.shiftdata!(stream) + end + return nothing +end + +@inline function resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int}) + return resize_and_copy!(dst, 1, src, r) +end + +@inline function resize_and_copy!(dst::Vector{UInt8}, dstart::Int, src::Vector{UInt8}, r::UnitRange{Int}) + rlen = length(r) + if length(dst) != dstart + rlen - 1 + resize!(dst, dstart + rlen - 1) + end + copyto!(dst, dstart, src, first(r), rlen) + return dst +end + +function generate_index_function(record_type, machine, init_code, actions; kwargs...) + kwargs = Dict(kwargs) + context = Automa.CodeGenContext( + generator = get(kwargs, :generator, :goto), + checkbounds = get(kwargs, :checkbounds, false), + loopunroll = get(kwargs, :loopunroll, 0) + ) + quote + function index!(record::$(record_type)) + data = record.data + p = 1 + p_end = p_eof = sizeof(data) + initialize!(record) + $(init_code) + cs = $(machine.start_state) + $(Automa.generate_exec_code(context, machine, actions)) + if cs != 0 + throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) + end + @assert isfilled(record) + return record + end + end +end + +function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:()) + quote + function readheader!(reader::$(reader_type)) + _readheader!(reader, reader.state) + end + + function _readheader!(reader::$(reader_type), state::State) + stream = state.stream + ensure_margin!(stream) + cs = state.cs + linenum = state.linenum + data = stream.buffer + p = stream.position + p_end = stream.available + p_eof = -1 + finish_header = false + record = $(metainfo_type)() + + $(init_code) + + while true + $(Automa.generate_exec_code(Automa.CodeGenContext(generator=:table), machine, actions)) + + state.cs = cs + state.finished = cs == 0 + state.linenum = linenum + stream.position = p + + if cs < 0 + error("$($(reader_type)) file format error on line ", linenum) + elseif finish_header + $(finish_code) + break + elseif p > p_eof ≥ 0 + error("incomplete $($(reader_type)) input on line ", linenum) + else + hits_eof = BufferedStreams.fillbuffer!(stream) == 0 + p = stream.position + p_end = stream.available + if hits_eof + p_eof = p_end + end + end + end + end + end +end + +function generate_read_function(reader_type, machine, init_code, actions; kwargs...) + kwargs = Dict(kwargs) + context = Automa.CodeGenContext( + generator=get(kwargs, :generator, :goto), + checkbounds=get(kwargs, :checkbounds, false), + loopunroll=get(kwargs, :loopunroll, 0) + ) + quote + function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type)) + return _read!(reader, reader.state, record) + end + + function _read!(reader::$(reader_type), state::State, record::eltype($(reader_type))) + stream = state.stream + ensure_margin!(stream) + cs = state.cs + linenum = state.linenum + data = stream.buffer + p = stream.position + p_end = stream.available + p_eof = -1 + found_record = false + initialize!(record) + + $(init_code) + + if state.finished + throw(EOFError()) + end + + while true + $(Automa.generate_exec_code(context, machine, actions)) + + state.cs = cs + state.finished |= cs == 0 + state.linenum = linenum + stream.position = p + + if cs < 0 + error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)]))) + elseif found_record + break + elseif cs == 0 + throw(EOFError()) + elseif p > p_eof ≥ 0 + error("incomplete $($(reader_type)) input on line ", linenum) + elseif BufferedStreams.available_bytes(stream) < 64 + hits_eof = BufferedStreams.fillbuffer!(stream) == 0 + p = stream.position + p_end = stream.available + if hits_eof + p_eof = p_end + end + end + end + + @assert isfilled(record) + return record + end + end +end + mutable struct Reader <: BioGenerics.IO.AbstractReader state::State header::Header @@ -184,36 +353,36 @@ const sam_metainfo_actions = Dict( :metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)), :metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)), :metainfo => quote - BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, offset+1:p-1) + resize_and_copy!(record.data, data, offset+1:p-1) record.filled = (offset+1:p-1) .- offset end, :anchor => :(), :mark1 => :(mark1 = p), :mark2 => :(mark2 = p)) eval( - BioGenerics.ReaderHelper.generate_index_function( + generate_index_function( MetaInfo, sam_metainfo_machine, :(mark1 = mark2 = offset = 0), sam_metainfo_actions)) eval( - BioGenerics.ReaderHelper.generate_readheader_function( + generate_readheader_function( Reader, MetaInfo, sam_header_machine, :(mark1 = mark2 = offset = 0), merge(sam_metainfo_actions, Dict( :metainfo => quote - BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, BioGenerics.ReaderHelper.upanchor!(stream):p-1) + resize_and_copy!(record.data, data, upanchor!(stream):p-1) record.filled = (offset+1:p-1) .- offset @assert isfilled(record) push!(reader.header.metainfo, record) - BioGenerics.ReaderHelper.ensure_margin!(stream) + ensure_margin!(stream) record = MetaInfo() end, :header => :(finish_header = true; @escape), :countline => :(linenum += 1), - :anchor => :(BioGenerics.ReaderHelper.anchor!(stream, p); offset = p - 1))), + :anchor => :(anchor!(stream, p); offset = p - 1))), quote if !eof(stream) stream.position -= 1 # cancel look-ahead @@ -234,28 +403,28 @@ const sam_record_actions = Dict( :record_qual => :(record.qual = (mark:p-1) .- offset), :record_field => :(push!(record.fields, (mark:p-1) .- offset)), :record => quote - BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, 1:p-1) + resize_and_copy!(record.data, data, 1:p-1) record.filled = (offset+1:p-1) .- offset end, :anchor => :(), :mark => :(mark = p)) eval( - BioGenerics.ReaderHelper.generate_index_function( + generate_index_function( Record, sam_record_machine, :(mark = offset = 0), sam_record_actions)) eval( - BioGenerics.ReaderHelper.generate_read_function( + generate_read_function( Reader, sam_body_machine, :(mark = offset = 0), merge(sam_record_actions, Dict( :record => quote - BioGenerics.ReaderHelper.resize_and_copy!(record.data, data, BioGenerics.ReaderHelper.upanchor!(stream):p-1) + resize_and_copy!(record.data, data, upanchor!(stream):p-1) record.filled = (offset+1:p-1) .- offset found_record = true @escape end, :countline => :(linenum += 1), - :anchor => :(BioGenerics.ReaderHelper.anchor!(stream, p); offset = p - 1))))) + :anchor => :(anchor!(stream, p); offset = p - 1))))) From e63ce2e3986ada38e67f4e0b5477d8e541983299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 7 Sep 2019 21:42:06 +1000 Subject: [PATCH 09/14] Group Automa into readrecord.jl --- src/sam/reader.jl | 387 ----------------------------------------- src/sam/readrecord.jl | 389 ++++++++++++++++++++++++++++++++++++++++++ src/sam/sam.jl | 1 + 3 files changed, 390 insertions(+), 387 deletions(-) create mode 100644 src/sam/readrecord.jl diff --git a/src/sam/reader.jl b/src/sam/reader.jl index ceb7a86..0406165 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -1,175 +1,6 @@ # SAM Reader # ========= -@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true) - stream.anchor = p - stream.immobilized = immobilize - return stream -end - -@inline function upanchor!(stream::BufferedStreams.BufferedInputStream) - @assert stream.anchor != 0 "upanchor! called with no anchor set" - anchor = stream.anchor - stream.anchor = 0 - stream.immobilized = false - return anchor -end - -function ensure_margin!(stream::BufferedStreams.BufferedInputStream) - if stream.position * 20 > length(stream.buffer) * 19 - BufferedStreams.shiftdata!(stream) - end - return nothing -end - -@inline function resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int}) - return resize_and_copy!(dst, 1, src, r) -end - -@inline function resize_and_copy!(dst::Vector{UInt8}, dstart::Int, src::Vector{UInt8}, r::UnitRange{Int}) - rlen = length(r) - if length(dst) != dstart + rlen - 1 - resize!(dst, dstart + rlen - 1) - end - copyto!(dst, dstart, src, first(r), rlen) - return dst -end - -function generate_index_function(record_type, machine, init_code, actions; kwargs...) - kwargs = Dict(kwargs) - context = Automa.CodeGenContext( - generator = get(kwargs, :generator, :goto), - checkbounds = get(kwargs, :checkbounds, false), - loopunroll = get(kwargs, :loopunroll, 0) - ) - quote - function index!(record::$(record_type)) - data = record.data - p = 1 - p_end = p_eof = sizeof(data) - initialize!(record) - $(init_code) - cs = $(machine.start_state) - $(Automa.generate_exec_code(context, machine, actions)) - if cs != 0 - throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) - end - @assert isfilled(record) - return record - end - end -end - -function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:()) - quote - function readheader!(reader::$(reader_type)) - _readheader!(reader, reader.state) - end - - function _readheader!(reader::$(reader_type), state::State) - stream = state.stream - ensure_margin!(stream) - cs = state.cs - linenum = state.linenum - data = stream.buffer - p = stream.position - p_end = stream.available - p_eof = -1 - finish_header = false - record = $(metainfo_type)() - - $(init_code) - - while true - $(Automa.generate_exec_code(Automa.CodeGenContext(generator=:table), machine, actions)) - - state.cs = cs - state.finished = cs == 0 - state.linenum = linenum - stream.position = p - - if cs < 0 - error("$($(reader_type)) file format error on line ", linenum) - elseif finish_header - $(finish_code) - break - elseif p > p_eof ≥ 0 - error("incomplete $($(reader_type)) input on line ", linenum) - else - hits_eof = BufferedStreams.fillbuffer!(stream) == 0 - p = stream.position - p_end = stream.available - if hits_eof - p_eof = p_end - end - end - end - end - end -end - -function generate_read_function(reader_type, machine, init_code, actions; kwargs...) - kwargs = Dict(kwargs) - context = Automa.CodeGenContext( - generator=get(kwargs, :generator, :goto), - checkbounds=get(kwargs, :checkbounds, false), - loopunroll=get(kwargs, :loopunroll, 0) - ) - quote - function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type)) - return _read!(reader, reader.state, record) - end - - function _read!(reader::$(reader_type), state::State, record::eltype($(reader_type))) - stream = state.stream - ensure_margin!(stream) - cs = state.cs - linenum = state.linenum - data = stream.buffer - p = stream.position - p_end = stream.available - p_eof = -1 - found_record = false - initialize!(record) - - $(init_code) - - if state.finished - throw(EOFError()) - end - - while true - $(Automa.generate_exec_code(context, machine, actions)) - - state.cs = cs - state.finished |= cs == 0 - state.linenum = linenum - stream.position = p - - if cs < 0 - error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)]))) - elseif found_record - break - elseif cs == 0 - throw(EOFError()) - elseif p > p_eof ≥ 0 - error("incomplete $($(reader_type)) input on line ", linenum) - elseif BufferedStreams.available_bytes(stream) < 64 - hits_eof = BufferedStreams.fillbuffer!(stream) == 0 - p = stream.position - p_end = stream.available - if hits_eof - p_eof = p_end - end - end - end - - @assert isfilled(record) - return record - end - end -end - mutable struct Reader <: BioGenerics.IO.AbstractReader state::State header::Header @@ -210,221 +41,3 @@ end function Base.eltype(::Type{Reader}) return Record end - -# file = header . body -# header = metainfo* -# body = record* -isinteractive() && info("compiling SAM") -const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_machine = (function () - cat = Automa.RegExp.cat - rep = Automa.RegExp.rep - alt = Automa.RegExp.alt - opt = Automa.RegExp.opt - any = Automa.RegExp.any - - metainfo = let - tag = re"[A-Z][A-Z]" \ cat("CO") - tag.actions[:enter] = [:mark1] - tag.actions[:exit] = [:metainfo_tag] - - dict = let - key = re"[A-Za-z][A-Za-z0-9]" - key.actions[:enter] = [:mark2] - key.actions[:exit] = [:metainfo_dict_key] - val = re"[ -~]+" - val.actions[:enter] = [:mark2] - val.actions[:exit] = [:metainfo_dict_val] - keyval = cat(key, ':', val) - - cat(keyval, rep(cat('\t', keyval))) - end - dict.actions[:enter] = [:mark1] - dict.actions[:exit] = [:metainfo_val] - - co = cat("CO") - co.actions[:enter] = [:mark1] - co.actions[:exit] = [:metainfo_tag] - - comment = re"[^\r\n]*" - comment.actions[:enter] = [:mark1] - comment.actions[:exit] = [:metainfo_val] - - cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment))) - end - metainfo.actions[:enter] = [:anchor] - metainfo.actions[:exit] = [:metainfo] - - record = let - qname = re"[!-?A-~]+" - qname.actions[:enter] = [:mark] - qname.actions[:exit] = [:record_qname] - - flag = re"[0-9]+" - flag.actions[:enter] = [:mark] - flag.actions[:exit] = [:record_flag] - - rname = re"\*|[!-()+-<>-~][!-~]*" - rname.actions[:enter] = [:mark] - rname.actions[:exit] = [:record_rname] - - pos = re"[0-9]+" - pos.actions[:enter] = [:mark] - pos.actions[:exit] = [:record_pos] - - mapq = re"[0-9]+" - mapq.actions[:enter] = [:mark] - mapq.actions[:exit] = [:record_mapq] - - cigar = re"\*|([0-9]+[MIDNSHPX=])+" - cigar.actions[:enter] = [:mark] - cigar.actions[:exit] = [:record_cigar] - - rnext = re"\*|=|[!-()+-<>-~][!-~]*" - rnext.actions[:enter] = [:mark] - rnext.actions[:exit] = [:record_rnext] - - pnext = re"[0-9]+" - pnext.actions[:enter] = [:mark] - pnext.actions[:exit] = [:record_pnext] - - tlen = re"[-+]?[0-9]+" - tlen.actions[:enter] = [:mark] - tlen.actions[:exit] = [:record_tlen] - - seq = re"\*|[A-Za-z=.]+" - seq.actions[:enter] = [:mark] - seq.actions[:exit] = [:record_seq] - - qual = re"[!-~]+" - qual.actions[:enter] = [:mark] - qual.actions[:exit] = [:record_qual] - - field = let - tag = re"[A-Za-z][A-Za-z0-9]" - val = alt( - re"A:[!-~]", - re"i:[-+]?[0-9]+", - re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?", - re"Z:[ !-~]*", - re"H:([0-9A-F][0-9A-F])*", - re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+") - - cat(tag, ':', val) - end - field.actions[:enter] = [:mark] - field.actions[:exit] = [:record_field] - - cat( - qname, '\t', - flag, '\t', - rname, '\t', - pos, '\t', - mapq, '\t', - cigar, '\t', - rnext, '\t', - pnext, '\t', - tlen, '\t', - seq, '\t', - qual, - rep(cat('\t', field))) - end - record.actions[:enter] = [:anchor] - record.actions[:exit] = [:record] - - newline = let - lf = re"\n" - lf.actions[:enter] = [:countline] - - cat(re"\r?", lf) - end - - header′ = rep(cat(metainfo, newline)) - header′.actions[:exit] = [:header] - header = cat(header′, opt(any() \ cat('@'))) # look ahead - - body = rep(cat(record, newline)) - - return map(Automa.compile, (metainfo, record, header, body)) -end)() - -const sam_metainfo_actions = Dict( - :metainfo_tag => :(record.tag = (mark1:p-1) .- offset), - :metainfo_val => :(record.val = (mark1:p-1) .- offset), - :metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)), - :metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)), - :metainfo => quote - resize_and_copy!(record.data, data, offset+1:p-1) - record.filled = (offset+1:p-1) .- offset - end, - :anchor => :(), - :mark1 => :(mark1 = p), - :mark2 => :(mark2 = p)) -eval( - generate_index_function( - MetaInfo, - sam_metainfo_machine, - :(mark1 = mark2 = offset = 0), - sam_metainfo_actions)) -eval( - generate_readheader_function( - Reader, - MetaInfo, - sam_header_machine, - :(mark1 = mark2 = offset = 0), - merge(sam_metainfo_actions, Dict( - :metainfo => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset - @assert isfilled(record) - push!(reader.header.metainfo, record) - ensure_margin!(stream) - record = MetaInfo() - end, - :header => :(finish_header = true; @escape), - :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1))), - quote - if !eof(stream) - stream.position -= 1 # cancel look-ahead - end - end)) - -const sam_record_actions = Dict( - :record_qname => :(record.qname = (mark:p-1) .- offset), - :record_flag => :(record.flag = (mark:p-1) .- offset), - :record_rname => :(record.rname = (mark:p-1) .- offset), - :record_pos => :(record.pos = (mark:p-1) .- offset), - :record_mapq => :(record.mapq = (mark:p-1) .- offset), - :record_cigar => :(record.cigar = (mark:p-1) .- offset), - :record_rnext => :(record.rnext = (mark:p-1) .- offset), - :record_pnext => :(record.pnext = (mark:p-1) .- offset), - :record_tlen => :(record.tlen = (mark:p-1) .- offset), - :record_seq => :(record.seq = (mark:p-1) .- offset), - :record_qual => :(record.qual = (mark:p-1) .- offset), - :record_field => :(push!(record.fields, (mark:p-1) .- offset)), - :record => quote - resize_and_copy!(record.data, data, 1:p-1) - record.filled = (offset+1:p-1) .- offset - end, - :anchor => :(), - :mark => :(mark = p)) -eval( - generate_index_function( - Record, - sam_record_machine, - :(mark = offset = 0), - sam_record_actions)) -eval( - generate_read_function( - Reader, - sam_body_machine, - :(mark = offset = 0), - merge(sam_record_actions, Dict( - :record => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset - found_record = true - @escape - end, - :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1))))) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl new file mode 100644 index 0000000..4a18064 --- /dev/null +++ b/src/sam/readrecord.jl @@ -0,0 +1,389 @@ +@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true) + stream.anchor = p + stream.immobilized = immobilize + return stream +end + +@inline function upanchor!(stream::BufferedStreams.BufferedInputStream) + @assert stream.anchor != 0 "upanchor! called with no anchor set" + anchor = stream.anchor + stream.anchor = 0 + stream.immobilized = false + return anchor +end + +function ensure_margin!(stream::BufferedStreams.BufferedInputStream) + if stream.position * 20 > length(stream.buffer) * 19 + BufferedStreams.shiftdata!(stream) + end + return nothing +end + +@inline function resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int}) + return resize_and_copy!(dst, 1, src, r) +end + +@inline function resize_and_copy!(dst::Vector{UInt8}, dstart::Int, src::Vector{UInt8}, r::UnitRange{Int}) + rlen = length(r) + if length(dst) != dstart + rlen - 1 + resize!(dst, dstart + rlen - 1) + end + copyto!(dst, dstart, src, first(r), rlen) + return dst +end + +function generate_index_function(record_type, machine, init_code, actions; kwargs...) + kwargs = Dict(kwargs) + context = Automa.CodeGenContext( + generator = get(kwargs, :generator, :goto), + checkbounds = get(kwargs, :checkbounds, false), + loopunroll = get(kwargs, :loopunroll, 0) + ) + quote + function index!(record::$(record_type)) + data = record.data + p = 1 + p_end = p_eof = sizeof(data) + initialize!(record) + $(init_code) + cs = $(machine.start_state) + $(Automa.generate_exec_code(context, machine, actions)) + if cs != 0 + throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) + end + @assert isfilled(record) + return record + end + end +end + +function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:()) + quote + function readheader!(reader::$(reader_type)) + _readheader!(reader, reader.state) + end + + function _readheader!(reader::$(reader_type), state::State) + stream = state.stream + ensure_margin!(stream) + cs = state.cs + linenum = state.linenum + data = stream.buffer + p = stream.position + p_end = stream.available + p_eof = -1 + finish_header = false + record = $(metainfo_type)() + + $(init_code) + + while true + $(Automa.generate_exec_code(Automa.CodeGenContext(generator=:table), machine, actions)) + + state.cs = cs + state.finished = cs == 0 + state.linenum = linenum + stream.position = p + + if cs < 0 + error("$($(reader_type)) file format error on line ", linenum) + elseif finish_header + $(finish_code) + break + elseif p > p_eof ≥ 0 + error("incomplete $($(reader_type)) input on line ", linenum) + else + hits_eof = BufferedStreams.fillbuffer!(stream) == 0 + p = stream.position + p_end = stream.available + if hits_eof + p_eof = p_end + end + end + end + end + end +end + +function generate_read_function(reader_type, machine, init_code, actions; kwargs...) + kwargs = Dict(kwargs) + context = Automa.CodeGenContext( + generator=get(kwargs, :generator, :goto), + checkbounds=get(kwargs, :checkbounds, false), + loopunroll=get(kwargs, :loopunroll, 0) + ) + quote + function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type)) + return _read!(reader, reader.state, record) + end + + function _read!(reader::$(reader_type), state::State, record::eltype($(reader_type))) + stream = state.stream + ensure_margin!(stream) + cs = state.cs + linenum = state.linenum + data = stream.buffer + p = stream.position + p_end = stream.available + p_eof = -1 + found_record = false + initialize!(record) + + $(init_code) + + if state.finished + throw(EOFError()) + end + + while true + $(Automa.generate_exec_code(context, machine, actions)) + + state.cs = cs + state.finished |= cs == 0 + state.linenum = linenum + stream.position = p + + if cs < 0 + error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)]))) + elseif found_record + break + elseif cs == 0 + throw(EOFError()) + elseif p > p_eof ≥ 0 + error("incomplete $($(reader_type)) input on line ", linenum) + elseif BufferedStreams.available_bytes(stream) < 64 + hits_eof = BufferedStreams.fillbuffer!(stream) == 0 + p = stream.position + p_end = stream.available + if hits_eof + p_eof = p_end + end + end + end + + @assert isfilled(record) + return record + end + end +end + +# Automa.jl generated readrecord! and readmetainfo! functions +# ======================================== + +# file = header . body +# header = metainfo* +# body = record* +isinteractive() && info("compiling SAM") +const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_machine = (function () + cat = Automa.RegExp.cat + rep = Automa.RegExp.rep + alt = Automa.RegExp.alt + opt = Automa.RegExp.opt + any = Automa.RegExp.any + + metainfo = let + tag = re"[A-Z][A-Z]" \ cat("CO") + tag.actions[:enter] = [:mark1] + tag.actions[:exit] = [:metainfo_tag] + + dict = let + key = re"[A-Za-z][A-Za-z0-9]" + key.actions[:enter] = [:mark2] + key.actions[:exit] = [:metainfo_dict_key] + val = re"[ -~]+" + val.actions[:enter] = [:mark2] + val.actions[:exit] = [:metainfo_dict_val] + keyval = cat(key, ':', val) + + cat(keyval, rep(cat('\t', keyval))) + end + dict.actions[:enter] = [:mark1] + dict.actions[:exit] = [:metainfo_val] + + co = cat("CO") + co.actions[:enter] = [:mark1] + co.actions[:exit] = [:metainfo_tag] + + comment = re"[^\r\n]*" + comment.actions[:enter] = [:mark1] + comment.actions[:exit] = [:metainfo_val] + + cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment))) + end + metainfo.actions[:enter] = [:anchor] + metainfo.actions[:exit] = [:metainfo] + + record = let + qname = re"[!-?A-~]+" + qname.actions[:enter] = [:mark] + qname.actions[:exit] = [:record_qname] + + flag = re"[0-9]+" + flag.actions[:enter] = [:mark] + flag.actions[:exit] = [:record_flag] + + rname = re"\*|[!-()+-<>-~][!-~]*" + rname.actions[:enter] = [:mark] + rname.actions[:exit] = [:record_rname] + + pos = re"[0-9]+" + pos.actions[:enter] = [:mark] + pos.actions[:exit] = [:record_pos] + + mapq = re"[0-9]+" + mapq.actions[:enter] = [:mark] + mapq.actions[:exit] = [:record_mapq] + + cigar = re"\*|([0-9]+[MIDNSHPX=])+" + cigar.actions[:enter] = [:mark] + cigar.actions[:exit] = [:record_cigar] + + rnext = re"\*|=|[!-()+-<>-~][!-~]*" + rnext.actions[:enter] = [:mark] + rnext.actions[:exit] = [:record_rnext] + + pnext = re"[0-9]+" + pnext.actions[:enter] = [:mark] + pnext.actions[:exit] = [:record_pnext] + + tlen = re"[-+]?[0-9]+" + tlen.actions[:enter] = [:mark] + tlen.actions[:exit] = [:record_tlen] + + seq = re"\*|[A-Za-z=.]+" + seq.actions[:enter] = [:mark] + seq.actions[:exit] = [:record_seq] + + qual = re"[!-~]+" + qual.actions[:enter] = [:mark] + qual.actions[:exit] = [:record_qual] + + field = let + tag = re"[A-Za-z][A-Za-z0-9]" + val = alt( + re"A:[!-~]", + re"i:[-+]?[0-9]+", + re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?", + re"Z:[ !-~]*", + re"H:([0-9A-F][0-9A-F])*", + re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+") + + cat(tag, ':', val) + end + field.actions[:enter] = [:mark] + field.actions[:exit] = [:record_field] + + cat( + qname, '\t', + flag, '\t', + rname, '\t', + pos, '\t', + mapq, '\t', + cigar, '\t', + rnext, '\t', + pnext, '\t', + tlen, '\t', + seq, '\t', + qual, + rep(cat('\t', field))) + end + record.actions[:enter] = [:anchor] + record.actions[:exit] = [:record] + + newline = let + lf = re"\n" + lf.actions[:enter] = [:countline] + + cat(re"\r?", lf) + end + + header′ = rep(cat(metainfo, newline)) + header′.actions[:exit] = [:header] + header = cat(header′, opt(any() \ cat('@'))) # look ahead + + body = rep(cat(record, newline)) + + return map(Automa.compile, (metainfo, record, header, body)) +end)() + +const sam_metainfo_actions = Dict( + :metainfo_tag => :(record.tag = (mark1:p-1) .- offset), + :metainfo_val => :(record.val = (mark1:p-1) .- offset), + :metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)), + :metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)), + :metainfo => quote + resize_and_copy!(record.data, data, offset+1:p-1) + record.filled = (offset+1:p-1) .- offset + end, + :anchor => :(), + :mark1 => :(mark1 = p), + :mark2 => :(mark2 = p)) +eval( + generate_index_function( + MetaInfo, + sam_metainfo_machine, + :(mark1 = mark2 = offset = 0), + sam_metainfo_actions)) +eval( + generate_readheader_function( + Reader, + MetaInfo, + sam_header_machine, + :(mark1 = mark2 = offset = 0), + merge(sam_metainfo_actions, Dict( + :metainfo => quote + resize_and_copy!(record.data, data, upanchor!(stream):p-1) + record.filled = (offset+1:p-1) .- offset + @assert isfilled(record) + push!(reader.header.metainfo, record) + ensure_margin!(stream) + record = MetaInfo() + end, + :header => :(finish_header = true; @escape), + :countline => :(linenum += 1), + :anchor => :(anchor!(stream, p); offset = p - 1))), + quote + if !eof(stream) + stream.position -= 1 # cancel look-ahead + end + end)) + +const sam_record_actions = Dict( + :record_qname => :(record.qname = (mark:p-1) .- offset), + :record_flag => :(record.flag = (mark:p-1) .- offset), + :record_rname => :(record.rname = (mark:p-1) .- offset), + :record_pos => :(record.pos = (mark:p-1) .- offset), + :record_mapq => :(record.mapq = (mark:p-1) .- offset), + :record_cigar => :(record.cigar = (mark:p-1) .- offset), + :record_rnext => :(record.rnext = (mark:p-1) .- offset), + :record_pnext => :(record.pnext = (mark:p-1) .- offset), + :record_tlen => :(record.tlen = (mark:p-1) .- offset), + :record_seq => :(record.seq = (mark:p-1) .- offset), + :record_qual => :(record.qual = (mark:p-1) .- offset), + :record_field => :(push!(record.fields, (mark:p-1) .- offset)), + :record => quote + resize_and_copy!(record.data, data, 1:p-1) + record.filled = (offset+1:p-1) .- offset + end, + :anchor => :(), + :mark => :(mark = p)) +eval( + generate_index_function( + Record, + sam_record_machine, + :(mark = offset = 0), + sam_record_actions)) +eval( + generate_read_function( + Reader, + sam_body_machine, + :(mark = offset = 0), + merge(sam_record_actions, Dict( + :record => quote + resize_and_copy!(record.data, data, upanchor!(stream):p-1) + record.filled = (offset+1:p-1) .- offset + found_record = true + @escape + end, + :countline => :(linenum += 1), + :anchor => :(anchor!(stream, p); offset = p - 1))))) diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 628b085..5b6bab8 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -66,6 +66,7 @@ include("metainfo.jl") include("record.jl") include("header.jl") include("reader.jl") +include("readrecord.jl") include("writer.jl") end From 892ad0d7dc8578fae4f9c24c4624561e5b8cfd54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 8 Sep 2019 09:08:12 +1000 Subject: [PATCH 10/14] Pipe into eval --- src/sam/readrecord.jl | 109 ++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 4a18064..0f1811a 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -317,36 +317,39 @@ const sam_metainfo_actions = Dict( end, :anchor => :(), :mark1 => :(mark1 = p), - :mark2 => :(mark2 = p)) -eval( - generate_index_function( - MetaInfo, - sam_metainfo_machine, - :(mark1 = mark2 = offset = 0), - sam_metainfo_actions)) -eval( - generate_readheader_function( - Reader, - MetaInfo, - sam_header_machine, - :(mark1 = mark2 = offset = 0), - merge(sam_metainfo_actions, Dict( - :metainfo => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset - @assert isfilled(record) - push!(reader.header.metainfo, record) - ensure_margin!(stream) - record = MetaInfo() - end, - :header => :(finish_header = true; @escape), - :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1))), - quote - if !eof(stream) - stream.position -= 1 # cancel look-ahead - end - end)) + :mark2 => :(mark2 = p) +) + +generate_index_function( + MetaInfo, + sam_metainfo_machine, + :(mark1 = mark2 = offset = 0), + sam_metainfo_actions +) |> eval + +generate_readheader_function( + Reader, + MetaInfo, + sam_header_machine, + :(mark1 = mark2 = offset = 0), + merge(sam_metainfo_actions, Dict( + :metainfo => quote + resize_and_copy!(record.data, data, upanchor!(stream):p-1) + record.filled = (offset+1:p-1) .- offset + @assert isfilled(record) + push!(reader.header.metainfo, record) + ensure_margin!(stream) + record = MetaInfo() + end, + :header => :(finish_header = true; @escape), + :countline => :(linenum += 1), + :anchor => :(anchor!(stream, p); offset = p - 1))), + quote + if !eof(stream) + stream.position -= 1 # cancel look-ahead + end + end +) |> eval const sam_record_actions = Dict( :record_qname => :(record.qname = (mark:p-1) .- offset), @@ -366,24 +369,28 @@ const sam_record_actions = Dict( record.filled = (offset+1:p-1) .- offset end, :anchor => :(), - :mark => :(mark = p)) -eval( - generate_index_function( - Record, - sam_record_machine, - :(mark = offset = 0), - sam_record_actions)) -eval( - generate_read_function( - Reader, - sam_body_machine, - :(mark = offset = 0), - merge(sam_record_actions, Dict( - :record => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset - found_record = true - @escape - end, - :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1))))) + :mark => :(mark = p) +) + +generate_index_function( + Record, + sam_record_machine, + :(mark = offset = 0), + sam_record_actions +) |> eval + +generate_read_function( + Reader, + sam_body_machine, + :(mark = offset = 0), + merge(sam_record_actions, Dict( + :record => quote + resize_and_copy!(record.data, data, upanchor!(stream):p-1) + record.filled = (offset+1:p-1) .- offset + found_record = true + @escape + end, + :countline => :(linenum += 1), + :anchor => :(anchor!(stream, p); offset = p - 1)) + ) +) |> eval From 6cd89aba08cca53cc6888f6109edb7119907438e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 7 Sep 2019 21:47:00 +1000 Subject: [PATCH 11/14] Move isinteractive into machine --- src/sam/readrecord.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 0f1811a..1cb27d5 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -173,8 +173,10 @@ end # file = header . body # header = metainfo* # body = record* -isinteractive() && info("compiling SAM") const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_machine = (function () + + isinteractive() && info("compiling SAM") + cat = Automa.RegExp.cat rep = Automa.RegExp.rep alt = Automa.RegExp.alt From 12db2c4ea84c1ba045f49ae2cbac7f6b518fe583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 30 Nov 2019 21:18:34 +1100 Subject: [PATCH 12/14] Align emission symbols for TranscodingStreams --- src/sam/readrecord.jl | 94 +++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 1cb27d5..82ebb93 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -185,79 +185,79 @@ const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_mac metainfo = let tag = re"[A-Z][A-Z]" \ cat("CO") - tag.actions[:enter] = [:mark1] + tag.actions[:enter] = [:pos1] tag.actions[:exit] = [:metainfo_tag] dict = let key = re"[A-Za-z][A-Za-z0-9]" - key.actions[:enter] = [:mark2] + key.actions[:enter] = [:pos2] key.actions[:exit] = [:metainfo_dict_key] val = re"[ -~]+" - val.actions[:enter] = [:mark2] + val.actions[:enter] = [:pos2] val.actions[:exit] = [:metainfo_dict_val] keyval = cat(key, ':', val) cat(keyval, rep(cat('\t', keyval))) end - dict.actions[:enter] = [:mark1] + dict.actions[:enter] = [:pos1] dict.actions[:exit] = [:metainfo_val] co = cat("CO") - co.actions[:enter] = [:mark1] + co.actions[:enter] = [:pos1] co.actions[:exit] = [:metainfo_tag] comment = re"[^\r\n]*" - comment.actions[:enter] = [:mark1] + comment.actions[:enter] = [:pos1] comment.actions[:exit] = [:metainfo_val] cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment))) end - metainfo.actions[:enter] = [:anchor] + metainfo.actions[:enter] = [:mark] metainfo.actions[:exit] = [:metainfo] record = let qname = re"[!-?A-~]+" - qname.actions[:enter] = [:mark] + qname.actions[:enter] = [:pos] qname.actions[:exit] = [:record_qname] flag = re"[0-9]+" - flag.actions[:enter] = [:mark] + flag.actions[:enter] = [:pos] flag.actions[:exit] = [:record_flag] rname = re"\*|[!-()+-<>-~][!-~]*" - rname.actions[:enter] = [:mark] + rname.actions[:enter] = [:pos] rname.actions[:exit] = [:record_rname] pos = re"[0-9]+" - pos.actions[:enter] = [:mark] + pos.actions[:enter] = [:pos] pos.actions[:exit] = [:record_pos] mapq = re"[0-9]+" - mapq.actions[:enter] = [:mark] + mapq.actions[:enter] = [:pos] mapq.actions[:exit] = [:record_mapq] cigar = re"\*|([0-9]+[MIDNSHPX=])+" - cigar.actions[:enter] = [:mark] + cigar.actions[:enter] = [:pos] cigar.actions[:exit] = [:record_cigar] rnext = re"\*|=|[!-()+-<>-~][!-~]*" - rnext.actions[:enter] = [:mark] + rnext.actions[:enter] = [:pos] rnext.actions[:exit] = [:record_rnext] pnext = re"[0-9]+" - pnext.actions[:enter] = [:mark] + pnext.actions[:enter] = [:pos] pnext.actions[:exit] = [:record_pnext] tlen = re"[-+]?[0-9]+" - tlen.actions[:enter] = [:mark] + tlen.actions[:enter] = [:pos] tlen.actions[:exit] = [:record_tlen] seq = re"\*|[A-Za-z=.]+" - seq.actions[:enter] = [:mark] + seq.actions[:enter] = [:pos] seq.actions[:exit] = [:record_seq] qual = re"[!-~]+" - qual.actions[:enter] = [:mark] + qual.actions[:enter] = [:pos] qual.actions[:exit] = [:record_qual] field = let @@ -272,7 +272,7 @@ const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_mac cat(tag, ':', val) end - field.actions[:enter] = [:mark] + field.actions[:enter] = [:pos] field.actions[:exit] = [:record_field] cat( @@ -289,7 +289,7 @@ const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_mac qual, rep(cat('\t', field))) end - record.actions[:enter] = [:anchor] + record.actions[:enter] = [:mark] record.actions[:exit] = [:record] newline = let @@ -309,23 +309,23 @@ const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_mac end)() const sam_metainfo_actions = Dict( - :metainfo_tag => :(record.tag = (mark1:p-1) .- offset), - :metainfo_val => :(record.val = (mark1:p-1) .- offset), - :metainfo_dict_key => :(push!(record.dictkey, (mark2:p-1) .- offset)), - :metainfo_dict_val => :(push!(record.dictval, (mark2:p-1) .- offset)), + :metainfo_tag => :(record.tag = (pos1:p-1) .- offset), + :metainfo_val => :(record.val = (pos1:p-1) .- offset), + :metainfo_dict_key => :(push!(record.dictkey, (pos2:p-1) .- offset)), + :metainfo_dict_val => :(push!(record.dictval, (pos2:p-1) .- offset)), :metainfo => quote resize_and_copy!(record.data, data, offset+1:p-1) record.filled = (offset+1:p-1) .- offset end, - :anchor => :(), - :mark1 => :(mark1 = p), - :mark2 => :(mark2 = p) + :mark => :(), + :pos1 => :(pos1 = p), + :pos2 => :(pos2 = p) ) generate_index_function( MetaInfo, sam_metainfo_machine, - :(mark1 = mark2 = offset = 0), + :(pos1 = pos2 = offset = 0), sam_metainfo_actions ) |> eval @@ -333,7 +333,7 @@ generate_readheader_function( Reader, MetaInfo, sam_header_machine, - :(mark1 = mark2 = offset = 0), + :(pos1 = pos2 = offset = 0), merge(sam_metainfo_actions, Dict( :metainfo => quote resize_and_copy!(record.data, data, upanchor!(stream):p-1) @@ -345,7 +345,7 @@ generate_readheader_function( end, :header => :(finish_header = true; @escape), :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1))), + :mark => :(anchor!(stream, p); offset = p - 1))), quote if !eof(stream) stream.position -= 1 # cancel look-ahead @@ -354,37 +354,37 @@ generate_readheader_function( ) |> eval const sam_record_actions = Dict( - :record_qname => :(record.qname = (mark:p-1) .- offset), - :record_flag => :(record.flag = (mark:p-1) .- offset), - :record_rname => :(record.rname = (mark:p-1) .- offset), - :record_pos => :(record.pos = (mark:p-1) .- offset), - :record_mapq => :(record.mapq = (mark:p-1) .- offset), - :record_cigar => :(record.cigar = (mark:p-1) .- offset), - :record_rnext => :(record.rnext = (mark:p-1) .- offset), - :record_pnext => :(record.pnext = (mark:p-1) .- offset), - :record_tlen => :(record.tlen = (mark:p-1) .- offset), - :record_seq => :(record.seq = (mark:p-1) .- offset), - :record_qual => :(record.qual = (mark:p-1) .- offset), - :record_field => :(push!(record.fields, (mark:p-1) .- offset)), + :record_qname => :(record.qname = (pos:p-1) .- offset), + :record_flag => :(record.flag = (pos:p-1) .- offset), + :record_rname => :(record.rname = (pos:p-1) .- offset), + :record_pos => :(record.pos = (pos:p-1) .- offset), + :record_mapq => :(record.mapq = (pos:p-1) .- offset), + :record_cigar => :(record.cigar = (pos:p-1) .- offset), + :record_rnext => :(record.rnext = (pos:p-1) .- offset), + :record_pnext => :(record.pnext = (pos:p-1) .- offset), + :record_tlen => :(record.tlen = (pos:p-1) .- offset), + :record_seq => :(record.seq = (pos:p-1) .- offset), + :record_qual => :(record.qual = (pos:p-1) .- offset), + :record_field => :(push!(record.fields, (pos:p-1) .- offset)), :record => quote resize_and_copy!(record.data, data, 1:p-1) record.filled = (offset+1:p-1) .- offset end, - :anchor => :(), - :mark => :(mark = p) + :mark => :(), + :pos => :(pos = p) ) generate_index_function( Record, sam_record_machine, - :(mark = offset = 0), + :(pos = offset = 0), sam_record_actions ) |> eval generate_read_function( Reader, sam_body_machine, - :(mark = offset = 0), + :(pos = offset = 0), merge(sam_record_actions, Dict( :record => quote resize_and_copy!(record.data, data, upanchor!(stream):p-1) @@ -393,6 +393,6 @@ generate_read_function( @escape end, :countline => :(linenum += 1), - :anchor => :(anchor!(stream, p); offset = p - 1)) + :mark => :(anchor!(stream, p); offset = p - 1)) ) ) |> eval From de56faf0667078ff20bb17747f77b0dd422af2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Thu, 20 Feb 2020 21:19:53 +1100 Subject: [PATCH 13/14] Migrate from BufferedStreams to TranscodingStreams --- Project.toml | 4 +- src/sam/reader.jl | 76 ++++++- src/sam/readrecord.jl | 489 +++++++++++++++++++++--------------------- src/sam/sam.jl | 31 +-- 4 files changed, 327 insertions(+), 273 deletions(-) diff --git a/Project.toml b/Project.toml index 850b570..29455b6 100644 --- a/Project.toml +++ b/Project.toml @@ -9,10 +9,10 @@ BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" -BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" [compat] Automa = "0.7, 0.8" @@ -20,9 +20,9 @@ BGZFStreams = "0.3" BioAlignments = "2" BioGenerics = "0.1" BioSequences = "2" -BufferedStreams = "1" GenomicFeatures = "2" Indexes = "0.1" +TranscodingStreams = "0.6, 0.7, 0.8, 0.9" julia = "1.1" [extras] diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 0406165..3270957 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -1,16 +1,22 @@ # SAM Reader # ========= -mutable struct Reader <: BioGenerics.IO.AbstractReader - state::State +mutable struct Reader{S <: TranscodingStream} <: BioGenerics.IO.AbstractReader + state::State{S} header::Header +end - function Reader(input::BufferedStreams.BufferedInputStream) - reader = new(State(sam_header_machine.start_state, input), Header()) - readheader!(reader) - reader.state.cs = sam_body_machine.start_state - return reader - end +function Reader(state::State{S}) where {S <: TranscodingStream} + + rdr = Reader(state, Header()) + + cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum)) + + rdr.state.state = sam_machine_body.start_state + rdr.state.linenum = ln + rdr.state.filled = false + + return rdr end """ @@ -22,7 +28,19 @@ Create a data reader of the SAM file format. * `input`: data source """ function Reader(input::IO) - return Reader(BufferedStreams.BufferedInputStream(input)) + + if input isa TranscodingStream + return Reader(State(input, 1, 1, false)) + end + + stream = TranscodingStreams.NoopStream(input) + + return Reader(State(stream, 1, 1, false)) + +end + +function Base.eltype(::Type{<:Reader}) + return Record end function BioGenerics.IO.stream(reader::Reader) @@ -38,6 +56,42 @@ function header(reader::Reader)::Header return reader.header end -function Base.eltype(::Type{Reader}) - return Record +function Base.close(reader::Reader) + if reader.state.stream isa IO + close(reader.state.stream) + end + return nothing +end + +function index!(record::MetaInfo) + stream = TranscodingStreams.NoopStream(IOBuffer(record.data)) + found = index!(stream, record) + if !found + throw(ArgumentError("invalid SAM metadata")) + end + return record +end + +function index!(record::Record) + stream = TranscodingStreams.NoopStream(IOBuffer(record.data)) + found = index!(stream, record) + if !found + throw(ArgumentError("invalid SAM record")) + end + return record +end + +function Base.read!(rdr::Reader, rec::Record) + + cs, ln, f = readrecord!(rdr.state.stream, rec, (rdr.state.state, rdr.state.linenum)) + + rdr.state.state = cs + rdr.state.linenum = ln + rdr.state.filled = f + + if !f + cs == 0 && throw(EOFError()) + throw(ArgumentError("malformed SAM file")) + end + return rec end diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 82ebb93..e7ea75f 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -1,179 +1,10 @@ -@inline function anchor!(stream::BufferedStreams.BufferedInputStream, p, immobilize = true) - stream.anchor = p - stream.immobilized = immobilize - return stream -end - -@inline function upanchor!(stream::BufferedStreams.BufferedInputStream) - @assert stream.anchor != 0 "upanchor! called with no anchor set" - anchor = stream.anchor - stream.anchor = 0 - stream.immobilized = false - return anchor -end - -function ensure_margin!(stream::BufferedStreams.BufferedInputStream) - if stream.position * 20 > length(stream.buffer) * 19 - BufferedStreams.shiftdata!(stream) - end - return nothing -end - -@inline function resize_and_copy!(dst::Vector{UInt8}, src::Vector{UInt8}, r::UnitRange{Int}) - return resize_and_copy!(dst, 1, src, r) -end - -@inline function resize_and_copy!(dst::Vector{UInt8}, dstart::Int, src::Vector{UInt8}, r::UnitRange{Int}) - rlen = length(r) - if length(dst) != dstart + rlen - 1 - resize!(dst, dstart + rlen - 1) - end - copyto!(dst, dstart, src, first(r), rlen) - return dst -end - -function generate_index_function(record_type, machine, init_code, actions; kwargs...) - kwargs = Dict(kwargs) - context = Automa.CodeGenContext( - generator = get(kwargs, :generator, :goto), - checkbounds = get(kwargs, :checkbounds, false), - loopunroll = get(kwargs, :loopunroll, 0) - ) - quote - function index!(record::$(record_type)) - data = record.data - p = 1 - p_end = p_eof = sizeof(data) - initialize!(record) - $(init_code) - cs = $(machine.start_state) - $(Automa.generate_exec_code(context, machine, actions)) - if cs != 0 - throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) - end - @assert isfilled(record) - return record - end - end -end - -function generate_readheader_function(reader_type, metainfo_type, machine, init_code, actions, finish_code=:()) - quote - function readheader!(reader::$(reader_type)) - _readheader!(reader, reader.state) - end - - function _readheader!(reader::$(reader_type), state::State) - stream = state.stream - ensure_margin!(stream) - cs = state.cs - linenum = state.linenum - data = stream.buffer - p = stream.position - p_end = stream.available - p_eof = -1 - finish_header = false - record = $(metainfo_type)() - - $(init_code) - - while true - $(Automa.generate_exec_code(Automa.CodeGenContext(generator=:table), machine, actions)) - - state.cs = cs - state.finished = cs == 0 - state.linenum = linenum - stream.position = p - - if cs < 0 - error("$($(reader_type)) file format error on line ", linenum) - elseif finish_header - $(finish_code) - break - elseif p > p_eof ≥ 0 - error("incomplete $($(reader_type)) input on line ", linenum) - else - hits_eof = BufferedStreams.fillbuffer!(stream) == 0 - p = stream.position - p_end = stream.available - if hits_eof - p_eof = p_end - end - end - end - end - end -end - -function generate_read_function(reader_type, machine, init_code, actions; kwargs...) - kwargs = Dict(kwargs) - context = Automa.CodeGenContext( - generator=get(kwargs, :generator, :goto), - checkbounds=get(kwargs, :checkbounds, false), - loopunroll=get(kwargs, :loopunroll, 0) - ) - quote - function Base.read!(reader::$(reader_type), record::eltype($(reader_type)))::eltype($(reader_type)) - return _read!(reader, reader.state, record) - end - - function _read!(reader::$(reader_type), state::State, record::eltype($(reader_type))) - stream = state.stream - ensure_margin!(stream) - cs = state.cs - linenum = state.linenum - data = stream.buffer - p = stream.position - p_end = stream.available - p_eof = -1 - found_record = false - initialize!(record) - - $(init_code) - - if state.finished - throw(EOFError()) - end - - while true - $(Automa.generate_exec_code(context, machine, actions)) - - state.cs = cs - state.finished |= cs == 0 - state.linenum = linenum - stream.position = p - - if cs < 0 - error($(reader_type), " file format error on line ", linenum, " ~>", repr(String(data[p:min(p+7,p_end)]))) - elseif found_record - break - elseif cs == 0 - throw(EOFError()) - elseif p > p_eof ≥ 0 - error("incomplete $($(reader_type)) input on line ", linenum) - elseif BufferedStreams.available_bytes(stream) < 64 - hits_eof = BufferedStreams.fillbuffer!(stream) == 0 - p = stream.position - p_end = stream.available - if hits_eof - p_eof = p_end - end - end - end - - @assert isfilled(record) - return record - end - end -end - # Automa.jl generated readrecord! and readmetainfo! functions # ======================================== # file = header . body # header = metainfo* # body = record* -const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_machine = (function () +const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = (function () isinteractive() && info("compiling SAM") @@ -304,95 +135,261 @@ const sam_metainfo_machine, sam_record_machine, sam_header_machine, sam_body_mac header = cat(header′, opt(any() \ cat('@'))) # look ahead body = rep(cat(record, newline)) + body.actions[:exit] = [:body] - return map(Automa.compile, (metainfo, record, header, body)) + sam = cat(header, body) + + return map(Automa.compile, (metainfo, record, header, body, sam)) end)() -const sam_metainfo_actions = Dict( - :metainfo_tag => :(record.tag = (pos1:p-1) .- offset), - :metainfo_val => :(record.val = (pos1:p-1) .- offset), - :metainfo_dict_key => :(push!(record.dictkey, (pos2:p-1) .- offset)), - :metainfo_dict_val => :(push!(record.dictval, (pos2:p-1) .- offset)), +# write("sam_machine_metainfo.dot", Automa.machine2dot(sam_machine_metainfo)) +# run(`dot -Tsvg -o sam_machine_metainfo.svg sam_machine_metainfo.dot`) +# +# write("sam_machine_record.dot", Automa.machine2dot(sam_machine_record)) +# run(`dot -Tsvg -o sam_machine_record.svg sam_machine_record.dot`) +# +# write("sam_machine_header.dot", Automa.machine2dot(sam_machine_header)) +# run(`dot -Tsvg -o sam_machine_header.svg sam_machine_header.dot`) +# +# write("sam_machine_body.dot", Automa.machine2dot(sam_machine_body)) +# run(`dot -Tsvg -o sam_machine_body.svg sam_machine_body.dot`) +# +# write("sam_machine.dot", Automa.machine2dot(sam_machine)) +# run(`dot -Tsvg -o sam_machine.svg sam_machine.dot`) + +function appendfrom!(dst, dpos, src, spos, n) + if length(dst) < dpos + n - 1 + resize!(dst, dpos + n - 1) + end + copyto!(dst, dpos, src, spos, n) + return dst +end + +const sam_actions_metainfo = Dict( + :mark => :(@mark), + :pos1 => :(pos1 = @relpos(p)), + :pos2 => :(pos2 = @relpos(p)), + :metainfo_tag => :(metainfo.tag = pos1:@relpos(p-1)), + :metainfo_val => :(metainfo.val = pos1:@relpos(p-1)), + :metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))), + :metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))), :metainfo => quote - resize_and_copy!(record.data, data, offset+1:p-1) - record.filled = (offset+1:p-1) .- offset - end, - :mark => :(), - :pos1 => :(pos1 = p), - :pos2 => :(pos2 = p) -) + let markpos = @markpos() -generate_index_function( - MetaInfo, - sam_metainfo_machine, - :(pos1 = pos2 = offset = 0), - sam_metainfo_actions -) |> eval + appendfrom!(metainfo.data, 1, data, markpos, length(markpos:p-1)) -generate_readheader_function( - Reader, - MetaInfo, - sam_header_machine, - :(pos1 = pos2 = offset = 0), - merge(sam_metainfo_actions, Dict( - :metainfo => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset - @assert isfilled(record) - push!(reader.header.metainfo, record) - ensure_margin!(stream) - record = MetaInfo() - end, - :header => :(finish_header = true; @escape), - :countline => :(linenum += 1), - :mark => :(anchor!(stream, p); offset = p - 1))), - quote - if !eof(stream) - stream.position -= 1 # cancel look-ahead + metainfo.filled = @relpos(markpos):@relpos(p-1) + + found_metainfo = true end end -) |> eval - -const sam_record_actions = Dict( - :record_qname => :(record.qname = (pos:p-1) .- offset), - :record_flag => :(record.flag = (pos:p-1) .- offset), - :record_rname => :(record.rname = (pos:p-1) .- offset), - :record_pos => :(record.pos = (pos:p-1) .- offset), - :record_mapq => :(record.mapq = (pos:p-1) .- offset), - :record_cigar => :(record.cigar = (pos:p-1) .- offset), - :record_rnext => :(record.rnext = (pos:p-1) .- offset), - :record_pnext => :(record.pnext = (pos:p-1) .- offset), - :record_tlen => :(record.tlen = (pos:p-1) .- offset), - :record_seq => :(record.seq = (pos:p-1) .- offset), - :record_qual => :(record.qual = (pos:p-1) .- offset), - :record_field => :(push!(record.fields, (pos:p-1) .- offset)), - :record => quote - resize_and_copy!(record.data, data, 1:p-1) - record.filled = (offset+1:p-1) .- offset - end, - :mark => :(), - :pos => :(pos = p) ) -generate_index_function( - Record, - sam_record_machine, - :(pos = offset = 0), - sam_record_actions -) |> eval +const sam_actions_header = merge( + sam_actions_metainfo, + Dict( + :countline => :(linenum += 1), + :header => quote + + finish_header = true + + if !eof(stream) + p -= 1 # cancel look-ahead + end + + @escape + end + ) +) + +const sam_actions_record = Dict( + :mark => :(@mark), + :pos => :(pos = @relpos(p)), + :record_qname => :(record.qname = pos:@relpos(p-1)), + :record_flag => :(record.flag = pos:@relpos(p-1)), + :record_rname => :(record.rname = pos:@relpos(p-1)), + :record_pos => :(record.pos = pos:@relpos(p-1)), + :record_mapq => :(record.mapq = pos:@relpos(p-1)), + :record_cigar => :(record.cigar = pos:@relpos(p-1)), + :record_rnext => :(record.rnext = pos:@relpos(p-1)), + :record_pnext => :(record.pnext = pos:@relpos(p-1)), + :record_tlen => :(record.tlen = pos:@relpos(p-1)), + :record_seq => :(record.seq = pos:@relpos(p-1)), + :record_qual => :(record.qual = pos:@relpos(p-1)), + :record_field => :(push!(record.fields, pos:@relpos(p-1))), + :record => quote + let markpos = @markpos() + + appendfrom!(record.data, 1, data, markpos, length(markpos:p-1)) + + record.filled = @relpos(markpos):@relpos(p-1) -generate_read_function( - Reader, - sam_body_machine, - :(pos = offset = 0), - merge(sam_record_actions, Dict( - :record => quote - resize_and_copy!(record.data, data, upanchor!(stream):p-1) - record.filled = (offset+1:p-1) .- offset found_record = true @escape - end, + end + end +) + +const sam_actions_body = merge( + sam_actions_record, + Dict( :countline => :(linenum += 1), - :mark => :(anchor!(stream, p); offset = p - 1)) + :body => quote + finish_body = true + @escape + end ) +) + +# const sam_actions = merge( +# sam_actions_header, +# sam_actions_body +# ) + +const sam_context = Automa.CodeGenContext( + generator = :goto, + checkbounds = false, + loopunroll = 0 +) + +const sam_initcode_metainfo = quote + pos1 = 0 + pos2 = 0 + found_metainfo = false +end + +const sam_initcode_record = quote + pos = 0 + found_record = false +end + +const sam_initcode_header = quote + $(sam_initcode_metainfo) + metainfo = MetaInfo() + finish_header = false + cs, linenum = state +end + +const sam_initcode_body = quote + $(sam_initcode_record) + finish_body = false + cs, linenum = state +end + +const sam_loopcode_metainfo = quote + + if cs < 0 + throw(ArgumentError("malformed metainfo at pos $(p)")) + end + + if found_metainfo + @goto __return__ + end +end + +const sam_returncode_metainfo = quote + return found_metainfo +end + + +Automa.Stream.generate_reader( + :index!, + sam_machine_metainfo, + arguments = (:(metainfo::MetaInfo),), + actions = sam_actions_metainfo, + context = sam_context, + initcode = sam_initcode_metainfo, + loopcode = sam_loopcode_metainfo, + returncode = sam_returncode_metainfo +) |> eval + +const sam_loopcode_header = quote + + if cs < 0 + throw(ArgumentError("malformed metainfo at line $(linenum)")) + end + + if found_metainfo + push!(header, metainfo) + found_metainfo = false + end + + metainfo = MetaInfo() + + if finish_header + @goto __return__ + end +end + +const sam_returncode_header = quote + return cs, linenum, finish_header +end + +Automa.Stream.generate_reader( + :readheader!, + sam_machine_header, + arguments = (:(header::SAM.Header), :(state::Tuple{Int,Int})), + actions = sam_actions_header, + context = sam_context, + initcode = sam_initcode_header, + loopcode = sam_loopcode_header, + returncode = sam_returncode_header +) |> eval + + +const sam_loopcode_record = quote + + if cs < 0 + throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)")) + end + + # # if cs != 0 + # # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) + # # end + + if found_record + @goto __return__ + end + +end + +const sam_loopcode_body = quote + + $(sam_loopcode_record) + + if finish_body + @goto __return__ + end +end + +const sam_returncode_record = quote + return found_record +end + +Automa.Stream.generate_reader( + :index!, + sam_machine_record, + arguments = (:(record::Record),), + actions = sam_actions_record, + context = sam_context, + initcode = sam_initcode_record, + loopcode = sam_loopcode_record, + returncode = sam_returncode_record +) |> eval + + + +const sam_returncode_body = quote + return cs, linenum, found_record +end + +Automa.Stream.generate_reader( + :readrecord!, + sam_machine_body, + arguments = (:(record::Record), :(state::Tuple{Int,Int})), + actions = sam_actions_body, + context = sam_context, + initcode = sam_initcode_body, + loopcode = sam_loopcode_body, + returncode = sam_returncode_body ) |> eval diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 5b6bab8..95e36f6 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -7,11 +7,14 @@ using BioGenerics import Automa import Automa.RegExp: @re_str +import Automa.Stream: @mark, @markpos, @relpos, @abspos import BioAlignments +import BioGenerics: BioGenerics, isfilled, header import BioGenerics.Exceptions: missingerror -import BioGenerics: isfilled, header +import BioGenerics.Automa: State import BioSequences -import BufferedStreams +import TranscodingStreams: TranscodingStreams, TranscodingStream + using Printf: @sprintf @@ -46,19 +49,19 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I return sign * x end -#TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams). -import BufferedStreams: BufferedStreams, BufferedInputStream -# A type keeping track of a ragel-based parser's state. -mutable struct State{T<:BufferedInputStream} - stream::T # input stream - cs::Int # current DFA state of Ragel - linenum::Int # line number: parser is responsible for updating this - finished::Bool # true if finished (regardless of where in the stream we are) -end +# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams). +# import BufferedStreams: BufferedStreams, BufferedInputStream +# # A type keeping track of a ragel-based parser's state. +# mutable struct State{T<:BufferedInputStream} +# stream::T # input stream +# cs::Int # current DFA state of Ragel +# linenum::Int # line number: parser is responsible for updating this +# finished::Bool # true if finished (regardless of where in the stream we are) +# end -function State(initstate::Int, input::BufferedInputStream) - return State(input, initstate, 1, false) -end +# function State(initstate::Int, input::BufferedInputStream) +# return State(input, initstate, 1, false) +# end include("flags.jl") From 8702799593818ceaad2817a338e312a1b77d936b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 16 Mar 2020 21:59:40 +1100 Subject: [PATCH 14/14] Increment version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 29455b6..2acfbc3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "Ciarán O'Mara "] -version = "0.1.1" +version = "0.2.0" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"