Merge pull request #52 from BioJulia/release/0.3.0

Release v0.3.0
2024-12-23 13:28:16 +00:00 · 2022-10-10 15:55:49 -05:00 · 2022-10-10 15:55:49 -05:00 · 946e77a734
commit 946e77a734
parent 4490e7c890 0c4b3f5dd0
25 changed files with 323 additions and 237 deletions
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@ -1,17 +1,45 @@
 name: CompatHelper
-
 on:
  schedule:
-    - cron: '0 0 * * *'
-
+    - cron: 0 0 * * *
+  workflow_dispatch:
+permissions:
+  contents: write
+  pull-requests: write
 jobs:
  CompatHelper:
    runs-on: ubuntu-latest
    steps:
-      - name: Add CompatHelper
-        run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")'
-      - name: Run CompatHelper
+           - name: Check if Julia is already available in the PATH
+        id: julia_in_path
+        run: which julia
+        continue-on-error: true
+      - name: Install Julia, but only if it is not already available in the PATH
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1'
+          arch: ${{ runner.arch }}
+        if: steps.julia_in_path.outcome != 'success'
+      - name: "Add the General registry via Git"
+        run: |
+          import Pkg
+          ENV["JULIA_PKG_SERVER"] = ""
+          Pkg.Registry.add("General")
+        shell: julia --color=yes {0}
+      - name: "Install CompatHelper"
+        run: |
+          import Pkg
+          name = "CompatHelper"
+          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+          version = "3"
+          Pkg.add(; name, uuid, version)
+        shell: julia --color=yes {0}
+      - name: "Run CompatHelper"
+        run: |
+          import CompatHelper
+          CompatHelper.main()
+        shell: julia --color=yes {0}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
-        run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")'
+          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
+          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
--- a/.github/workflows/Documentation.yml
+++ b/.github/workflows/Documentation.yml
@ -1,5 +1,4 @@
-name: Build Documentation
-
+name: Documentation
 on:
  push:
    branches:
@ -10,17 +9,15 @@ on:
  pull_request:

 jobs:
-  build:
+  Documenter:
+    permissions:
+      contents: write
+    name: Documentation    
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
-        with:
-          version: '1'
-      - name: Install Dependencies
-        run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
-      - name: Build and Deploy
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-docdeploy@v1
        env:
-          # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token
-          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key
-        run: julia --color=yes --project=docs/ docs/make.jl
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@ -1,12 +1,22 @@
 name: TagBot
+
 on:
-  schedule:
-    - cron: '0 0 * * *'
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:
+    inputs:
+      lookback:
+        default: 3
+permissions:
+  contents: write
+
 jobs:
  TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
    runs-on: ubuntu-latest
    steps:
      - uses: JuliaRegistries/TagBot@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
-          ssh: ${{ secrets.TAGBOT_KEY }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
--- a/.github/workflows/UnitTests.yml
+++ b/.github/workflows/UnitTests.yml
@ -6,16 +6,20 @@ on:

 jobs:
  test:
+    name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }}
    runs-on: ${{ matrix.os }}
    continue-on-error: ${{ matrix.experimental }}
    strategy:
      fail-fast: false
      matrix:
        julia-version:
-          - '1.0' # LTS
+          - '1.6' # LTS
          - '1'
        julia-arch: [x64, x86]
        os: [ubuntu-latest, windows-latest, macOS-latest]
+        exclude:
+          - os: macOS-latest
+            julia-arch: x86
        experimental: [false]
        include:
          - julia-version: nightly
@ -31,7 +35,7 @@ jobs:
        with:
          version: ${{ matrix.julia-version }}
      - name: Run Tests
-        uses: julia-actions/julia-runtest@latest
+        uses: julia-actions/julia-runtest@v1
      - name: Create CodeCov
        uses: julia-actions/julia-processcoverage@v1
      - name: Upload CodeCov
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,27 @@
+# Changelog
+
+All notable changes to HapLink.jl will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.3.0]
+
+## Added
+
+- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29))
+- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43))
+
+### Changed
+
+- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21))
+- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48))
+
+### Fixed
+
+- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26))
+
+[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD
+[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0
--- a/Project.toml
+++ b/Project.toml
@ -1,7 +1,7 @@
 name = "XAM"
 uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c"
 authors = ["Kenta Sato <bicycle1885@gmail.com>", "Ben J. Ward <ward9250@gmail.com>", "Ciarán O'Mara <Ciaran.OMara@utas.edu.au>"]
-version = "0.2.8"
+version = "0.3.0"

 [deps]
 Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
@ -17,18 +17,19 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
 [compat]
 Automa = "0.7, 0.8"
 BGZFStreams = "0.3.1"
-BioAlignments = "2"
+BioAlignments = "2.2"
 BioGenerics = "0.1"
-BioSequences = "2.0.4"
+BioSequences = "3"
 FormatSpecimens = "1.1"
 GenomicFeatures = "2"
 Indexes = "0.1"
 TranscodingStreams = "0.6, 0.7, 0.8, 0.9"
-julia = "1"
+julia = "1.6"

 [extras]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [targets]
-test = ["FormatSpecimens", "Test"]
+test = ["Documenter", "FormatSpecimens", "Test"]
--- a/README.md
+++ b/README.md
@ -5,7 +5,6 @@
 [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
 [![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
 [![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
-[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)

 > This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original
 blog post").
@ -66,4 +65,4 @@ Your logo will show up here with a link to your website.


 ## Questions?
-If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
+If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
--- a/docs/Project.toml
+++ b/docs/Project.toml
@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

 [compat]
-Documenter = "0.24"
+Documenter = "0.27"
--- a/docs/make.jl
+++ b/docs/make.jl
@ -2,6 +2,8 @@ using Pkg
 using Documenter, XAM

 makedocs(
+    checkdocs = :all,
+    linkcheck = true,
    format = Documenter.HTML(
        edit_link = "develop"
    ),
--- a/docs/src/index.md
+++ b/docs/src/index.md
@ -5,7 +5,6 @@
 [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE)
 [![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable)
 [![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/)
-[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl)

 > This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/).

@ -65,4 +64,4 @@ Your logo will show up here with a link to your website.


 ## Questions?
-If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
+If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio).
--- a/src/XAM.jl
+++ b/src/XAM.jl
@ -4,6 +4,27 @@ export
    SAM,
    BAM

+"""
+    flag(record::Union{SAM.Record, BAM.Record})::UInt16
+
+Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag
+being OR'd together. The possible flags are:
+
+    0x0001 template having multiple segments in sequencing
+    0x0002 each segment properly aligned according to the aligner
+    0x0004 segment unmapped
+    0x0008 next segment in the template unmapped
+    0x0010 SEQ being reverse complemented
+    0x0020 SEQ of the next segment in the template being reverse complemented
+    0x0040 the first segment in the template
+    0x0080 the last segment in the template
+    0x0100 secondary alignment
+    0x0200 not passing filters, such as platform/vendor quality controls
+    0x0400 PCR or optical duplicate
+    0x0800 supplementary alignment
+"""
+function flag end
+
 include("sam/sam.jl")
 include("bam/bam.jl")

--- a/src/bam/bam.jl
+++ b/src/bam/bam.jl
@ -6,6 +6,7 @@ module BAM
 using BioGenerics
 using GenomicFeatures
 using XAM.SAM
+import ..XAM: flag

 import BGZFStreams
 import BioAlignments
--- a/src/bam/overlap.jl
+++ b/src/bam/overlap.jl
@ -50,12 +50,15 @@ function Base.iterate(iter::OverlapIterator)
    if refindex === nothing
        throw(ArgumentError("sequence name $(iter.refname) is not found in the header"))
    end
-    @assert iter.reader.index !== nothing
+
+    @assert iter.reader.index !== nothing "Reader index cannot be nothing."
+
    chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval)
-    if !isempty(chunks)
-        seek(iter.reader, first(chunks).start)
+    if isempty(chunks)
+        return nothing
    end
    state = OverlapIteratorState(refindex, chunks, 1, Record())
+    seek(iter.reader, state.chunks[state.chunkid].start)
    return iterate(iter, state)
 end

@ -65,7 +68,7 @@ function Base.iterate(iter::OverlapIterator, state)
        while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop
            read!(iter.reader, state.record)
            c = compare_intervals(state.record, (state.refindex, iter.interval))
-            if c == 0
+            if c == 0  # overlapping
                return copy(state.record), state
            end
            if c > 0
@ -78,6 +81,7 @@ function Base.iterate(iter::OverlapIterator, state)
            seek(iter.reader, state.chunks[state.chunkid].start)
        end
    end
+    # no more overlapping records
    return nothing
 end

@ -89,7 +93,7 @@ function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}})
        return -1
    end

-    if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2]))
+    if rid > interval[1] || (rid == interval[1] && leftposition(record) > last(interval[2]))
        # strictly right
        return +1
    end
--- a/src/bam/record.jl
+++ b/src/bam/record.jl
@ -11,18 +11,21 @@ mutable struct Record
    block_size::Int32
    refid::Int32
    pos::Int32
-    bin_mq_nl::UInt32
-    flag_nc::UInt32
+    l_read_name::UInt8
+    mapq::UInt8
+    bin::UInt16
+    n_cigar_op::UInt16
+    flag::UInt16
    l_seq::Int32
    next_refid::Int32
    next_pos::Int32
    tlen::Int32
    # variable length data
    data::Vector{UInt8}
-    reader::Reader
+    reader::Union{Reader, Nothing}

    function Record()
-        return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
+        return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[])
    end
 end

@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record)
    return a.block_size == b.block_size &&
        a.refid         == b.refid &&
        a.pos           == b.pos &&
-        a.bin_mq_nl     == b.bin_mq_nl &&
-        a.flag_nc       == b.flag_nc &&
+        a.l_read_name   == b.l_read_name &&
+        a.mapq          == b.mapq &&
+        a.bin           == b.bin &&
+        a.n_cigar_op    == b.n_cigar_op &&
+        a.flag          == b.flag &&
        a.l_seq         == b.l_seq &&
        a.next_refid    == b.next_refid &&
        a.next_pos      == b.next_pos &&
@ -60,19 +66,13 @@ end

 function Base.copy(record::Record)
    copy = Record()
-    copy.block_size = record.block_size
-    copy.refid      = record.refid
-    copy.pos        = record.pos
-    copy.bin_mq_nl  = record.bin_mq_nl
-    copy.flag_nc    = record.flag_nc
-    copy.l_seq      = record.l_seq
-    copy.next_refid = record.next_refid
-    copy.next_pos   = record.next_pos
-    copy.tlen       = record.tlen
-    copy.data       = record.data[1:data_size(record)]
-    if isdefined(record, :reader)
-        copy.reader = record.reader
+    GC.@preserve copy record begin
+    	dst_pointer = Ptr{UInt8}(pointer_from_objref(copy))
+    	src_pointer = Ptr{UInt8}(pointer_from_objref(record))
+    	unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES)
    end
+    copy.data       = record.data[1:data_size(record)]
+    copy.reader     = record.reader
    return copy
 end

@ -80,15 +80,18 @@ function Base.empty!(record::Record)
    record.block_size = 0
    record.refid      = 0
    record.pos        = 0
-    record.bin_mq_nl  = 0
-    record.flag_nc    = 0
+    record.l_read_name = 0
+    record.mapq = 0
+    record.bin = 0
+    record.flag = 0
+    record.n_cigar_op = 0
    record.l_seq      = 0
    record.next_refid = 0
    record.next_pos   = 0
    record.tlen       = 0

    #Note: data will be overwritten and indexed using data_size.
-    
+
    return record
 end

@ -125,14 +128,9 @@ end
 # Accessor Fuctions
 # -----------------

-"""
-    flag(record::Record)::UInt16
-
-Get the bitwise flag of `record`.
-"""
 function flag(record::Record)::UInt16
    checkfilled(record)
-    return UInt16(record.flag_nc >> 16)
+    return record.flag
 end

 function hasflag(record::Record)
@ -318,7 +316,7 @@ end
 Get the mapping quality of `record`.
 """
 function mappingquality(record::Record)::UInt8
-    return UInt8((record.bin_mq_nl >> 8) & 0xff)
+    return record.mapq
 end

 function hasmappingquality(record::Record)
@ -397,7 +395,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n)
 end

 function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int}
-    cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF
+    cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op
    if !checkCG
        return cigaridx, nops
    end
@ -507,13 +505,16 @@ function hastemplength(record::Record)
 end

 """
-    sequence(record::Record)::BioSequences.LongDNASeq
+    sequence(record::Record)::BioSequences.LongDNA{4}

 Get the segment sequence of `record`.
 """
-function sequence(record::Record)::BioSequences.LongDNASeq
+function sequence(record::Record)
    checkfilled(record)
    seqlen = seqlength(record)
+    if seqlen == 0
+        return nothing
+    end
    data = Vector{UInt64}(undef, cld(seqlen, 16))
    src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1)
    for i in 1:lastindex(data)
@ -521,7 +522,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq
        x = unsafe_load(src, i)
        data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4
    end
-    return BioSequences.LongDNASeq(data, 1:seqlen, false)
+    return BioSequences.LongDNA{4}(data, UInt(seqlen))
 end

 function hassequence(record::Record)
@ -543,15 +544,15 @@ function hasseqlength(record::Record)
 end

 """
-    quality(record::Record)::Vector{UInt8}
+    quality(record::Record)

-Get the base quality of  `record`.
+Get the base quality of `record`.
 """
-function quality(record::Record)::Vector{UInt8}
+function quality(record::Record)
    checkfilled(record)
    seqlen = seqlength(record)
    offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2)
-    return [reinterpret(Int8, record.data[i+offset]) for i in 1:seqlen]
+    return record.data[(1+offset):(seqlen+offset)]
 end

 function hasquality(record::Record)
@ -661,5 +662,5 @@ end

 # Return the length of the read name.
 function seqname_length(record::Record)
-    return record.bin_mq_nl & 0xff
+    return record.l_read_name
 end
--- a/src/bam/writer.jl
+++ b/src/bam/writer.jl
@ -37,7 +37,7 @@ function Base.write(writer::Writer, record::Record)
 end

 function write_header(stream, header, refseqnames, refseqlens)
-    @assert length(refseqnames) == length(refseqlens)
+    @assert length(refseqnames) == length(refseqlens) "Lengths of refseq names and lengths must match."
    n = 0

    # magic bytes
--- a/src/sam/flags.jl
+++ b/src/sam/flags.jl
@ -16,11 +16,14 @@ for (name, bits, doc) in [
        (:QCFAIL,        UInt16(0x200), "QC failure"                                                                ),
        (:DUP,           UInt16(0x400), "optical or PCR duplicate"                                                  ),
        (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment"                                                   ),]
-    @assert bits isa UInt16
+    @assert bits isa UInt16 "The bits must be of type UInt16."
    sym = Symbol("FLAG_", name)
-    doc = string(@sprintf("0x%04x: ", bits), doc)
+    docstring = """    $sym
+    SAM/BAM flag: $doc
+
+    See also: [`flag`](@ref)
+    """
    @eval begin
-        @doc $(doc) const $(sym) = $(bits)
+        @doc $(docstring) const $(sym) = $(bits)
    end
 end
-
--- a/src/sam/header.jl
+++ b/src/sam/header.jl
@ -38,7 +38,7 @@ function Base.iterate(header::Header, i=1)
 end

 """
-    find(header::Header, key::AbstractString)::Vector{MetaInfo}
+    findall(header::Header, key::AbstractString)::Vector{MetaInfo}

 Find metainfo objects satisfying `SAM.tag(metainfo) == key`.
 """
--- a/src/sam/reader.jl
+++ b/src/sam/reader.jl
@ -10,12 +10,16 @@ function Reader(state::State{S}) where {S <: TranscodingStream}

    rdr = Reader(state, Header())

-    cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))
+    cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum))

-    rdr.state.state = sam_machine_body.start_state
+    rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body.
    rdr.state.linenum = ln
    rdr.state.filled = false

+    if cs != -1 && cs != 0 #Note: the header is finished when the state machine fails to transition after a new line (state 1).
+        throw(ArgumentError("Malformed SAM file header at line $(ln). Machine failed to transition from state $(cs)."))
+    end
+
    return rdr
 end

@ -65,18 +69,18 @@ end

 function index!(record::MetaInfo)
    stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
-    found = index!(stream, record)
-    if !found
-        throw(ArgumentError("invalid SAM metadata"))
+    cs = index!(stream, record)
+    if cs != 0
+        throw(ArgumentError("Invalid SAM metadata. Machine failed to transition from state $(cs)."))
    end
    return record
 end

 function index!(record::Record)
    stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
-    found = index!(stream, record)
-    if !found
-        throw(ArgumentError("invalid SAM record"))
+    cs = index!(stream, record)
+    if cs != 0
+        throw(ArgumentError("Invalid SAM record. Machine failed to transition from state $(cs)."))
    end
    return record
 end
@ -110,5 +114,6 @@ function Base.read!(rdr::Reader, record::Record)
        throw(EOFError())
    end

-    throw(ArgumentError("malformed SAM file"))
+    throw(ArgumentError("Malformed SAM file record at line $(ln). Machine failed to transition from state $(cs)."))
+
 end
--- a/src/sam/readrecord.jl
+++ b/src/sam/readrecord.jl
@ -41,7 +41,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
        co.actions[:enter] = [:pos1]
        co.actions[:exit]  = [:metainfo_tag]

-        comment = re"[^\r\n]*"
+        comment = re"[^\r\n]*" # Note: Only single line comments are allowed.
        comment.actions[:enter] = [:pos1]
        comment.actions[:exit]  = [:metainfo_val]

@ -137,7 +137,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_
    header = rep(cat(metainfo, newline))
    header.actions[:exit] = [:header]

-    body = record * rep(newline * record) * opt(newline)
+    body = rep(cat(record, newline))
    body.actions[:exit]  = [:body]

    sam = cat(header, body)
@ -168,13 +168,6 @@ function appendfrom!(dst, dpos, src, spos, n)
    return dst
 end

-const action_metainfo = quote
-    appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
-    metainfo.filled = 1:(p-@markpos)
-
-    found_metainfo = true
-end
-
 const sam_actions_metainfo = Dict(
    :mark => :(@mark),
    :pos1  => :(pos1 = @relpos(p)),
@ -183,7 +176,10 @@ const sam_actions_metainfo = Dict(
    :metainfo_val => :(metainfo.val = pos1:@relpos(p-1)),
    :metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))),
    :metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))),
-    :metainfo => action_metainfo
+    :metainfo => quote
+        appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos)
+        metainfo.filled = 1:(p-@markpos)
+    end
 )

 const sam_actions_header = merge(
@ -191,16 +187,11 @@ const sam_actions_header = merge(
    Dict(
        :countline => :(linenum += 1),
        :metainfo => quote
-            $(action_metainfo)
+            $(sam_actions_metainfo[:metainfo])
            push!(header, metainfo)
            metainfo = MetaInfo()
        end,
-        :header => quote
-
-            finish_header = true
-
-            @escape
-        end
+        :header => :(@escape)
    )
 )

@ -222,9 +213,6 @@ const sam_actions_record = Dict(
    :record       => quote
        appendfrom!(record.data, 1, data, @markpos, p-@markpos)
        record.filled = 1:(p-@markpos)
-
-        found_record = true
-        @escape
    end
 )

@ -232,18 +220,15 @@ const sam_actions_body = merge(
    sam_actions_record,
    Dict(
        :countline => :(linenum += 1),
-        :body => quote
-            finish_body = true
+        :record => quote
+            found_record = true
+            $(sam_actions_record[:record])
            @escape
-        end
+        end,
+        :body => :(@escape)
    )
 )

-# const sam_actions = merge(
-#     sam_actions_header,
-#     sam_actions_body
-# )
-
 const sam_context = Automa.CodeGenContext(
    generator = :goto,
    checkbounds = false,
@ -253,43 +238,24 @@ const sam_context = Automa.CodeGenContext(
 const sam_initcode_metainfo = quote
    pos1 = 0
    pos2 = 0
-    found_metainfo = false
-end
-
-const sam_initcode_record = quote
-    pos = 0
-    found_record = false
 end

 const sam_initcode_header = quote
    $(sam_initcode_metainfo)
    metainfo = MetaInfo()
-    finish_header = false
    cs, linenum = state
 end

+const sam_initcode_record = quote
+    pos = 0
+end
+
 const sam_initcode_body = quote
    $(sam_initcode_record)
-    finish_body = false
+    found_record = false
    cs, linenum = state
 end

-const sam_loopcode_metainfo = quote
-
-    if cs < 0
-        throw(ArgumentError("malformed metainfo at pos $(p)"))
-    end
-
-    if found_metainfo
-        @goto __return__
-    end
-end
-
-const sam_returncode_metainfo = quote
-    return found_metainfo
-end
-
-
 Automa.Stream.generate_reader(
    :index!,
    sam_machine_metainfo,
@ -297,19 +263,10 @@ Automa.Stream.generate_reader(
    actions = sam_actions_metainfo,
    context = sam_context,
    initcode = sam_initcode_metainfo,
-    loopcode = sam_loopcode_metainfo,
-    returncode = sam_returncode_metainfo
 ) |> eval

-const sam_loopcode_header = quote
-
-    if finish_header
-        @goto __return__
-    end
-end
-
 const sam_returncode_header = quote
-    return cs, linenum, finish_header
+    return cs, linenum
 end

 Automa.Stream.generate_reader(
@ -319,38 +276,13 @@ Automa.Stream.generate_reader(
    actions = sam_actions_header,
    context = sam_context,
    initcode = sam_initcode_header,
-    loopcode = sam_loopcode_header,
    returncode = sam_returncode_header
 ) |> eval

-
-const sam_loopcode_record = quote
-
-    if cs < 0
-        throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)"))
-    end
-
-    # # if cs != 0
-    # #     throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)])))))
-    # # end
-
+const sam_loopcode_body = quote
    if found_record
        @goto __return__
    end
-
-end
-
-const sam_loopcode_body = quote
-
-    $(sam_loopcode_record)
-
-    if finish_body
-        @goto __return__
-    end
-end
-
-const sam_returncode_record = quote
-    return found_record
 end

 Automa.Stream.generate_reader(
@ -360,12 +292,9 @@ Automa.Stream.generate_reader(
    actions = sam_actions_record,
    context = sam_context,
    initcode = sam_initcode_record,
-    loopcode = sam_loopcode_record,
-    returncode = sam_returncode_record
 ) |> eval


-
 const sam_returncode_body = quote
    return cs, linenum, found_record
 end
--- a/src/sam/record.jl
+++ b/src/sam/record.jl
@ -2,10 +2,11 @@
 # ==========

 mutable struct Record
-    # data and filled range
+    # Data and filled range.
    data::Vector{UInt8}
-    filled::UnitRange{Int}
-    # indexes
+    filled::UnitRange{Int} # Note: Specifies the data in use.
+
+    # Mandatory fields.
    qname::UnitRange{Int}
    flag::UnitRange{Int}
    rname::UnitRange{Int}
@ -17,6 +18,8 @@ mutable struct Record
    tlen::UnitRange{Int}
    seq::UnitRange{Int}
    qual::UnitRange{Int}
+
+    # Auxiliary fields.
    fields::Vector{UnitRange{Int}}
 end

@ -147,11 +150,6 @@ end
 # Accessor Functions
 # ------------------

-"""
-    flag(record::Record)::UInt16
-
-Get the bitwise flag of `record`.
-"""
 function flag(record::Record)::UInt16
    checkfilled(record)
    return unsafe_parse_decimal(UInt16, record.data, record.flag)
@ -206,9 +204,9 @@ Get the 1-based leftmost mapping position of `record`.
 function position(record::Record)::Int
    checkfilled(record)
    pos = unsafe_parse_decimal(Int, record.data, record.pos)
-    if pos == 0
-        missingerror(:position)
-    end
+    # if pos == 0
+    #     missingerror(:position)
+    # end
    return pos
 end

@ -263,9 +261,9 @@ Get the position of the mate/next read of `record`.
 function nextposition(record::Record)::Int
    checkfilled(record)
    pos = unsafe_parse_decimal(Int, record.data, record.pnext)
-    if pos == 0
-        missingerror(:nextposition)
-    end
+    # if pos == 0
+    #     missingerror(:nextposition)
+    # end
    return pos
 end

@ -299,7 +297,8 @@ Get the CIGAR string of `record`.
 function cigar(record::Record)::String
    checkfilled(record)
    if ismissing(record, record.cigar)
-        missingerror(:cigar)
+        # missingerror(:cigar)
+        return ""
    end
    return String(record.data[record.cigar])
 end
@ -377,9 +376,9 @@ Get the template length of `record`.
 function templength(record::Record)::Int
    checkfilled(record)
    len = unsafe_parse_decimal(Int, record.data, record.tlen)
-    if len == 0
-        missingerror(:tlen)
-    end
+    # if len == 0
+    #     missingerror(:tlen)
+    # end
    return len
 end

@ -388,17 +387,18 @@ function hastemplength(record::Record)
 end

 """
-    sequence(record::Record)::BioSequences.LongDNASeq
+    sequence(record::Record)::BioSequences.LongDNA{4}

 Get the segment sequence of `record`.
 """
-function sequence(record::Record)::BioSequences.LongDNASeq
+function sequence(record::Record)
    checkfilled(record)
    if ismissing(record, record.seq)
-        missingerror(:sequence)
+        # missingerror(:sequence)
+        return nothing
    end
    seqlen = length(record.seq)
-    ret = BioSequences.LongDNASeq(seqlen)
+    ret = BioSequences.LongDNA{4}(undef, seqlen)
    copyto!(ret, 1, record.data, first(record.seq), seqlen)
    return ret
 end
@ -494,7 +494,7 @@ function Base.getindex(record::Record, tag::AbstractString)
    end

    if typ == UInt8('A')
-        @assert lo == hi
+        @assert lo == hi "Values lo and hi must be equivalent."
        return Char(record.data[lo])
    end
    if typ == UInt8('i')
--- a/src/sam/sam.jl
+++ b/src/sam/sam.jl
@ -11,6 +11,7 @@ import BioGenerics.Exceptions: missingerror
 import BioGenerics.Automa: State
 import BioSequences
 import TranscodingStreams: TranscodingStreams, TranscodingStream
+import ..XAM: flag

 using Printf: @sprintf

@ -46,20 +47,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I
    return sign * x
 end

-# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams).
-# import BufferedStreams: BufferedStreams, BufferedInputStream
-# # A type keeping track of a ragel-based parser's state.
-# mutable struct State{T<:BufferedInputStream}
-#     stream::T      # input stream
-#     cs::Int        # current DFA state of Ragel
-#     linenum::Int   # line number: parser is responsible for updating this
-#     finished::Bool # true if finished (regardless of where in the stream we are)
-# end
-
-# function State(initstate::Int, input::BufferedInputStream)
-#     return State(input, initstate, 1, false)
-# end
-

 include("flags.jl")
 include("metainfo.jl")
--- a/test/runtests.jl
+++ b/test/runtests.jl
@ -1,4 +1,5 @@
 using Test
+using Documenter

 using BioGenerics
 using FormatSpecimens
@ -23,6 +24,13 @@ function randrange(range)
 end


-include("test_sam.jl")
-include("test_bam.jl")
-include("test_issues.jl")
+@testset "XAM" begin
+    include("test_sam.jl")
+    include("test_bam.jl")
+    include("test_issues.jl")
+    include("test_crosscheck.jl")
+
+    # Include doctests.
+    DocMeta.setdocmeta!(XAM, :DocTestSetup, :(using XAM); recursive=true)
+    doctest(XAM; manual = false)
+end
--- a/test/test_bam.jl
+++ b/test/test_bam.jl
@ -104,9 +104,12 @@
            unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize)
        end
        new_record = convert(BAM.Record, array)
-        @test record.bin_mq_nl == new_record.bin_mq_nl
+        @test record.l_read_name == new_record.l_read_name
+        @test record.mapq == new_record.mapq
+        @test record.bin == new_record.bin
        @test record.block_size == new_record.block_size
-        @test record.flag_nc == new_record.flag_nc
+        @test record.flag == new_record.flag
+        @test record.n_cigar_op == new_record.n_cigar_op
        @test record.l_seq == new_record.l_seq
        @test record.next_refid == new_record.next_refid
        @test record.next_pos == new_record.next_pos
@ -162,18 +165,21 @@
        if length(xs) != length(ys)
            return false
        end
-        for (x, y) in zip(xs, ys)
+        for (a, b) in zip(xs, ys)
            if !(
-                x.block_size == y.block_size &&
-                x.refid      == y.refid &&
-                x.pos        == y.pos &&
-                x.bin_mq_nl  == y.bin_mq_nl &&
-                x.flag_nc    == y.flag_nc &&
-                x.l_seq      == y.l_seq &&
-                x.next_refid == y.next_refid &&
-                x.next_pos   == y.next_pos &&
-                x.tlen       == y.tlen &&
-                x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)])
+		        a.block_size    == b.block_size &&
+		        a.refid         == b.refid &&
+		        a.pos           == b.pos &&
+		        a.l_read_name   == b.l_read_name &&
+		        a.mapq          == b.mapq &&
+		        a.bin           == b.bin &&
+		        a.n_cigar_op    == b.n_cigar_op &&
+		        a.flag          == b.flag &&
+		        a.l_seq         == b.l_seq &&
+		        a.next_refid    == b.next_refid &&
+		        a.next_pos      == b.next_pos &&
+		        a.tlen          == b.tlen &&
+                a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)])
                return false
            end
        end
--- a/test/test_crosscheck.jl
+++ b/test/test_crosscheck.jl
@ -0,0 +1,54 @@
+@testset "Cross Check Properties" begin
+
+	Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl.
+	Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl
+
+	function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol)
+
+		bam_property = getproperty(XAM.BAM, property)
+		sam_property = getproperty(XAM.SAM, property)
+
+		if bam_property(bam) != sam_property(sam)
+			@warn "$property result is not the same" bam_property(bam) sam_property(sam)
+			return false
+		end
+
+		return true
+	end
+
+	samdir = path_of_format("SAM")
+	bamdir = path_of_format("BAM")
+
+	filenames = [
+		"ce#1",
+		"ce#2",
+		"ce#5",
+		"ce#5b",
+		"ce#unmap",
+		"ce#unmap1",
+		"ce#unmap2",
+	]
+
+	properties = [
+		:position,# POS
+		:tempname,# QNAME
+		:mappingquality,# MAPQ
+		:cigar, # CIGAR
+		:flag, # FLAG
+		:sequence, # SEQ
+		:nextposition, # PNEXT
+		:templength, # TLEN
+	]
+
+	for filename in filenames
+
+		records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam")))
+		records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam")))
+
+		for (bam, sam) in zip(records_bam, records_sam)
+			@test all(crosscheck.(bam, sam, properties)) == true
+		end
+
+	end
+
+end # testset Crosscheck
--- a/test/test_sam.jl
+++ b/test/test_sam.jl
@ -199,6 +199,6 @@

        records = open(collect, SAM.Reader, file_sam)
        @test records == []
-        
+
    end
 end