From 69e77a234c47a5762572bb1a6787001e35e4580e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Thu, 25 Jun 2020 02:09:03 +1000 Subject: [PATCH 01/27] Note --- src/sam/readrecord.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 4bc0761..205de7b 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -41,7 +41,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_ co.actions[:enter] = [:pos1] co.actions[:exit] = [:metainfo_tag] - comment = re"[^\r\n]*" + comment = re"[^\r\n]*" # Note: Only single line comments are allowed. comment.actions[:enter] = [:pos1] comment.actions[:exit] = [:metainfo_val] From dce44b3f558d06cc1b9f7e425d67bc0f03d7f7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Thu, 25 Jun 2020 02:10:26 +1000 Subject: [PATCH 02/27] Tidy --- src/sam/sam.jl | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 9710269..97b8a05 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -46,20 +46,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I return sign * x end -# #TODO: update BioCore.Ragel.State (will likely change with TrnscodingStreams). -# import BufferedStreams: BufferedStreams, BufferedInputStream -# # A type keeping track of a ragel-based parser's state. -# mutable struct State{T<:BufferedInputStream} -# stream::T # input stream -# cs::Int # current DFA state of Ragel -# linenum::Int # line number: parser is responsible for updating this -# finished::Bool # true if finished (regardless of where in the stream we are) -# end - -# function State(initstate::Int, input::BufferedInputStream) -# return State(input, initstate, 1, false) -# end - include("flags.jl") include("metainfo.jl") From dde235fbc8313432dc7b16f94310128074d875e4 Mon Sep 17 00:00:00 2001 From: Jonathan Bieler Date: Fri, 26 Jul 2019 10:43:36 +0200 Subject: [PATCH 03/27] improve BAM.quality performance --- src/bam/record.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bam/record.jl b/src/bam/record.jl index 56e048d..a70bb17 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -543,15 +543,15 @@ function hasseqlength(record::Record) end """ - quality(record::Record)::Vector{UInt8} + quality(record::Record) -Get the base quality of `record`. +Get the base quality of `record`. """ -function quality(record::Record)::Vector{UInt8} +function quality(record::Record) checkfilled(record) seqlen = seqlength(record) offset = seqname_length(record) + n_cigar_op(record, false) * 4 + cld(seqlen, 2) - return [reinterpret(Int8, record.data[i+offset]) for i in 1:seqlen] + return record.data[(1+offset):(seqlen+offset)] end function hasquality(record::Record) From 433b8d14e8e11735d4196b71bf29f83482cd3ebd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Thu, 13 Aug 2020 16:45:51 +1000 Subject: [PATCH 04/27] Begin crosschecks --- test/runtests.jl | 1 + test/test_crosscheck.jl | 54 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 test/test_crosscheck.jl diff --git a/test/runtests.jl b/test/runtests.jl index 970c3a1..beede29 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,3 +25,4 @@ end include("test_sam.jl") include("test_bam.jl") +include("test_crosscheck.jl") diff --git a/test/test_crosscheck.jl b/test/test_crosscheck.jl new file mode 100644 index 0000000..0e853ef --- /dev/null +++ b/test/test_crosscheck.jl @@ -0,0 +1,54 @@ +@testset "Cross Check Properties" begin + + Broadcast.broadcastable(x::XAM.BAM.Record) = Ref(x) #TODO: consider moving to XAM.jl. + Broadcast.broadcastable(x::XAM.SAM.Record) = Ref(x) #TODO: consider moving to XAM.jl + + function crosscheck(bam::BAM.Record, sam::SAM.Record, property::Symbol) + + bam_property = getproperty(XAM.BAM, property) + sam_property = getproperty(XAM.SAM, property) + + if bam_property(bam) != sam_property(sam) + @warn "$property result is not the same" bam_property(bam) sam_property(sam) + return false + end + + return true + end + + samdir = path_of_format("SAM") + bamdir = path_of_format("BAM") + + filenames = [ + "ce#1", + "ce#2", + "ce#5", + "ce#5b", + "ce#unmap", + "ce#unmap1", + "ce#unmap2", + ] + + properties = [ + :position,# POS + :tempname,# QNAME + :mappingquality,# MAPQ + :cigar, # CIGAR + :flag, # FLAG + :sequence, # SEQ + :nextposition, # PNEXT + :templength, # TLEN + ] + + for filename in filenames + + records_bam = collect(open(BAM.Reader, joinpath(bamdir, filename * ".bam"))) + records_sam = collect(open(SAM.Reader, joinpath(samdir, filename * ".sam"))) + + for (bam, sam) in zip(records_bam, records_sam) + @test all(crosscheck.(bam, sam, properties)) == true + end + + end + +end # testset Crosscheck From d38c2becf0dc15b27bd40237f313db74cae593b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Thu, 13 Aug 2020 14:06:43 +1000 Subject: [PATCH 05/27] Minimal adjustements --- src/bam/record.jl | 5 ++++- src/sam/record.jl | 26 ++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/bam/record.jl b/src/bam/record.jl index a70bb17..ad911b4 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -511,9 +511,12 @@ end Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.LongDNASeq +function sequence(record::Record) checkfilled(record) seqlen = seqlength(record) + if seqlen == 0 + return nothing + end data = Vector{UInt64}(undef, cld(seqlen, 16)) src::Ptr{UInt64} = pointer(record.data, seqname_length(record) + n_cigar_op(record, false) * 4 + 1) for i in 1:lastindex(data) diff --git a/src/sam/record.jl b/src/sam/record.jl index 2d94a12..dcddf42 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -206,9 +206,9 @@ Get the 1-based leftmost mapping position of `record`. function position(record::Record)::Int checkfilled(record) pos = unsafe_parse_decimal(Int, record.data, record.pos) - if pos == 0 - missingerror(:position) - end + # if pos == 0 + # missingerror(:position) + # end return pos end @@ -263,9 +263,9 @@ Get the position of the mate/next read of `record`. function nextposition(record::Record)::Int checkfilled(record) pos = unsafe_parse_decimal(Int, record.data, record.pnext) - if pos == 0 - missingerror(:nextposition) - end + # if pos == 0 + # missingerror(:nextposition) + # end return pos end @@ -299,7 +299,8 @@ Get the CIGAR string of `record`. function cigar(record::Record)::String checkfilled(record) if ismissing(record, record.cigar) - missingerror(:cigar) + # missingerror(:cigar) + return "" end return String(record.data[record.cigar]) end @@ -377,9 +378,9 @@ Get the template length of `record`. function templength(record::Record)::Int checkfilled(record) len = unsafe_parse_decimal(Int, record.data, record.tlen) - if len == 0 - missingerror(:tlen) - end + # if len == 0 + # missingerror(:tlen) + # end return len end @@ -392,10 +393,11 @@ end Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.LongDNASeq +function sequence(record::Record) checkfilled(record) if ismissing(record, record.seq) - missingerror(:sequence) + # missingerror(:sequence) + return nothing end seqlen = length(record.seq) ret = BioSequences.LongDNASeq(seqlen) From b086fefbd5db44b68ac5945af3c28fbccb3ee30e Mon Sep 17 00:00:00 2001 From: Jakob Nissen Date: Tue, 28 Jul 2020 13:17:50 +0200 Subject: [PATCH 06/27] Make BAM record layout match BAM specs --- src/bam/record.jl | 51 +++++++++++++++++++++++++---------------------- test/test_bam.jl | 32 +++++++++++++++++------------ 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/src/bam/record.jl b/src/bam/record.jl index ad911b4..61a5c65 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -11,18 +11,21 @@ mutable struct Record block_size::Int32 refid::Int32 pos::Int32 - bin_mq_nl::UInt32 - flag_nc::UInt32 + l_read_name::UInt8 + mapq::UInt8 + bin::UInt16 + n_cigar_op::UInt16 + flag::UInt16 l_seq::Int32 next_refid::Int32 next_pos::Int32 tlen::Int32 # variable length data data::Vector{UInt8} - reader::Reader + reader::Union{Reader, Nothing} function Record() - return new(0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[]) + return new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, UInt8[]) end end @@ -49,8 +52,11 @@ function Base.:(==)(a::Record, b::Record) return a.block_size == b.block_size && a.refid == b.refid && a.pos == b.pos && - a.bin_mq_nl == b.bin_mq_nl && - a.flag_nc == b.flag_nc && + a.l_read_name == b.l_read_name && + a.mapq == b.mapq && + a.bin == b.bin && + a.n_cigar_op == b.n_cigar_op && + a.flag == b.flag && a.l_seq == b.l_seq && a.next_refid == b.next_refid && a.next_pos == b.next_pos && @@ -60,19 +66,13 @@ end function Base.copy(record::Record) copy = Record() - copy.block_size = record.block_size - copy.refid = record.refid - copy.pos = record.pos - copy.bin_mq_nl = record.bin_mq_nl - copy.flag_nc = record.flag_nc - copy.l_seq = record.l_seq - copy.next_refid = record.next_refid - copy.next_pos = record.next_pos - copy.tlen = record.tlen - copy.data = record.data[1:data_size(record)] - if isdefined(record, :reader) - copy.reader = record.reader + GC.@preserve copy record begin + dst_pointer = Ptr{UInt8}(pointer_from_objref(copy)) + src_pointer = Ptr{UInt8}(pointer_from_objref(record)) + unsafe_copyto!(dst_pointer, src_pointer, FIXED_FIELDS_BYTES) end + copy.data = record.data[1:data_size(record)] + copy.reader = record.reader return copy end @@ -80,8 +80,11 @@ function Base.empty!(record::Record) record.block_size = 0 record.refid = 0 record.pos = 0 - record.bin_mq_nl = 0 - record.flag_nc = 0 + record.l_read_name = 0 + record.mapq = 0 + record.bin = 0 + record.flag = 0 + record.n_cigar_op = 0 record.l_seq = 0 record.next_refid = 0 record.next_pos = 0 @@ -132,7 +135,7 @@ Get the bitwise flag of `record`. """ function flag(record::Record)::UInt16 checkfilled(record) - return UInt16(record.flag_nc >> 16) + return record.flag end function hasflag(record::Record) @@ -318,7 +321,7 @@ end Get the mapping quality of `record`. """ function mappingquality(record::Record)::UInt8 - return UInt8((record.bin_mq_nl >> 8) & 0xff) + return record.mapq end function hasmappingquality(record::Record) @@ -397,7 +400,7 @@ function extract_cigar_rle(data::Vector{UInt8}, offset, n) end function cigar_position(record::Record, checkCG::Bool = true)::Tuple{Int, Int} - cigaridx, nops = seqname_length(record) + 1, record.flag_nc & 0xFFFF + cigaridx, nops = seqname_length(record) + 1, record.n_cigar_op if !checkCG return cigaridx, nops end @@ -664,5 +667,5 @@ end # Return the length of the read name. function seqname_length(record::Record) - return record.bin_mq_nl & 0xff + return record.l_read_name end diff --git a/test/test_bam.jl b/test/test_bam.jl index 186d231..beb6d03 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -104,9 +104,12 @@ unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize) end new_record = convert(BAM.Record, array) - @test record.bin_mq_nl == new_record.bin_mq_nl + @test record.l_read_name == new_record.l_read_name + @test record.mapq == new_record.mapq + @test record.bin == new_record.bin @test record.block_size == new_record.block_size - @test record.flag_nc == new_record.flag_nc + @test record.flag == new_record.flag + @test record.n_cigar_op == new_record.n_cigar_op @test record.l_seq == new_record.l_seq @test record.next_refid == new_record.next_refid @test record.next_pos == new_record.next_pos @@ -162,18 +165,21 @@ if length(xs) != length(ys) return false end - for (x, y) in zip(xs, ys) + for (a, b) in zip(xs, ys) if !( - x.block_size == y.block_size && - x.refid == y.refid && - x.pos == y.pos && - x.bin_mq_nl == y.bin_mq_nl && - x.flag_nc == y.flag_nc && - x.l_seq == y.l_seq && - x.next_refid == y.next_refid && - x.next_pos == y.next_pos && - x.tlen == y.tlen && - x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)]) + a.block_size == b.block_size && + a.refid == b.refid && + a.pos == b.pos && + a.l_read_name == b.l_read_name && + a.mapq == b.mapq && + a.bin == b.bin && + a.n_cigar_op == b.n_cigar_op && + a.flag == b.flag && + a.l_seq == b.l_seq && + a.next_refid == b.next_refid && + a.next_pos == b.next_pos && + a.tlen == b.tlen && + a.data[1:BAM.data_size(a)] == b.data[1:BAM.data_size(b)]) return false end end From c56eb0851a0876156656a002a119a311d596fe4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Wed, 28 Apr 2021 11:48:47 +1000 Subject: [PATCH 07/27] Update Continuous Integration workflows - Includes doctest in tests. --- .github/workflows/CompatHelper.yml | 25 +++++++++++++++++-------- .github/workflows/Documentation.yml | 19 +++++++------------ .github/workflows/TagBot.yml | 11 ++++++++--- .github/workflows/UnitTests.yml | 6 +++++- Project.toml | 3 ++- docs/Project.toml | 2 +- docs/make.jl | 2 ++ test/runtests.jl | 13 ++++++++++--- 8 files changed, 52 insertions(+), 29 deletions(-) diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 98d1792..1eb68ea 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,17 +1,26 @@ name: CompatHelper - on: schedule: - - cron: '0 0 * * *' - + - cron: 0 0 * * * + workflow_dispatch: jobs: CompatHelper: runs-on: ubuntu-latest steps: - - name: Add CompatHelper - run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")' - - name: Run CompatHelper + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "2" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main(master_branch = "master") + shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} - run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")' + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 85a6cd9..02e5911 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -1,5 +1,4 @@ -name: Build Documentation - +name: Documentation on: push: branches: @@ -10,17 +9,13 @@ on: pull_request: jobs: - build: + Documenter: + name: Documentation runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - name: Install Dependencies - run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and Deploy + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key - run: julia --color=yes --project=docs/ docs/make.jl + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 086a8e5..6d2efc1 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,12 +1,17 @@ name: TagBot + on: - schedule: - - cron: '0 0 * * *' + issue_comment: + types: + - created + workflow_dispatch: + jobs: TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} - ssh: ${{ secrets.TAGBOT_KEY }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 9d5c078..7b0b1fb 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -6,6 +6,7 @@ on: jobs: test: + name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} strategy: @@ -16,6 +17,9 @@ jobs: - '1' julia-arch: [x64, x86] os: [ubuntu-latest, windows-latest, macOS-latest] + exclude: + - os: macOS-latest + julia-arch: x86 experimental: [false] include: - julia-version: nightly @@ -31,7 +35,7 @@ jobs: with: version: ${{ matrix.julia-version }} - name: Run Tests - uses: julia-actions/julia-runtest@latest + uses: julia-actions/julia-runtest@v1 - name: Create CodeCov uses: julia-actions/julia-processcoverage@v1 - name: Upload CodeCov diff --git a/Project.toml b/Project.toml index 0c21bf9..473a4e3 100644 --- a/Project.toml +++ b/Project.toml @@ -26,8 +26,9 @@ TranscodingStreams = "0.6, 0.7, 0.8, 0.9" julia = "1" [extras] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["FormatSpecimens", "Test"] +test = ["Documenter", "FormatSpecimens", "Test"] diff --git a/docs/Project.toml b/docs/Project.toml index 3506869..b284c6f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,4 +3,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [compat] -Documenter = "0.24" +Documenter = "0.27" diff --git a/docs/make.jl b/docs/make.jl index fcbd601..db96e6e 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,6 +2,8 @@ using Pkg using Documenter, XAM makedocs( + checkdocs = :all, + linkcheck = true, format = Documenter.HTML( edit_link = "develop" ), diff --git a/test/runtests.jl b/test/runtests.jl index beede29..75bffe5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using Test +using Documenter using BioGenerics using FormatSpecimens @@ -23,6 +24,12 @@ function randrange(range) end -include("test_sam.jl") -include("test_bam.jl") -include("test_crosscheck.jl") +@testset "XAM" begin + include("test_sam.jl") + include("test_bam.jl") + include("test_crosscheck.jl") + + # Include doctests. + DocMeta.setdocmeta!(XAM, :DocTestSetup, :(using XAM); recursive=true) + doctest(XAM; manual = false) +end From fa10f3bfcfa0e0dec43d2059a57dcdd7d7939ea3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Thu, 24 Jun 2021 00:53:17 +1000 Subject: [PATCH 08/27] Readability --- src/bam/overlap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index c6cf8ac..959edc1 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -89,7 +89,7 @@ function compare_intervals(record::Record, interval::Tuple{Int,UnitRange{Int}}) return -1 end - if rid > interval[1] || (rid == interval[1] && position(record) > last(interval[2])) + if rid > interval[1] || (rid == interval[1] && leftposition(record) > last(interval[2])) # strictly right return +1 end From 8f26bbf4a728761de812dbfd296a286c988d7bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Tue, 29 Jun 2021 12:54:08 +1000 Subject: [PATCH 09/27] Tidy state machines and improve error messages Makes better use of the existing flow in [Automa.jl](https://github.com/BioJulia/Automa.jl/blob/b6997f7415992a82e8538ca6ba8f5f3b5fc9a297/src/Stream.jl#L124-L127). --- src/sam/reader.jl | 21 ++++---- src/sam/readrecord.jl | 109 ++++++++---------------------------------- test/test_sam.jl | 2 +- 3 files changed, 33 insertions(+), 99 deletions(-) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index aa5546e..62a6ad5 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -10,12 +10,16 @@ function Reader(state::State{S}) where {S <: TranscodingStream} rdr = Reader(state, Header()) - cs, ln, f = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum)) + cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum)) rdr.state.state = sam_machine_body.start_state rdr.state.linenum = ln rdr.state.filled = false + if cs != -1 && cs != 0 #Note: the header is finished when the state machine fails to transition after a new line (state 1). + throw(ArgumentError("Malformed SAM file header at line $(ln). Machine failed to transition from state $(cs).")) + end + return rdr end @@ -65,18 +69,18 @@ end function index!(record::MetaInfo) stream = TranscodingStreams.NoopStream(IOBuffer(record.data)) - found = index!(stream, record) - if !found - throw(ArgumentError("invalid SAM metadata")) + cs = index!(stream, record) + if cs != 0 + throw(ArgumentError("Invalid SAM metadata. Machine failed to transition from state $(cs).")) end return record end function index!(record::Record) stream = TranscodingStreams.NoopStream(IOBuffer(record.data)) - found = index!(stream, record) - if !found - throw(ArgumentError("invalid SAM record")) + cs = index!(stream, record) + if cs != 0 + throw(ArgumentError("Invalid SAM record. Machine failed to transition from state $(cs).")) end return record end @@ -110,5 +114,6 @@ function Base.read!(rdr::Reader, record::Record) throw(EOFError()) end - throw(ArgumentError("malformed SAM file")) + throw(ArgumentError("Malformed SAM file record at line $(ln). Machine failed to transition from state $(cs).")) + end diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index c721efd..a3c91c2 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -137,7 +137,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_ header = rep(cat(metainfo, newline)) header.actions[:exit] = [:header] - body = record * rep(newline * record) * opt(newline) + body = rep(cat(record, newline)) body.actions[:exit] = [:body] sam = cat(header, body) @@ -168,13 +168,6 @@ function appendfrom!(dst, dpos, src, spos, n) return dst end -const action_metainfo = quote - appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos) - metainfo.filled = 1:(p-@markpos) - - found_metainfo = true -end - const sam_actions_metainfo = Dict( :mark => :(@mark), :pos1 => :(pos1 = @relpos(p)), @@ -183,7 +176,10 @@ const sam_actions_metainfo = Dict( :metainfo_val => :(metainfo.val = pos1:@relpos(p-1)), :metainfo_dict_key => :(push!(metainfo.dictkey, pos2:@relpos(p-1))), :metainfo_dict_val => :(push!(metainfo.dictval, pos2:@relpos(p-1))), - :metainfo => action_metainfo + :metainfo => quote + appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos) + metainfo.filled = 1:(p-@markpos) + end ) const sam_actions_header = merge( @@ -191,16 +187,11 @@ const sam_actions_header = merge( Dict( :countline => :(linenum += 1), :metainfo => quote - $(action_metainfo) + $(sam_actions_metainfo[:metainfo]) push!(header, metainfo) metainfo = MetaInfo() end, - :header => quote - - finish_header = true - - @escape - end + :header => :(@escape) ) ) @@ -222,9 +213,6 @@ const sam_actions_record = Dict( :record => quote appendfrom!(record.data, 1, data, @markpos, p-@markpos) record.filled = 1:(p-@markpos) - - found_record = true - @escape end ) @@ -232,18 +220,15 @@ const sam_actions_body = merge( sam_actions_record, Dict( :countline => :(linenum += 1), - :body => quote - finish_body = true + :record => quote + found_record = true + $(sam_actions_record[:record]) @escape - end + end, + :body => :(@escape) ) ) -# const sam_actions = merge( -# sam_actions_header, -# sam_actions_body -# ) - const sam_context = Automa.CodeGenContext( generator = :goto, checkbounds = false, @@ -253,43 +238,24 @@ const sam_context = Automa.CodeGenContext( const sam_initcode_metainfo = quote pos1 = 0 pos2 = 0 - found_metainfo = false -end - -const sam_initcode_record = quote - pos = 0 - found_record = false end const sam_initcode_header = quote $(sam_initcode_metainfo) metainfo = MetaInfo() - finish_header = false cs, linenum = state end +const sam_initcode_record = quote + pos = 0 +end + const sam_initcode_body = quote $(sam_initcode_record) - finish_body = false + found_record = false cs, linenum = state end -const sam_loopcode_metainfo = quote - - if cs < 0 - throw(ArgumentError("malformed metainfo at pos $(p)")) - end - - if found_metainfo - @goto __return__ - end -end - -const sam_returncode_metainfo = quote - return found_metainfo -end - - Automa.Stream.generate_reader( :index!, sam_machine_metainfo, @@ -297,19 +263,10 @@ Automa.Stream.generate_reader( actions = sam_actions_metainfo, context = sam_context, initcode = sam_initcode_metainfo, - loopcode = sam_loopcode_metainfo, - returncode = sam_returncode_metainfo ) |> eval -const sam_loopcode_header = quote - - if finish_header - @goto __return__ - end -end - const sam_returncode_header = quote - return cs, linenum, finish_header + return cs, linenum end Automa.Stream.generate_reader( @@ -319,38 +276,13 @@ Automa.Stream.generate_reader( actions = sam_actions_header, context = sam_context, initcode = sam_initcode_header, - loopcode = sam_loopcode_header, returncode = sam_returncode_header ) |> eval - -const sam_loopcode_record = quote - - if cs < 0 - throw(ArgumentError("malformed SAM record at position $(p), line $(linenum)")) - end - - # # if cs != 0 - # # throw(ArgumentError(string("failed to index ", $(record_type), " ~>", repr(String(data[p:min(p+7,p_end)]))))) - # # end - +const sam_loopcode_body = quote if found_record @goto __return__ end - -end - -const sam_loopcode_body = quote - - $(sam_loopcode_record) - - if finish_body - @goto __return__ - end -end - -const sam_returncode_record = quote - return found_record end Automa.Stream.generate_reader( @@ -360,12 +292,9 @@ Automa.Stream.generate_reader( actions = sam_actions_record, context = sam_context, initcode = sam_initcode_record, - loopcode = sam_loopcode_record, - returncode = sam_returncode_record ) |> eval - const sam_returncode_body = quote return cs, linenum, found_record end diff --git a/test/test_sam.jl b/test/test_sam.jl index c6fadc1..195288d 100644 --- a/test/test_sam.jl +++ b/test/test_sam.jl @@ -199,6 +199,6 @@ records = open(collect, SAM.Reader, file_sam) @test records == [] - + end end From 9d2a70531a32ee11770aca2f4397b2a00e8a9c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Sat, 3 Jul 2021 22:45:47 +1000 Subject: [PATCH 10/27] Return early --- src/bam/overlap.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index 959edc1..e6212ed 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -52,10 +52,11 @@ function Base.iterate(iter::OverlapIterator) end @assert iter.reader.index !== nothing chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval) - if !isempty(chunks) - seek(iter.reader, first(chunks).start) + if isempty(chunks) + return nothing end state = OverlapIteratorState(refindex, chunks, 1, Record()) + seek(iter.reader, state.chunks[state.chunkid].start) return iterate(iter, state) end @@ -65,7 +66,7 @@ function Base.iterate(iter::OverlapIterator, state) while BGZFStreams.virtualoffset(iter.reader.stream) < chunk.stop read!(iter.reader, state.record) c = compare_intervals(state.record, (state.refindex, iter.interval)) - if c == 0 + if c == 0 # overlapping return copy(state.record), state end if c > 0 @@ -78,6 +79,7 @@ function Base.iterate(iter::OverlapIterator, state) seek(iter.reader, state.chunks[state.chunkid].start) end end + # no more overlapping records return nothing end From 18dae0d45dc9432930fb7f880837d844c0b7ea9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Sun, 27 Jun 2021 20:41:58 +1000 Subject: [PATCH 11/27] Comments --- src/sam/reader.jl | 2 +- src/sam/record.jl | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 62a6ad5..46fd286 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -12,7 +12,7 @@ function Reader(state::State{S}) where {S <: TranscodingStream} cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum)) - rdr.state.state = sam_machine_body.start_state + rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body. rdr.state.linenum = ln rdr.state.filled = false diff --git a/src/sam/record.jl b/src/sam/record.jl index dcddf42..7dde515 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -2,10 +2,11 @@ # ========== mutable struct Record - # data and filled range + # Data and filled range. data::Vector{UInt8} - filled::UnitRange{Int} - # indexes + filled::UnitRange{Int} # Note: Specifies the data in use. + + # Mandatory fields. qname::UnitRange{Int} flag::UnitRange{Int} rname::UnitRange{Int} @@ -17,6 +18,8 @@ mutable struct Record tlen::UnitRange{Int} seq::UnitRange{Int} qual::UnitRange{Int} + + # Auxiliary fields. fields::Vector{UnitRange{Int}} end From a72fcf47cbfef82ee296ca28dff95e2a8ef7ad2b Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 22 Oct 2021 10:48:07 +0200 Subject: [PATCH 12/27] Improve documentation of flag function --- src/XAM.jl | 21 +++++++++++++++++++++ src/bam/bam.jl | 1 + src/bam/record.jl | 5 ----- src/sam/flags.jl | 8 ++++++-- src/sam/record.jl | 5 ----- src/sam/sam.jl | 1 + 6 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/XAM.jl b/src/XAM.jl index 8609747..2c5bad1 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -4,6 +4,27 @@ export SAM, BAM +""" + flag(record::Union{SAM.Record, BAM.Record})::UInt16 + +Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag +being OR'd together. The possible flags are: + + 0x0001 template having multiple segments in sequencing + 0x0002 each segment properly aligned according to the aligner + 0x0004 segment unmapped + 0x0008 next segment in the template unmapped + 0x0010 SEQ being reverse complemented + 0x0020 SEQ of the next segment in the template being reverse complemented + 0x0040 the first segment in the template + 0x0080 the last segment in the template + 0x0100 secondary alignment + 0x0200 not passing filters, such as platform/vendor quality controls + 0x0400 PCR or optical duplicate + 0x0800 supplementary alignment +""" +function flag end + include("sam/sam.jl") include("bam/bam.jl") diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 6fd1174..b3591d7 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,6 +6,7 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM +import ..XAM: flag import BGZFStreams import BioAlignments diff --git a/src/bam/record.jl b/src/bam/record.jl index 61a5c65..b545600 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -128,11 +128,6 @@ end # Accessor Fuctions # ----------------- -""" - flag(record::Record)::UInt16 - -Get the bitwise flag of `record`. -""" function flag(record::Record)::UInt16 checkfilled(record) return record.flag diff --git a/src/sam/flags.jl b/src/sam/flags.jl index 0b629d1..7b476fa 100644 --- a/src/sam/flags.jl +++ b/src/sam/flags.jl @@ -18,9 +18,13 @@ for (name, bits, doc) in [ (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] @assert bits isa UInt16 sym = Symbol("FLAG_", name) - doc = string(@sprintf("0x%04x: ", bits), doc) + docstring = """ $sym + SAM/BAM flag: $doc + + See also: [`flag`](@ref) + """ @eval begin - @doc $(doc) const $(sym) = $(bits) + @doc $(docstring) const $(sym) = $(bits) end end diff --git a/src/sam/record.jl b/src/sam/record.jl index 7dde515..dc0c43d 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -150,11 +150,6 @@ end # Accessor Functions # ------------------ -""" - flag(record::Record)::UInt16 - -Get the bitwise flag of `record`. -""" function flag(record::Record)::UInt16 checkfilled(record) return unsafe_parse_decimal(UInt16, record.data, record.flag) diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 97b8a05..fcd0821 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,6 +11,7 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream +import ..XAM: flag using Printf: @sprintf From 30d61bd79c4254138b21bf19299e8a664291de12 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Fri, 22 Oct 2021 15:24:03 +0200 Subject: [PATCH 13/27] Remove mention of Gitter from README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 951c649..b83b5f9 100644 --- a/README.md +++ b/README.md @@ -66,4 +66,4 @@ Your logo will show up here with a link to your website. ## Questions? -If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). +If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). From dc0992ff540ffff13c54d2647052ddabcebcad86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Fri, 31 Dec 2021 13:24:30 +1100 Subject: [PATCH 14/27] Add LTS release to test matrix --- .github/workflows/UnitTests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index 7b0b1fb..a840a1e 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -14,6 +14,7 @@ jobs: matrix: julia-version: - '1.0' # LTS + - '1.6' # LTS - '1' julia-arch: [x64, x86] os: [ubuntu-latest, windows-latest, macOS-latest] From 52d0ce21b0930fde1f97e781c3839670f8ee2cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=E2=80=99Mara?= Date: Fri, 31 Dec 2021 13:34:21 +1100 Subject: [PATCH 15/27] Remove Gitter chat links --- README.md | 1 - docs/src/index.md | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index b83b5f9..6b83fc1 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,6 @@ [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE) [![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable) [![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/) -[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl) > This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/ "original blog post"). diff --git a/docs/src/index.md b/docs/src/index.md index de6ee30..43b19cf 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -5,7 +5,6 @@ [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE) [![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable) [![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/) -[![Join the chat at https://gitter.im/BioJulia/XAM.jl](https://badges.gitter.im/BioJulia/XAM.jl.svg)](https://gitter.im/BioJulia/XAM.jl) > This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/). @@ -65,4 +64,4 @@ Your logo will show up here with a link to your website. ## Questions? -If you have a question about contributing or using BioJulia software, come on over and chat to us on [Gitter](https://gitter.im/BioJulia/General), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). +If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). From e560681afa7c441584bbc26fac0cf0f04f6ef931 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 7 Jul 2022 09:13:58 -0500 Subject: [PATCH 16/27] Update BioAlignments to v2.2 and BioSequences to v3 Ok, so this isn't atomic, but we can't upgrade these separately. --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 473a4e3..ab190c9 100644 --- a/Project.toml +++ b/Project.toml @@ -17,9 +17,9 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" [compat] Automa = "0.7, 0.8" BGZFStreams = "0.3" -BioAlignments = "2" +BioAlignments = "2.2" BioGenerics = "0.1" -BioSequences = "2.0.4" +BioSequences = "3" GenomicFeatures = "2" Indexes = "0.1" TranscodingStreams = "0.6, 0.7, 0.8, 0.9" From 1660a24a0fe9283a1d38631e39644e51a506b7c1 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 7 Jul 2022 09:18:26 -0500 Subject: [PATCH 17/27] Update SAM.sequence to return BioSequences.LongDNA{4} BioSequences v3 changed LongDNASeq to LongDNA{N}. We can't assume that there aren't ambiguous nucleotides in a SAM file, so use the larger LongDNA variant. The method signatures are also simplified, so simplify their calls, accordingly. --- src/sam/record.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sam/record.jl b/src/sam/record.jl index dc0c43d..b241687 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -387,7 +387,7 @@ function hastemplength(record::Record) end """ - sequence(record::Record)::BioSequences.LongDNASeq + sequence(record::Record)::BioSequences.LongDNA{4} Get the segment sequence of `record`. """ @@ -398,7 +398,7 @@ function sequence(record::Record) return nothing end seqlen = length(record.seq) - ret = BioSequences.LongDNASeq(seqlen) + ret = BioSequences.LongDNA{4}(undef, seqlen) copyto!(ret, 1, record.data, first(record.seq), seqlen) return ret end From a7a76db3d454353af8a83434e3492ac355fe3ea7 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Thu, 7 Jul 2022 09:19:43 -0500 Subject: [PATCH 18/27] Update BAM.sequence to return BioSequences.LongDNA{4} Same principles apply as updating SAM.sequence --- src/bam/record.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bam/record.jl b/src/bam/record.jl index b545600..6de18a5 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -91,7 +91,7 @@ function Base.empty!(record::Record) record.tlen = 0 #Note: data will be overwritten and indexed using data_size. - + return record end @@ -505,7 +505,7 @@ function hastemplength(record::Record) end """ - sequence(record::Record)::BioSequences.LongDNASeq + sequence(record::Record)::BioSequences.LongDNA{4} Get the segment sequence of `record`. """ @@ -522,7 +522,7 @@ function sequence(record::Record) x = unsafe_load(src, i) data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4 end - return BioSequences.LongDNASeq(data, 1:seqlen, false) + return BioSequences.LongDNA{4}(data, UInt(seqlen)) end function hassequence(record::Record) From 675e5828b198bc997a76d60ea3a125d46897a4ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 9 Jul 2022 13:37:21 +1000 Subject: [PATCH 19/27] Correct docstring --- src/sam/header.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sam/header.jl b/src/sam/header.jl index 668cb59..5111151 100644 --- a/src/sam/header.jl +++ b/src/sam/header.jl @@ -38,7 +38,7 @@ function Base.iterate(header::Header, i=1) end """ - find(header::Header, key::AbstractString)::Vector{MetaInfo} + findall(header::Header, key::AbstractString)::Vector{MetaInfo} Find metainfo objects satisfying `SAM.tag(metainfo) == key`. """ From 03d8e5e7456da6e4149f44b299e1181a9d5c8ea8 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 11 Jul 2022 07:29:08 -0500 Subject: [PATCH 20/27] Update minimum Julia version to 1.6 BioSymbols v3 requires Julia 1.6+, so update the "julia" compat entry and remove unit testing against Julia 1.0. --- .github/workflows/UnitTests.yml | 1 - Project.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index a840a1e..a6cbe9e 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -13,7 +13,6 @@ jobs: fail-fast: false matrix: julia-version: - - '1.0' # LTS - '1.6' # LTS - '1' julia-arch: [x64, x86] diff --git a/Project.toml b/Project.toml index ab190c9..292961d 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ BioSequences = "3" GenomicFeatures = "2" Indexes = "0.1" TranscodingStreams = "0.6, 0.7, 0.8, 0.9" -julia = "1" +julia = "1.6" [extras] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" From 5da017052b32433ecbd41042600271e7e0c4a387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 23 Feb 2020 14:43:34 +1100 Subject: [PATCH 21/27] Add messages to assertions --- src/bam/overlap.jl | 4 +++- src/bam/writer.jl | 2 +- src/sam/flags.jl | 3 +-- src/sam/record.jl | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index e6212ed..fb28b1f 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -50,7 +50,9 @@ function Base.iterate(iter::OverlapIterator) if refindex === nothing throw(ArgumentError("sequence name $(iter.refname) is not found in the header")) end - @assert iter.reader.index !== nothing + + @assert iter.reader.index !== nothing "Reader index cannot be nothing." + chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval) if isempty(chunks) return nothing diff --git a/src/bam/writer.jl b/src/bam/writer.jl index 2460b3f..81bdf23 100644 --- a/src/bam/writer.jl +++ b/src/bam/writer.jl @@ -37,7 +37,7 @@ function Base.write(writer::Writer, record::Record) end function write_header(stream, header, refseqnames, refseqlens) - @assert length(refseqnames) == length(refseqlens) + @assert length(refseqnames) == length(refseqlens) "Lengths of refseq names and lengths must match." n = 0 # magic bytes diff --git a/src/sam/flags.jl b/src/sam/flags.jl index 7b476fa..ded0df1 100644 --- a/src/sam/flags.jl +++ b/src/sam/flags.jl @@ -16,7 +16,7 @@ for (name, bits, doc) in [ (:QCFAIL, UInt16(0x200), "QC failure" ), (:DUP, UInt16(0x400), "optical or PCR duplicate" ), (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] - @assert bits isa UInt16 + @assert bits isa UInt16 "The bits must be of type UInt16." sym = Symbol("FLAG_", name) docstring = """ $sym SAM/BAM flag: $doc @@ -27,4 +27,3 @@ for (name, bits, doc) in [ @doc $(docstring) const $(sym) = $(bits) end end - diff --git a/src/sam/record.jl b/src/sam/record.jl index b241687..ef8bb6d 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -494,7 +494,7 @@ function Base.getindex(record::Record, tag::AbstractString) end if typ == UInt8('A') - @assert lo == hi + @assert lo == hi "Values lo and hi must be equivalent." return Char(record.data[lo]) end if typ == UInt8('i') From 5cbf9dfb1cec668f6dcaefdef8a4d9d58736743a Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Sat, 8 Oct 2022 15:21:06 -0500 Subject: [PATCH 22/27] Bump version number to 0.3.0 in Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 19c279d..4ff9efb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "Ciarán O'Mara "] -version = "0.2.8" +version = "0.3.0" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" From 46fcf71f96ef03de906ea7f7ae7fea9a1801e098 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Sat, 8 Oct 2022 16:27:19 -0500 Subject: [PATCH 23/27] Add CHANGELOG --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f4ef97d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,27 @@ +# Changelog + +All notable changes to HapLink.jl will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.3.0] + +## Added + +- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29)) +- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43)) + +### Changed + +- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21)) +- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48)) + +### Fixed + +- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26)) + +[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0 From a7a8e53e4d211f45594213c5fbf5ef2f27286184 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 10 Oct 2022 13:06:02 +0000 Subject: [PATCH 24/27] Update .github/workflows/Documentation.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ciarán O'Mara --- .github/workflows/Documentation.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 02e5911..7e683ea 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -10,7 +10,9 @@ on: jobs: Documenter: - name: Documentation + permissions: + contents: write + name: Documentation runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 From 8d9ccc91f2953ab82b5ceb5131776ecd0da071a2 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 10 Oct 2022 13:06:09 +0000 Subject: [PATCH 25/27] Update .github/workflows/CompatHelper.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ciarán O'Mara --- .github/workflows/CompatHelper.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 1eb68ea..f36d4b6 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -3,6 +3,9 @@ on: schedule: - cron: 0 0 * * * workflow_dispatch: +permissions: + contents: write + pull-requests: write jobs: CompatHelper: runs-on: ubuntu-latest From 2e739e2d8d0181b5e3fb1e1c8c6b1ecb4f2d7848 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 10 Oct 2022 13:06:17 +0000 Subject: [PATCH 26/27] Update .github/workflows/CompatHelper.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ciarán O'Mara --- .github/workflows/CompatHelper.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index f36d4b6..b3fc56e 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -10,18 +10,34 @@ jobs: CompatHelper: runs-on: ubuntu-latest steps: + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 + with: + version: '1' + arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} - name: "Install CompatHelper" run: | import Pkg name = "CompatHelper" uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" - version = "2" + version = "3" Pkg.add(; name, uuid, version) shell: julia --color=yes {0} - name: "Run CompatHelper" run: | import CompatHelper - CompatHelper.main(master_branch = "master") + CompatHelper.main() shell: julia --color=yes {0} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 0c4b3f5dd00de71a96d92031db0cdf14534352ce Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Mon, 10 Oct 2022 13:06:28 +0000 Subject: [PATCH 27/27] Update .github/workflows/TagBot.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ciarán O'Mara --- .github/workflows/TagBot.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 6d2efc1..9bdb8d7 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -5,6 +5,11 @@ on: types: - created workflow_dispatch: + inputs: + lookback: + default: 3 +permissions: + contents: write jobs: TagBot: