From 955cf8fbe2a46fcabea378eb596297599e0efc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 21 Mar 2020 09:40:58 +1100 Subject: [PATCH 01/11] Correct link --- docs/src/man/hts-files.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index de87b32..7f0c346 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -286,7 +286,7 @@ SAM.Writer(IOStream()) ``` -To make a BAM Writer is slightly different, as you need to use a specific stream type from the (https://github.com/BioJulia/BGZFStreams.jl)[https://github.com/BioJulia/BGZFStreams.jl] package: +To make a BAM Writer is slightly different, as you need to use a specific stream type from the [https://github.com/BioJulia/BGZFStreams.jl](https://github.com/BioJulia/BGZFStreams.jl) package: ```julia julia> using BGZFStreams From 08e253c297a82ecfdb598c01b7e037c3657f3392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 21 Mar 2020 12:46:35 +1100 Subject: [PATCH 02/11] Use collect --- src/sam/metainfo.jl | 2 +- src/sam/record.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sam/metainfo.jl b/src/sam/metainfo.jl index f40e6ae..0c74a90 100644 --- a/src/sam/metainfo.jl +++ b/src/sam/metainfo.jl @@ -39,7 +39,7 @@ Create a SAM metainfo from `str`. """ function MetaInfo(str::AbstractString) - return MetaInfo(Vector{UInt8}(str)) + return MetaInfo(collect(UInt8, str)) end """ diff --git a/src/sam/record.jl b/src/sam/record.jl index a61e0df..1f6d675 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -71,7 +71,7 @@ function Record(str::AbstractString) end function Base.convert(::Type{Record}, str::AbstractString) - return Record(Vector{UInt8}(str)) + return Record(collect(UInt8, str)) end function Base.show(io::IO, record::Record) From cc2af1976bf0199b9da3f6ba0417285c7e8089da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 21 Mar 2020 12:52:03 +1100 Subject: [PATCH 03/11] Use broadcast --- src/sam/metainfo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sam/metainfo.jl b/src/sam/metainfo.jl index 0c74a90..57961c1 100644 --- a/src/sam/metainfo.jl +++ b/src/sam/metainfo.jl @@ -189,7 +189,7 @@ function keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}} if iscomment(metainfo) throw(ArgumentError("not a dictionary")) end - return Pair{String,String}[key => val for (key, val) in zip(keys(metainfo), values(metainfo))] + return Pair{String, String}.(keys(metainfo), values(metainfo)) end function Base.keys(metainfo::MetaInfo) From aec919c713b11cde218c92818a8dad4f448b6169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sat, 21 Mar 2020 17:34:10 +1100 Subject: [PATCH 04/11] Use unsafe copy --- src/sam/readrecord.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index b5b3373..8dbea12 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -161,7 +161,7 @@ function appendfrom!(dst, dpos, src, spos, n) if length(dst) < dpos + n - 1 resize!(dst, dpos + n - 1) end - copyto!(dst, dpos, src, spos, n) + unsafe_copyto!(dst, dpos, src, spos, n) return dst end From eaf5c73eb5af325ac97a46c18dd3d539fa9cb41a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 25 May 2020 14:08:07 +1000 Subject: [PATCH 05/11] Update CI and documentation for the general registry - Update documentation workflow. - Update CompatHelper workflow. - Update TagBot workflow. - Update unit test workflow. - Update codecov. --- .github/workflows/CompatHelper.yml | 33 +++++------------------ .github/workflows/Documentation.yml | 40 +++++++++++---------------- .github/workflows/TagBot.yml | 3 +-- .github/workflows/UnitTests.yml | 42 ++++++++++++++++++++++------- Project.toml | 2 +- README.md | 13 +++------ ci_prep.jl | 3 --- coverage/Project.toml | 2 -- coverage/coverage.jl | 11 -------- docs/src/index.md | 15 +++-------- 10 files changed, 62 insertions(+), 102 deletions(-) delete mode 100644 ci_prep.jl delete mode 100644 coverage/Project.toml delete mode 100644 coverage/coverage.jl diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index 36f1665..98d1792 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -2,37 +2,16 @@ name: CompatHelper on: schedule: - - cron: '00 * * * *' + - cron: '0 0 * * *' jobs: CompatHelper: - runs-on: ${{ matrix.os }} - strategy: - matrix: - julia-version: [1.3.0] - julia-arch: [x86] - os: [ubuntu-latest] + runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest - with: - version: ${{ matrix.julia-version }} - name: Add CompatHelper - run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - - name: CompatHelper.main + run: julia --color=yes -e 'using Pkg; Pkg.add("CompatHelper")' + - name: Run CompatHelper env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: julia -e ' - using CompatHelper, Pkg; - my_registries = [ - Pkg.RegistrySpec( - name = "BioJuliaRegistry", - uuid = "ccbd2cc2-2954-11e9-1ccf-f3e7900901ca", - url = "https://github.com/BioJulia/BioJuliaRegistry.git" - ), - Pkg.RegistrySpec( - name = "General", - uuid = "23338594-aafe-5451-b93e-139f81909106", - url = "https://github.com/JuliaRegistries/General.git" - ) - ]; - CompatHelper.main(; registries = my_registries, master_branch = "master");' + COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} + run: julia --color=yes -e 'using CompatHelper; CompatHelper.main(master_branch = "master")' diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 9610301..85a6cd9 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -1,34 +1,26 @@ -name: Documentation +name: Build Documentation on: - push: - branches: - - 'master' - - 'develop' - - 'release/.*' - tags: '*' - pull_request: + push: + branches: + - master + - develop + - release/* + tags: '*' + pull_request: jobs: build: - runs-on: ${{ matrix.os }} - strategy: - matrix: - julia-version: [1.3.0] - julia-arch: [x86] - os: [ubuntu-latest] + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1.0.0 - - uses: julia-actions/setup-julia@latest + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 with: - version: ${{ matrix.julia-version }} - - name: Install dependencies - run: | - julia ci_prep.jl; - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - - name: Build and deploy + version: '1' + - name: Install Dependencies + run: julia --color=yes --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and Deploy env: - # https://github.com/JuliaDocs/Documenter.jl/issues/1177 # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key - run: julia --project=docs/ --color=yes docs/make.jl + run: julia --color=yes --project=docs/ docs/make.jl diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index e65374b..086a8e5 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,7 +1,7 @@ name: TagBot on: schedule: - - cron: '0 * * * *' + - cron: '0 0 * * *' jobs: TagBot: runs-on: ubuntu-latest @@ -10,4 +10,3 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} ssh: ${{ secrets.TAGBOT_KEY }} - registry: BioJulia/BioJuliaRegistry \ No newline at end of file diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index cbaf083..9d5c078 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -1,22 +1,44 @@ -name: Unit tests +name: Unit Tests -on: [push, pull_request] +on: + - push + - pull_request jobs: test: runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.experimental }} strategy: + fail-fast: false matrix: - julia-version: ['1.1', '1.2', '1.3'] - julia-arch: [x64] + julia-version: + - '1.0' # LTS + - '1' + julia-arch: [x64, x86] os: [ubuntu-latest, windows-latest, macOS-latest] + experimental: [false] + include: + - julia-version: nightly + julia-arch: x64 + os: ubuntu-latest + experimental: true steps: - - uses: actions/checkout@v1.0.0 - - uses: julia-actions/setup-julia@v1 + - name: Checkout Repository + uses: actions/checkout@v2 + - name: Setup Julia + uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} - arch: ${{ matrix.julia-arch }} - - name: Install dependencies - run: julia ci_prep.jl - - uses: julia-actions/julia-runtest@master + - name: Run Tests + uses: julia-actions/julia-runtest@latest + - name: Create CodeCov + uses: julia-actions/julia-processcoverage@v1 + - name: Upload CodeCov + uses: codecov/codecov-action@v1 + with: + file: ./lcov.info + flags: unittests + name: codecov-umbrella + fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/Project.toml b/Project.toml index 1f031ee..f5a56c2 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ BioSequences = "2" GenomicFeatures = "2" Indexes = "0.1" TranscodingStreams = "0.6, 0.7, 0.8, 0.9" -julia = "1.1" +julia = "1" [extras] FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd" diff --git a/README.md b/README.md index 850a435..951c649 100644 --- a/README.md +++ b/README.md @@ -11,18 +11,11 @@ blog post"). ## Description -XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files. +The XAM package provides I/O and utilities for manipulating SAM and BAM formatted alignment map files. ## Installation -The latest version of XAM is made available to install through BioJulia's package registry. -By default, Julia's package manager only includes the "General" package registry. - -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -Once the registry is added to your configuration, you can install XAM while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the XAM package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). +Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add XAM ``` diff --git a/ci_prep.jl b/ci_prep.jl deleted file mode 100644 index f3a7535..0000000 --- a/ci_prep.jl +++ /dev/null @@ -1,3 +0,0 @@ -using Pkg.Registry -Registry.add(Registry.RegistrySpec(url = "https://github.com/BioJulia/BioJuliaRegistry.git")) -Registry.add(Registry.RegistrySpec(url = "https://github.com/JuliaRegistries/General.git")) diff --git a/coverage/Project.toml b/coverage/Project.toml deleted file mode 100644 index 4fbdc47..0000000 --- a/coverage/Project.toml +++ /dev/null @@ -1,2 +0,0 @@ -[deps] -Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037" diff --git a/coverage/coverage.jl b/coverage/coverage.jl deleted file mode 100644 index 3d33ed9..0000000 --- a/coverage/coverage.jl +++ /dev/null @@ -1,11 +0,0 @@ -get(ENV, "TRAVIS_OS_NAME", "") == "linux" || exit() -get(ENV, "TRAVIS_JULIA_VERSION", "") == "1.3" || exit() - -using Pkg -Pkg.instantiate() - -using Coverage - -cd(joinpath(@__DIR__, "..")) do - Codecov.submit(Codecov.process_folder()) -end diff --git a/docs/src/index.md b/docs/src/index.md index a3bbf39..de6ee30 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,20 +10,11 @@ > This project follows the [semver](http://semver.org) pro forma and uses the [git-flow branching model](https://nvie.com/posts/a-successful-git-branching-model/). ## Description - -XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files. - +The XAM package provides I/O and utilities for manipulating SAM and BAM formatted alignment map files. ## Installation -The latest version of XAM is made available to install through BioJulia's package registry. -By default, Julia's package manager only includes the "General" package registry. - -To add the BioJulia registry from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/), press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: -```julia -registry add https://github.com/BioJulia/BioJuliaRegistry.git -``` - -Once the registry is added to your configuration, you can install XAM while in [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/) with the following: +You can install the XAM package from the [Julia REPL](https://docs.julialang.org/en/v1/manual/getting-started/). +Press `]` to enter [pkg mode](https://docs.julialang.org/en/v1/stdlib/Pkg/), then enter the following command: ```julia add XAM ``` From 0ac352f1d24eff2d70dbda63d44e2dcb1e6a2f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 27 May 2020 21:21:12 +1000 Subject: [PATCH 06/11] Isolate testsets --- test/runtests.jl | 480 +---------------------------------------------- test/test_bam.jl | 280 +++++++++++++++++++++++++++ test/test_sam.jl | 198 +++++++++++++++++++ 3 files changed, 480 insertions(+), 478 deletions(-) create mode 100644 test/test_bam.jl create mode 100644 test/test_sam.jl diff --git a/test/runtests.jl b/test/runtests.jl index d727d08..970c3a1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,482 +22,6 @@ function randrange(range) end end -@testset "SAM" begin - samdir = path_of_format("SAM") - @testset "MetaInfo" begin - metainfo = SAM.MetaInfo() - @test !isfilled(metainfo) - @test occursin("not filled", repr(metainfo)) - - metainfo = SAM.MetaInfo("CO", "some comment (parens)") - @test isfilled(metainfo) - @test string(metainfo) == "@CO\tsome comment (parens)" - @test occursin("CO", repr(metainfo)) - @test SAM.tag(metainfo) == "CO" - @test SAM.value(metainfo) == "some comment (parens)" - @test_throws ArgumentError keys(metainfo) - @test_throws ArgumentError values(metainfo) - - metainfo = SAM.MetaInfo("HD", ["VN" => "1.0", "SO" => "coordinate"]) - @test isfilled(metainfo) - @test string(metainfo) == "@HD\tVN:1.0\tSO:coordinate" - @test occursin("HD", repr(metainfo)) - @test SAM.tag(metainfo) == "HD" - @test SAM.value(metainfo) == "VN:1.0\tSO:coordinate" - @test keys(metainfo) == ["VN", "SO"] - @test values(metainfo) == ["1.0", "coordinate"] - @test SAM.keyvalues(metainfo) == ["VN" => "1.0", "SO" => "coordinate"] - @test haskey(metainfo, "VN") - @test haskey(metainfo, "SO") - @test !haskey(metainfo, "GO") - @test metainfo["VN"] == "1.0" - @test metainfo["SO"] == "coordinate" - @test_throws KeyError metainfo["GO"] - end - - @testset "Header" begin - header = SAM.Header() - @test isempty(header) - push!(header, SAM.MetaInfo("@HD\tVN:1.0\tSO:coordinate")) - @test !isempty(header) - @test length(header) == 1 - push!(header, SAM.MetaInfo("@CO\tsome comment")) - @test length(header) == 2 - @test isa(collect(header), Vector{SAM.MetaInfo}) - end - - @testset "Record" begin - record = SAM.Record() - @test !isfilled(record) - @test !SAM.ismapped(record) - @test repr(record) == "XAM.SAM.Record: " - @test_throws ArgumentError SAM.flag(record) - - record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*") - @test isfilled(record) - @test occursin(r"^XAM.SAM.Record:\n", repr(record)) - @test SAM.ismapped(record) - @test SAM.isprimary(record) - @test SAM.hastempname(record) - @test SAM.tempname(record) == "r001" - @test SAM.hasflag(record) - @test SAM.flag(record) === UInt16(99) - @test SAM.hasrefname(record) - @test SAM.refname(record) == "chr1" - @test SAM.hasposition(record) - @test SAM.position(record) === 7 - @test SAM.hasmappingquality(record) - @test SAM.mappingquality(record) === UInt8(30) - @test SAM.hascigar(record) - @test SAM.cigar(record) == "8M2I4M1D3M" - @test SAM.hasnextrefname(record) - @test SAM.nextrefname(record) == "=" - @test SAM.hasnextposition(record) - @test SAM.nextposition(record) === 37 - @test SAM.hastemplength(record) - @test SAM.templength(record) === 39 - @test SAM.hassequence(record) - @test SAM.sequence(record) == dna"TTAGATAAAGGATACTG" - @test !SAM.hasquality(record) - @test_throws MissingFieldException SAM.quality(record) - end - - @testset "Reader" begin - reader = open(SAM.Reader, joinpath(samdir, "ce#1.sam")) - @test isa(reader, SAM.Reader) - @test eltype(reader) === SAM.Record - - # header - h = header(reader) - @test string.(findall(header(reader), "SQ")) == ["@SQ\tSN:CHROMOSOME_I\tLN:1009800"] - - # first record - record = SAM.Record() - read!(reader, record) - @test SAM.ismapped(record) - @test SAM.refname(record) == "CHROMOSOME_I" - @test SAM.position(record) == leftposition(record) == 2 - @test SAM.rightposition(record) == rightposition(record) == 102 - @test SAM.tempname(record) == seqname(record) == "SRR065390.14978392" - @test SAM.sequence(record) == sequence(record) == dna"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA" - @test SAM.sequence(String, record) == "CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA" - @test SAM.seqlength(record) == 100 - @test SAM.quality(record) == (b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" .- 33) - @test SAM.quality(String, record) == "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" - @test SAM.flag(record) == 16 - @test SAM.cigar(record) == "27M1D73M" - @test SAM.alignment(record) == Alignment([ - AlignmentAnchor( 0, 1, OP_START), - AlignmentAnchor( 27, 28, OP_MATCH), - AlignmentAnchor( 27, 29, OP_DELETE), - AlignmentAnchor(100, 102, OP_MATCH)]) - @test record["XG"] == 1 - @test record["XM"] == 5 - @test record["XN"] == 0 - @test record["XO"] == 1 - @test record["AS"] == -18 - @test record["XS"] == -18 - @test record["YT"] == "UU" - @test eof(reader) - close(reader) - - # rightposition (also implicitly alignlength) - records = collect(open(SAM.Reader, joinpath(samdir, "ce#5b.sam"))) - @test SAM.rightposition(records[6]) == rightposition(records[6]) == 83 - - # iterator - @test length(collect(open(SAM.Reader, joinpath(samdir, "ce#1.sam")))) == 1 - @test length(collect(open(SAM.Reader, joinpath(samdir, "ce#2.sam")))) == 2 - - # IOStream - @test length(collect(SAM.Reader(open(joinpath(samdir, "ce#1.sam"))))) == 1 - @test length(collect(SAM.Reader(open(joinpath(samdir, "ce#2.sam"))))) == 2 - end - - @testset "Round trip" begin - function compare_records(xs, ys) - if length(xs) != length(ys) - return false - end - for (x, y) in zip(xs, ys) - if x.data[x.filled] != y.data[y.filled] - return false - end - end - return true - end - for specimen in list_valid_specimens("SAM") - filepath = joinpath(samdir, filename(specimen)) - mktemp() do path, io - # copy - reader = open(SAM.Reader, filepath) - - header_original = header(reader) - - writer = SAM.Writer(io, header_original) - - records = SAM.Record[] - for record in reader - push!(records, record) - write(writer, record) - end - - close(reader) - close(writer) - - reader = open(SAM.Reader, path) - - @test header(reader) == header_original - @test compare_records(collect(reader), records) - - close(reader) - - end - end - end - - @testset "In-Place-Reading Pattern" begin - - file_sam = joinpath(samdir, "ce#5b.sam") - - records = open(collect, SAM.Reader, file_sam) - - reader = open(SAM.Reader, file_sam) - record = SAM.Record() - i = 0 - while !eof(reader) - empty!(record) # Reset the record. - read!(reader, record) - - i = i + 1 - - @test records[i] == record - - end - - close(reader) - - end -end - -@testset "BAM" begin - bamdir = path_of_format("BAM") - - @testset "AuxData" begin - auxdata = BAM.AuxData(UInt8[]) - @test isempty(auxdata) - - buf = IOBuffer() - write(buf, "NM", UInt8('s'), Int16(1)) - auxdata = BAM.AuxData(take!(buf)) - @test length(auxdata) == 1 - @test auxdata["NM"] === Int16(1) - @test collect(auxdata) == ["NM" => Int16(1)] - - buf = IOBuffer() - write(buf, "AS", UInt8('c'), Int8(-18)) - write(buf, "NM", UInt8('s'), Int16(1)) - write(buf, "XA", UInt8('f'), Float32(3.14)) - write(buf, "XB", UInt8('Z'), "some text\0") - write(buf, "XC", UInt8('B'), UInt8('i'), Int32(3), Int32[10, -5, 8]) - auxdata = BAM.AuxData(take!(buf)) - @test length(auxdata) == 5 - @test auxdata["AS"] === Int8(-18) - @test auxdata["NM"] === Int16(1) - @test auxdata["XA"] === Float32(3.14) - @test auxdata["XB"] == "some text" - @test auxdata["XC"] == Int32[10, -5, 8] - @test convert(Dict{String,Any}, auxdata) == Dict( - "AS" => Int8(-18), - "NM" => Int16(1), - "XA" => Float32(3.14), - "XB" => "some text", - "XC" => Int32[10, -5, 8]) - end - - @testset "Record" begin - record = BAM.Record() - @test !isfilled(record) - @test repr(record) == "XAM.BAM.Record: " - @test_throws ArgumentError BAM.flag(record) - end - - @testset "Reader" begin - reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam")) - @test isa(reader, BAM.Reader) - @test eltype(reader) === BAM.Record - @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:") - - # header - h = header(reader) - @test isa(h, SAM.Header) - - # first record - record = BAM.Record() - read!(reader, record) - @test BAM.ismapped(record) - @test BAM.isprimary(record) - @test ! BAM.ispositivestrand(record) - @test BAM.refname(record) == "CHROMOSOME_I" - @test BAM.refid(record) === 1 - @test BAM.hasnextrefid(record) - @test BAM.nextrefid(record) === 0 - @test BAM.hasposition(record) === hasleftposition(record) === true - @test BAM.position(record) === leftposition(record) === 2 - @test BAM.hasnextposition(record) - @test BAM.nextposition(record) === 0 - @test rightposition(record) == 102 - @test BAM.hastempname(record) === hasseqname(record) === true - @test BAM.tempname(record) == seqname(record) == "SRR065390.14978392" - @test BAM.hassequence(record) === hassequence(record) === true - @test BAM.sequence(record) == sequence(record) == dna""" - CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCT - AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA - """ - @test BAM.seqlength(record) === 100 - @test BAM.hasquality(record) - @test eltype(BAM.quality(record)) == UInt8 - @test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"] - @test BAM.flag(record) === UInt16(16) - @test BAM.cigar(record) == "27M1D73M" - @test BAM.alignment(record) == Alignment([ - AlignmentAnchor( 0, 1, OP_START), - AlignmentAnchor( 27, 28, OP_MATCH), - AlignmentAnchor( 27, 29, OP_DELETE), - AlignmentAnchor(100, 102, OP_MATCH)]) - @test record["XG"] == 1 - @test record["XM"] == 5 - @test record["XN"] == 0 - @test record["XO"] == 1 - @test record["AS"] == -18 - @test record["XS"] == -18 - @test record["YT"] == "UU" - @test keys(record) == ["XG","XM","XN","XO","AS","XS","YT"] - @test values(record) == [1, 5, 0, 1, -18, -18, "UU"] - @test eof(reader) - close(reader) - - # Test conversion from byte array to record - dsize = BAM.data_size(record) - array = Vector{UInt8}(undef, BAM.FIXED_FIELDS_BYTES + dsize) - GC.@preserve array record begin - ptr = Ptr{UInt8}(pointer_from_objref(record)) - unsafe_copyto!(pointer(array), ptr, BAM.FIXED_FIELDS_BYTES) - unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize) - end - new_record = convert(BAM.Record, array) - @test record.bin_mq_nl == new_record.bin_mq_nl - @test record.block_size == new_record.block_size - @test record.flag_nc == new_record.flag_nc - @test record.l_seq == new_record.l_seq - @test record.next_refid == new_record.next_refid - @test record.next_pos == new_record.next_pos - @test record.refid == new_record.refid - @test record.pos == new_record.pos - @test record.tlen == new_record.tlen - @test record.data == new_record.data - - # rightposition (also implicitly alignlength) - records = collect(open(BAM.Reader, joinpath(bamdir, "ce#5b.bam"))) - @test BAM.rightposition(records[6]) == rightposition(records[6]) == 83 - - # iterator - @test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#1.bam")))) == 1 - @test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#2.bam")))) == 2 - - # IOStream - @test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#1.bam"))))) == 1 - @test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#2.bam"))))) == 2 - end - - @testset "Read long CIGARs" begin - function get_cigar_lens(rec::BAM.Record) - cigar_ops, cigar_n = BAM.cigar_rle(rec) - field_ops, field_n = BAM.cigar_rle(rec, false) - cigar_l = length(cigar_ops) - field_l = length(field_ops) - return cigar_l, field_l - end - - function check_cigar_vs_field(rec::BAM.Record) - cigar = BAM.cigar(rec) - field = BAM.cigar(rec, false) - cigar_l, field_l = get_cigar_lens(rec) - return cigar != field && cigar_l != field_l - end - - function check_cigar_lens(rec::BAM.Record, field_len, cigar_len) - cigar_l, field_l = get_cigar_lens(rec) - return cigar_l == cigar_len && field_l == field_len - end - - reader = open(BAM.Reader, joinpath(bamdir, "cigar-64k.bam")) - rec = BAM.Record() - read!(reader, rec) - @test !check_cigar_vs_field(rec) - read!(reader, rec) - @test check_cigar_vs_field(rec) - @test check_cigar_lens(rec, 2, 72091) - end - - function compare_records(xs, ys) - if length(xs) != length(ys) - return false - end - for (x, y) in zip(xs, ys) - if !( - x.block_size == y.block_size && - x.refid == y.refid && - x.pos == y.pos && - x.bin_mq_nl == y.bin_mq_nl && - x.flag_nc == y.flag_nc && - x.l_seq == y.l_seq && - x.next_refid == y.next_refid && - x.next_pos == y.next_pos && - x.tlen == y.tlen && - x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)]) - return false - end - end - return true - end - - @testset "Round trip" begin - for specimen in list_valid_specimens("BAM") - filepath = joinpath(bamdir, filename(specimen)) - mktemp() do path, _ - # copy - if hastags(specimen) && in("bai", tags(specimen)) - reader = open(BAM.Reader, filepath, index=filepath * ".bai") - else - reader = open(BAM.Reader, filepath) - end - - header_original = header(reader) - - writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))) - - records = BAM.Record[] - for record in reader - push!(records, record) - write(writer, record) - end - close(reader) - close(writer) - - reader = open(BAM.Reader, path) - - @test header(reader) == header_original - @test compare_records(collect(reader), records) - - close(reader) - - end - end - end - - @testset "In-Place-Reading Pattern" begin - - file_bam = joinpath(bamdir, "ce#5b.bam") - - records = open(collect, BAM.Reader, file_bam) - - reader = open(BAM.Reader, file_bam) - record = BAM.Record() - i = 0 - while !eof(reader) - empty!(record) # Reset the record. - read!(reader, record) - - i = i + 1 - @test records[i] == record - end - - close(reader) - - end - - @testset "Random access" begin - filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam") - reader = open(BAM.Reader, filepath, index=filepath * ".bai") - - @test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator) - @test isa(eachoverlap(reader, GenomicFeatures.Interval("chr1", 1, 100)), BAM.OverlapIterator) - - # expected values are counted using samtools - for (refname, interval, expected) in [ - ("chr1", 1_000:10000, 21), - ("chr1", 8_000:10000, 20), - ("chr1", 766_000:800_000, 142), - ("chr1", 786_000:800_000, 1), - ("chr1", 796_000:800_000, 0)] - intsect = eachoverlap(reader, refname, interval) - @test eltype(intsect) == BAM.Record - @test count(_ -> true, intsect) == expected - # check that the intersection iterator is stateless - @test count(_ -> true, intsect) == expected - end - - # randomized tests - for n in 1:50 - refindex = 1 - refname = "chr1" - range = randrange(1:1_000_000) - seekstart(reader) - # linear scan - expected = filter(collect(reader)) do record - BAM.compare_intervals(record, (refindex, range)) == 0 - end - # indexed scan - actual = collect(eachoverlap(reader, refname, range)) - @test compare_records(actual, expected) - end - close(reader) - - filepath = joinpath(bamdir, "R_12h_D06.uniq.q40.bam") - reader = open(BAM.Reader, filepath, index=filepath * ".bai") - @test isempty(collect(eachoverlap(reader, "chr19", 5823708:5846478))) - close(reader) - end -end +include("test_sam.jl") +include("test_bam.jl") diff --git a/test/test_bam.jl b/test/test_bam.jl new file mode 100644 index 0000000..186d231 --- /dev/null +++ b/test/test_bam.jl @@ -0,0 +1,280 @@ +@testset "BAM" begin + bamdir = path_of_format("BAM") + + @testset "AuxData" begin + auxdata = BAM.AuxData(UInt8[]) + @test isempty(auxdata) + + buf = IOBuffer() + write(buf, "NM", UInt8('s'), Int16(1)) + auxdata = BAM.AuxData(take!(buf)) + @test length(auxdata) == 1 + @test auxdata["NM"] === Int16(1) + @test collect(auxdata) == ["NM" => Int16(1)] + + buf = IOBuffer() + write(buf, "AS", UInt8('c'), Int8(-18)) + write(buf, "NM", UInt8('s'), Int16(1)) + write(buf, "XA", UInt8('f'), Float32(3.14)) + write(buf, "XB", UInt8('Z'), "some text\0") + write(buf, "XC", UInt8('B'), UInt8('i'), Int32(3), Int32[10, -5, 8]) + auxdata = BAM.AuxData(take!(buf)) + @test length(auxdata) == 5 + @test auxdata["AS"] === Int8(-18) + @test auxdata["NM"] === Int16(1) + @test auxdata["XA"] === Float32(3.14) + @test auxdata["XB"] == "some text" + @test auxdata["XC"] == Int32[10, -5, 8] + @test convert(Dict{String,Any}, auxdata) == Dict( + "AS" => Int8(-18), + "NM" => Int16(1), + "XA" => Float32(3.14), + "XB" => "some text", + "XC" => Int32[10, -5, 8]) + end + + @testset "Record" begin + record = BAM.Record() + @test !isfilled(record) + @test repr(record) == "XAM.BAM.Record: " + @test_throws ArgumentError BAM.flag(record) + end + + @testset "Reader" begin + reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam")) + @test isa(reader, BAM.Reader) + @test eltype(reader) === BAM.Record + @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:") + + # header + h = header(reader) + @test isa(h, SAM.Header) + + # first record + record = BAM.Record() + read!(reader, record) + @test BAM.ismapped(record) + @test BAM.isprimary(record) + @test ! BAM.ispositivestrand(record) + @test BAM.refname(record) == "CHROMOSOME_I" + @test BAM.refid(record) === 1 + @test BAM.hasnextrefid(record) + @test BAM.nextrefid(record) === 0 + @test BAM.hasposition(record) === hasleftposition(record) === true + @test BAM.position(record) === leftposition(record) === 2 + @test BAM.hasnextposition(record) + @test BAM.nextposition(record) === 0 + @test rightposition(record) == 102 + @test BAM.hastempname(record) === hasseqname(record) === true + @test BAM.tempname(record) == seqname(record) == "SRR065390.14978392" + @test BAM.hassequence(record) === hassequence(record) === true + @test BAM.sequence(record) == sequence(record) == dna""" + CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCT + AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA + """ + @test BAM.seqlength(record) === 100 + @test BAM.hasquality(record) + @test eltype(BAM.quality(record)) == UInt8 + @test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"] + @test BAM.flag(record) === UInt16(16) + @test BAM.cigar(record) == "27M1D73M" + @test BAM.alignment(record) == Alignment([ + AlignmentAnchor( 0, 1, OP_START), + AlignmentAnchor( 27, 28, OP_MATCH), + AlignmentAnchor( 27, 29, OP_DELETE), + AlignmentAnchor(100, 102, OP_MATCH)]) + @test record["XG"] == 1 + @test record["XM"] == 5 + @test record["XN"] == 0 + @test record["XO"] == 1 + @test record["AS"] == -18 + @test record["XS"] == -18 + @test record["YT"] == "UU" + @test keys(record) == ["XG","XM","XN","XO","AS","XS","YT"] + @test values(record) == [1, 5, 0, 1, -18, -18, "UU"] + @test eof(reader) + close(reader) + + # Test conversion from byte array to record + dsize = BAM.data_size(record) + array = Vector{UInt8}(undef, BAM.FIXED_FIELDS_BYTES + dsize) + GC.@preserve array record begin + ptr = Ptr{UInt8}(pointer_from_objref(record)) + unsafe_copyto!(pointer(array), ptr, BAM.FIXED_FIELDS_BYTES) + unsafe_copyto!(array, BAM.FIXED_FIELDS_BYTES + 1, record.data, 1, dsize) + end + new_record = convert(BAM.Record, array) + @test record.bin_mq_nl == new_record.bin_mq_nl + @test record.block_size == new_record.block_size + @test record.flag_nc == new_record.flag_nc + @test record.l_seq == new_record.l_seq + @test record.next_refid == new_record.next_refid + @test record.next_pos == new_record.next_pos + @test record.refid == new_record.refid + @test record.pos == new_record.pos + @test record.tlen == new_record.tlen + @test record.data == new_record.data + + # rightposition (also implicitly alignlength) + records = collect(open(BAM.Reader, joinpath(bamdir, "ce#5b.bam"))) + @test BAM.rightposition(records[6]) == rightposition(records[6]) == 83 + + # iterator + @test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#1.bam")))) == 1 + @test length(collect(open(BAM.Reader, joinpath(bamdir, "ce#2.bam")))) == 2 + + # IOStream + @test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#1.bam"))))) == 1 + @test length(collect(BAM.Reader(open(joinpath(bamdir, "ce#2.bam"))))) == 2 + end + + @testset "Read long CIGARs" begin + function get_cigar_lens(rec::BAM.Record) + cigar_ops, cigar_n = BAM.cigar_rle(rec) + field_ops, field_n = BAM.cigar_rle(rec, false) + cigar_l = length(cigar_ops) + field_l = length(field_ops) + return cigar_l, field_l + end + + function check_cigar_vs_field(rec::BAM.Record) + cigar = BAM.cigar(rec) + field = BAM.cigar(rec, false) + cigar_l, field_l = get_cigar_lens(rec) + return cigar != field && cigar_l != field_l + end + + function check_cigar_lens(rec::BAM.Record, field_len, cigar_len) + cigar_l, field_l = get_cigar_lens(rec) + return cigar_l == cigar_len && field_l == field_len + end + + reader = open(BAM.Reader, joinpath(bamdir, "cigar-64k.bam")) + rec = BAM.Record() + read!(reader, rec) + @test !check_cigar_vs_field(rec) + read!(reader, rec) + @test check_cigar_vs_field(rec) + @test check_cigar_lens(rec, 2, 72091) + end + + function compare_records(xs, ys) + if length(xs) != length(ys) + return false + end + for (x, y) in zip(xs, ys) + if !( + x.block_size == y.block_size && + x.refid == y.refid && + x.pos == y.pos && + x.bin_mq_nl == y.bin_mq_nl && + x.flag_nc == y.flag_nc && + x.l_seq == y.l_seq && + x.next_refid == y.next_refid && + x.next_pos == y.next_pos && + x.tlen == y.tlen && + x.data[1:BAM.data_size(x)] == y.data[1:BAM.data_size(y)]) + return false + end + end + return true + end + + @testset "Round trip" begin + for specimen in list_valid_specimens("BAM") + filepath = joinpath(bamdir, filename(specimen)) + mktemp() do path, _ + # copy + if hastags(specimen) && in("bai", tags(specimen)) + reader = open(BAM.Reader, filepath, index=filepath * ".bai") + else + reader = open(BAM.Reader, filepath) + end + + header_original = header(reader) + + writer = BAM.Writer(BGZFStream(path, "w"), BAM.header(reader, fillSQ=isempty(findall(header(reader), "SQ")))) + + records = BAM.Record[] + for record in reader + push!(records, record) + write(writer, record) + end + close(reader) + close(writer) + + reader = open(BAM.Reader, path) + + @test header(reader) == header_original + @test compare_records(collect(reader), records) + + close(reader) + + end + end + end + + @testset "In-Place-Reading Pattern" begin + + file_bam = joinpath(bamdir, "ce#5b.bam") + + records = open(collect, BAM.Reader, file_bam) + + reader = open(BAM.Reader, file_bam) + record = BAM.Record() + i = 0 + while !eof(reader) + empty!(record) # Reset the record. + read!(reader, record) + + i = i + 1 + @test records[i] == record + end + + close(reader) + + end + + @testset "Random access" begin + filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam") + reader = open(BAM.Reader, filepath, index=filepath * ".bai") + + @test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator) + @test isa(eachoverlap(reader, GenomicFeatures.Interval("chr1", 1, 100)), BAM.OverlapIterator) + + # expected values are counted using samtools + for (refname, interval, expected) in [ + ("chr1", 1_000:10000, 21), + ("chr1", 8_000:10000, 20), + ("chr1", 766_000:800_000, 142), + ("chr1", 786_000:800_000, 1), + ("chr1", 796_000:800_000, 0)] + intsect = eachoverlap(reader, refname, interval) + @test eltype(intsect) == BAM.Record + @test count(_ -> true, intsect) == expected + # check that the intersection iterator is stateless + @test count(_ -> true, intsect) == expected + end + + # randomized tests + for n in 1:50 + refindex = 1 + refname = "chr1" + range = randrange(1:1_000_000) + seekstart(reader) + # linear scan + expected = filter(collect(reader)) do record + BAM.compare_intervals(record, (refindex, range)) == 0 + end + # indexed scan + actual = collect(eachoverlap(reader, refname, range)) + @test compare_records(actual, expected) + end + close(reader) + + filepath = joinpath(bamdir, "R_12h_D06.uniq.q40.bam") + reader = open(BAM.Reader, filepath, index=filepath * ".bai") + @test isempty(collect(eachoverlap(reader, "chr19", 5823708:5846478))) + close(reader) + end +end diff --git a/test/test_sam.jl b/test/test_sam.jl new file mode 100644 index 0000000..e669757 --- /dev/null +++ b/test/test_sam.jl @@ -0,0 +1,198 @@ +@testset "SAM" begin + samdir = path_of_format("SAM") + + @testset "MetaInfo" begin + metainfo = SAM.MetaInfo() + @test !isfilled(metainfo) + @test occursin("not filled", repr(metainfo)) + + metainfo = SAM.MetaInfo("CO", "some comment (parens)") + @test isfilled(metainfo) + @test string(metainfo) == "@CO\tsome comment (parens)" + @test occursin("CO", repr(metainfo)) + @test SAM.tag(metainfo) == "CO" + @test SAM.value(metainfo) == "some comment (parens)" + @test_throws ArgumentError keys(metainfo) + @test_throws ArgumentError values(metainfo) + + metainfo = SAM.MetaInfo("HD", ["VN" => "1.0", "SO" => "coordinate"]) + @test isfilled(metainfo) + @test string(metainfo) == "@HD\tVN:1.0\tSO:coordinate" + @test occursin("HD", repr(metainfo)) + @test SAM.tag(metainfo) == "HD" + @test SAM.value(metainfo) == "VN:1.0\tSO:coordinate" + @test keys(metainfo) == ["VN", "SO"] + @test values(metainfo) == ["1.0", "coordinate"] + @test SAM.keyvalues(metainfo) == ["VN" => "1.0", "SO" => "coordinate"] + @test haskey(metainfo, "VN") + @test haskey(metainfo, "SO") + @test !haskey(metainfo, "GO") + @test metainfo["VN"] == "1.0" + @test metainfo["SO"] == "coordinate" + @test_throws KeyError metainfo["GO"] + end + + @testset "Header" begin + header = SAM.Header() + @test isempty(header) + push!(header, SAM.MetaInfo("@HD\tVN:1.0\tSO:coordinate")) + @test !isempty(header) + @test length(header) == 1 + push!(header, SAM.MetaInfo("@CO\tsome comment")) + @test length(header) == 2 + @test isa(collect(header), Vector{SAM.MetaInfo}) + end + + @testset "Record" begin + record = SAM.Record() + @test !isfilled(record) + @test !SAM.ismapped(record) + @test repr(record) == "XAM.SAM.Record: " + @test_throws ArgumentError SAM.flag(record) + + record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*") + @test isfilled(record) + @test occursin(r"^XAM.SAM.Record:\n", repr(record)) + @test SAM.ismapped(record) + @test SAM.isprimary(record) + @test SAM.hastempname(record) + @test SAM.tempname(record) == "r001" + @test SAM.hasflag(record) + @test SAM.flag(record) === UInt16(99) + @test SAM.hasrefname(record) + @test SAM.refname(record) == "chr1" + @test SAM.hasposition(record) + @test SAM.position(record) === 7 + @test SAM.hasmappingquality(record) + @test SAM.mappingquality(record) === UInt8(30) + @test SAM.hascigar(record) + @test SAM.cigar(record) == "8M2I4M1D3M" + @test SAM.hasnextrefname(record) + @test SAM.nextrefname(record) == "=" + @test SAM.hasnextposition(record) + @test SAM.nextposition(record) === 37 + @test SAM.hastemplength(record) + @test SAM.templength(record) === 39 + @test SAM.hassequence(record) + @test SAM.sequence(record) == dna"TTAGATAAAGGATACTG" + @test !SAM.hasquality(record) + @test_throws MissingFieldException SAM.quality(record) + end + + @testset "Reader" begin + reader = open(SAM.Reader, joinpath(samdir, "ce#1.sam")) + @test isa(reader, SAM.Reader) + @test eltype(reader) === SAM.Record + + # header + h = header(reader) + @test string.(findall(header(reader), "SQ")) == ["@SQ\tSN:CHROMOSOME_I\tLN:1009800"] + + # first record + record = SAM.Record() + read!(reader, record) + @test SAM.ismapped(record) + @test SAM.refname(record) == "CHROMOSOME_I" + @test SAM.position(record) == leftposition(record) == 2 + @test SAM.rightposition(record) == rightposition(record) == 102 + @test SAM.tempname(record) == seqname(record) == "SRR065390.14978392" + @test SAM.sequence(record) == sequence(record) == dna"CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA" + @test SAM.sequence(String, record) == "CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA" + @test SAM.seqlength(record) == 100 + @test SAM.quality(record) == (b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" .- 33) + @test SAM.quality(String, record) == "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + @test SAM.flag(record) == 16 + @test SAM.cigar(record) == "27M1D73M" + @test SAM.alignment(record) == Alignment([ + AlignmentAnchor( 0, 1, OP_START), + AlignmentAnchor( 27, 28, OP_MATCH), + AlignmentAnchor( 27, 29, OP_DELETE), + AlignmentAnchor(100, 102, OP_MATCH)]) + @test record["XG"] == 1 + @test record["XM"] == 5 + @test record["XN"] == 0 + @test record["XO"] == 1 + @test record["AS"] == -18 + @test record["XS"] == -18 + @test record["YT"] == "UU" + @test eof(reader) + close(reader) + + # rightposition (also implicitly alignlength) + records = collect(open(SAM.Reader, joinpath(samdir, "ce#5b.sam"))) + @test SAM.rightposition(records[6]) == rightposition(records[6]) == 83 + + # iterator + @test length(collect(open(SAM.Reader, joinpath(samdir, "ce#1.sam")))) == 1 + @test length(collect(open(SAM.Reader, joinpath(samdir, "ce#2.sam")))) == 2 + + # IOStream + @test length(collect(SAM.Reader(open(joinpath(samdir, "ce#1.sam"))))) == 1 + @test length(collect(SAM.Reader(open(joinpath(samdir, "ce#2.sam"))))) == 2 + end + + @testset "Round trip" begin + function compare_records(xs, ys) + if length(xs) != length(ys) + return false + end + for (x, y) in zip(xs, ys) + if x.data[x.filled] != y.data[y.filled] + return false + end + end + return true + end + for specimen in list_valid_specimens("SAM") + filepath = joinpath(samdir, filename(specimen)) + mktemp() do path, io + # copy + reader = open(SAM.Reader, filepath) + + header_original = header(reader) + + writer = SAM.Writer(io, header_original) + + records = SAM.Record[] + for record in reader + push!(records, record) + write(writer, record) + end + + close(reader) + close(writer) + + reader = open(SAM.Reader, path) + + @test header(reader) == header_original + @test compare_records(collect(reader), records) + + close(reader) + + end + end + end + + @testset "In-Place-Reading Pattern" begin + + file_sam = joinpath(samdir, "ce#5b.sam") + + records = open(collect, SAM.Reader, file_sam) + + reader = open(SAM.Reader, file_sam) + record = SAM.Record() + i = 0 + while !eof(reader) + empty!(record) # Reset the record. + read!(reader, record) + + i = i + 1 + + @test records[i] == record + + end + + close(reader) + + end +end From 73de48e2325443c228f69b2c7407de2be83137c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 27 May 2020 20:55:37 +1000 Subject: [PATCH 07/11] Move imports --- src/sam/readrecord.jl | 4 ++++ src/sam/sam.jl | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 8dbea12..b3f5e2d 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -1,6 +1,10 @@ # Automa.jl generated readrecord! and readmetainfo! functions # ======================================== +import Automa +import Automa.RegExp: @re_str +import Automa.Stream: @mark, @markpos, @relpos, @abspos + # file = header . body # header = metainfo* # body = record* diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 95e36f6..9710269 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -5,9 +5,6 @@ module SAM using BioGenerics -import Automa -import Automa.RegExp: @re_str -import Automa.Stream: @mark, @markpos, @relpos, @abspos import BioAlignments import BioGenerics: BioGenerics, isfilled, header import BioGenerics.Exceptions: missingerror From 436cfd84baf799b4c35ddeee5835d94114814db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Wed, 27 May 2020 20:55:51 +1000 Subject: [PATCH 08/11] Use @info --- src/sam/readrecord.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index b3f5e2d..8bd5f06 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -10,7 +10,7 @@ import Automa.Stream: @mark, @markpos, @relpos, @abspos # body = record* const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = (function () - isinteractive() && info("compiling SAM") + isinteractive() && @info "compiling SAM" cat = Automa.RegExp.cat rep = Automa.RegExp.rep From 946068b2f3d2e907ff5eeec1f77e49907aa32f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Mon, 22 Jun 2020 10:21:32 +1000 Subject: [PATCH 09/11] Simplify use of `appendfrom!` --- src/sam/readrecord.jl | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 8bd5f06..4bc0761 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -170,16 +170,10 @@ function appendfrom!(dst, dpos, src, spos, n) end const action_metainfo = quote + appendfrom!(metainfo.data, 1, data, @markpos, p-@markpos) + metainfo.filled = 1:(p-@markpos) - let markpos = @markpos() - - appendfrom!(metainfo.data, 1, data, markpos, length(markpos:p-1)) - - metainfo.filled = @relpos(markpos):@relpos(p-1) - - found_metainfo = true - end - + found_metainfo = true end const sam_actions_metainfo = Dict( @@ -231,15 +225,11 @@ const sam_actions_record = Dict( :record_qual => :(record.qual = pos:@relpos(p-1)), :record_field => :(push!(record.fields, pos:@relpos(p-1))), :record => quote - let markpos = @markpos() + appendfrom!(record.data, 1, data, @markpos, p-@markpos) + record.filled = 1:(p-@markpos) - appendfrom!(record.data, 1, data, markpos, length(markpos:p-1)) - - record.filled = @relpos(markpos):@relpos(p-1) - - found_record = true - @escape - end + found_record = true + @escape end ) From 6141d679e8f30a269e9aa2d940f38bef2a503da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Mon, 22 Jun 2020 09:53:54 +1000 Subject: [PATCH 10/11] Use `copyto!` https://github.com/BioJulia/BioSequences.jl/pull/95 --- Project.toml | 2 +- src/sam/record.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index f5a56c2..3b2af5e 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ Automa = "0.7, 0.8" BGZFStreams = "0.3" BioAlignments = "2" BioGenerics = "0.1" -BioSequences = "2" +BioSequences = "2.0.4" GenomicFeatures = "2" Indexes = "0.1" TranscodingStreams = "0.6, 0.7, 0.8, 0.9" diff --git a/src/sam/record.jl b/src/sam/record.jl index dc14c02..2d94a12 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -399,7 +399,7 @@ function sequence(record::Record)::BioSequences.LongDNASeq end seqlen = length(record.seq) ret = BioSequences.LongDNASeq(seqlen) - BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen) + copyto!(ret, 1, record.data, first(record.seq), seqlen) return ret end From bde477d54fe8d6d5d8f5bbcc158b7af3f02bacd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciara=CC=81n=20O=27Mara?= Date: Mon, 22 Jun 2020 10:35:34 +1000 Subject: [PATCH 11/11] Increment version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3b2af5e..d8ba051 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "CiarĂ¡n O'Mara "] -version = "0.2.4" +version = "0.2.5" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"