diff --git a/Project.toml b/Project.toml index 0156054..2f31d67 100644 --- a/Project.toml +++ b/Project.toml @@ -2,3 +2,32 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "CiarĂ¡n O'Mara "] version = "0.1.0" + +[deps] +Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" +BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6" +BioAlignments = "00701ae9-d1dc-5365-b64a-a3a3ebf5695e" +BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81" +BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" +GenomicFeatures = "899a7d2d-5c61-547b-bef9-6698a8d05446" +Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[compat] +Automa = "0.7, 0.8" +BGZFStreams = "0.3" +BioAlignments = "2" +BioCore = "2" +BioSequences = "2" +BufferedStreams = "1" +GenomicFeatures = "2" +Indexes = "0.1" +julia = "1.1" + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" + +[targets] +test = ["Test", "YAML"] diff --git a/src/XAM.jl b/src/XAM.jl index 7fc5234..8609747 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -1,10 +1,13 @@ module XAM export - BAM, - SAM + SAM, + BAM include("sam/sam.jl") include("bam/bam.jl") +using .SAM +using .BAM + end # module diff --git a/src/bam/bai.jl b/src/bam/bai.jl index e5caf0a..035f17e 100644 --- a/src/bam/bai.jl +++ b/src/bam/bai.jl @@ -7,7 +7,7 @@ # An index type for the BAM file format. struct BAI # BGZF file index - index::GenomicFeatures.Indexes.BGZFIndex + index::Indexes.BGZFIndex # number of unmapped reads n_no_coors::Union{Nothing, Int} @@ -44,7 +44,7 @@ function read_bai(input::IO) # read contents n_refs = read(input, Int32) - index = GenomicFeatures.Indexes.read_bgzfindex(input, n_refs) + index = Indexes.read_bgzfindex(input, n_refs) if !eof(input) n_no_coors = read(input, UInt64) else diff --git a/src/bam/bam.jl b/src/bam/bam.jl index f3317f6..e055231 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -3,11 +3,18 @@ module BAM +using BioCore +using GenomicFeatures +using XAM.SAM + import BGZFStreams -import BioAlignments: BioAlignments, SAM -import GenomicFeatures: GenomicFeatures, Interval +import BioAlignments +import Indexes import BioSequences -import BioCore: BioCore, isfilled +import BioCore: isfilled, header + +import GenomicFeatures: eachoverlap + include("bai.jl") include("auxdata.jl") diff --git a/src/bam/overlap.jl b/src/bam/overlap.jl index 10641d3..c3475ba 100644 --- a/src/bam/overlap.jl +++ b/src/bam/overlap.jl @@ -36,7 +36,7 @@ mutable struct OverlapIteratorState refindex::Int # possibly overlapping chunks - chunks::Vector{GenomicFeatures.Indexes.Chunk} + chunks::Vector{Indexes.Chunk} # current chunk index chunkid::Int @@ -51,7 +51,7 @@ function Base.iterate(iter::OverlapIterator) throw(ArgumentError("sequence name $(iter.refname) is not found in the header")) end @assert iter.reader.index !== nothing - chunks = GenomicFeatures.Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval) + chunks = Indexes.overlapchunks(iter.reader.index.index, refindex, iter.interval) if !isempty(chunks) seek(iter.reader, first(chunks).start) end diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 5e92b8e..906110a 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -66,10 +66,6 @@ function header(reader::Reader; fillSQ::Bool=false)::SAM.Header return header end -function BioCore.header(reader::Reader) - return header(reader) -end - function Base.seek(reader::Reader, voffset::BGZFStreams.VirtualOffset) seek(reader.stream, voffset) end diff --git a/src/bam/record.jl b/src/bam/record.jl index 31226d2..107f127 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -477,11 +477,11 @@ function hastemplength(record::Record) end """ - sequence(record::Record)::BioSequences.DNASequence + sequence(record::Record)::BioSequences.LongDNASeq Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.DNASequence +function sequence(record::Record)::BioSequences.LongDNASeq checkfilled(record) seqlen = seqlength(record) data = Vector{UInt64}(undef, cld(seqlen, 16)) @@ -491,7 +491,7 @@ function sequence(record::Record)::BioSequences.DNASequence x = unsafe_load(src, i) data[i] = (x & 0x0f0f0f0f0f0f0f0f) << 4 | (x & 0xf0f0f0f0f0f0f0f0) >> 4 end - return BioSequences.DNASequence(data, 1:seqlen, false) + return BioSequences.LongDNASeq(data, 1:seqlen, false) end function hassequence(record::Record) diff --git a/src/sam/reader.jl b/src/sam/reader.jl index ee016e4..d26a840 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -38,10 +38,6 @@ function header(reader::Reader)::Header return reader.header end -function BioCore.header(reader::Reader) - return header(reader) -end - function Base.eltype(::Type{Reader}) return Record end diff --git a/src/sam/record.jl b/src/sam/record.jl index 25d39d8..407fe2f 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -370,17 +370,17 @@ function hastemplength(record::Record) end """ - sequence(record::Record)::BioSequences.DNASequence + sequence(record::Record)::BioSequences.LongDNASeq Get the segment sequence of `record`. """ -function sequence(record::Record)::BioSequences.DNASequence +function sequence(record::Record)::BioSequences.LongDNASeq checkfilled(record) if ismissing(record, record.seq) missingerror(:sequence) end seqlen = length(record.seq) - ret = BioSequences.DNASequence(seqlen) + ret = BioSequences.LongDNASeq(seqlen) BioSequences.encode_copy!(ret, 1, record.data, first(record.seq), seqlen) return ret end diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 2b46653..b917f37 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -3,12 +3,14 @@ module SAM +using BioCore + import Automa import Automa.RegExp: @re_str import BioAlignments import BioCore.Exceptions: missingerror import BioCore.RecordHelper: unsafe_parse_decimal -import BioCore: BioCore, isfilled +import BioCore: isfilled, header import BioSequences import BufferedStreams using Printf: @sprintf diff --git a/test/runtests.jl b/test/runtests.jl index 1182fb5..7594e1c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,13 +1,25 @@ using Test -using BioAlignments -using BioSymbols +using GenomicFeatures +using XAM +import BioAlignments: Alignment, AlignmentAnchor, OP_START, OP_MATCH, OP_DELETE import BGZFStreams: BGZFStream import BioCore.Exceptions: MissingFieldException import BioCore.Testing.get_bio_fmt_specimens import BioSequences: @dna_str, @aa_str -import GenomicFeatures import YAML +import BioCore: + header, + isfilled, + seqname, + hasseqname, + sequence, + hassequence, + leftposition, + rightposition, + hasleftposition, + hasrightposition + # Generate a random range within `range`. function randrange(range) x = rand(range) @@ -68,12 +80,12 @@ end record = SAM.Record() @test !isfilled(record) @test !SAM.ismapped(record) - @test repr(record) == "BioAlignments.SAM.Record: " + @test repr(record) == "XAM.SAM.Record: " @test_throws ArgumentError SAM.flag(record) record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*") @test isfilled(record) - @test occursin(r"^BioAlignments.SAM.Record:\n", repr(record)) + @test occursin(r"^XAM.SAM.Record:\n", repr(record)) @test SAM.ismapped(record) @test SAM.isprimary(record) @test SAM.hastempname(record) @@ -217,7 +229,7 @@ end @testset "Record" begin record = BAM.Record() @test !isfilled(record) - @test repr(record) == "BioAlignments.BAM.Record: " + @test repr(record) == "XAM.BAM.Record: " @test_throws ArgumentError BAM.flag(record) end @@ -225,7 +237,7 @@ end reader = open(BAM.Reader, joinpath(bamdir, "ce#1.bam")) @test isa(reader, BAM.Reader) @test eltype(reader) === BAM.Record - @test startswith(repr(reader), "BioAlignments.BAM.Reader{IOStream}:") + @test startswith(repr(reader), "XAM.BAM.Reader{IOStream}:") # header h = header(reader)