From 9a9f2c1f5ad7ad7765475e744c8dc9a958458c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 9 Jan 2023 14:10:50 +1100 Subject: [PATCH] Fun with flags Implements flag queries. --- src/XAM.jl | 24 +---- src/bam/bam.jl | 3 +- src/bam/record.jl | 40 -------- src/flags.jl | 226 ++++++++++++++++++++++++++++++++++++++++++++++ src/sam/flags.jl | 29 ------ src/sam/record.jl | 29 ------ src/sam/sam.jl | 4 +- 7 files changed, 233 insertions(+), 122 deletions(-) create mode 100644 src/flags.jl delete mode 100644 src/sam/flags.jl diff --git a/src/XAM.jl b/src/XAM.jl index 2ae8ac8..6aa98a5 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -1,35 +1,17 @@ module XAM using BioGenerics +import BioGenerics: isfilled #Note: used by `ismapped`. export SAM, BAM - + abstract type XAMRecord end abstract type XAMReader <: BioGenerics.IO.AbstractReader end abstract type XAMWriter <: BioGenerics.IO.AbstractWriter end -""" - flag(record::Union{SAM.Record, BAM.Record})::UInt16 - -Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag -being OR'd together. The possible flags are: - - 0x0001 template having multiple segments in sequencing - 0x0002 each segment properly aligned according to the aligner - 0x0004 segment unmapped - 0x0008 next segment in the template unmapped - 0x0010 SEQ being reverse complemented - 0x0020 SEQ of the next segment in the template being reverse complemented - 0x0040 the first segment in the template - 0x0080 the last segment in the template - 0x0100 secondary alignment - 0x0200 not passing filters, such as platform/vendor quality controls - 0x0400 PCR or optical duplicate - 0x0800 supplementary alignment -""" -function flag end +include("flags.jl") include("sam/sam.jl") include("bam/bam.jl") diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 03ef9fd..69ca235 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,7 +6,8 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, + ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. import BGZFStreams import BioAlignments diff --git a/src/bam/record.jl b/src/bam/record.jl index db33f88..e8ee356 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -137,37 +137,6 @@ function hasflag(record::Record) return isfilled(record) end -""" - ismapped(record::Record)::Bool - -Test if `record` is mapped. -""" -function ismapped(record::Record)::Bool - return flag(record) & SAM.FLAG_UNMAP == 0 -end - -""" - isprimary(record::Record)::Bool - -Test if `record` is a primary line of the read. - -This is equivalent to `flag(record) & 0x900 == 0`. -""" -function isprimary(record::Record)::Bool - return flag(record) & 0x900 == 0 -end - -""" - ispositivestrand(record::Record)::Bool - -Test if `record` is aligned to the positive strand. - -This is equivalent to `flag(record) & 0x10 == 0`. -""" -function ispositivestrand(record::Record)::Bool - flag(record) & 0x10 == 0 -end - """ refid(record::Record)::Int @@ -253,15 +222,6 @@ function hasrightposition(record::Record) return isfilled(record) && ismapped(record) end -""" - isnextmapped(record::Record)::Bool - -Test if the mate/next read of `record` is mapped. -""" -function isnextmapped(record::Record)::Bool - return isfilled(record) && (flag(record) & SAM.FLAG_MUNMAP == 0) -end - """ nextrefid(record::Record)::Int diff --git a/src/flags.jl b/src/flags.jl new file mode 100644 index 0000000..4679749 --- /dev/null +++ b/src/flags.jl @@ -0,0 +1,226 @@ +# Flags +# ========= +# + +""" + flag(record::XAMRecord})::UInt16 + +Get the bitwise flags of `record`. +The returned value is a `UInt16` of each flag being OR'd together. +The possible flags are: + + 0x0001 template having multiple segments in sequencing + 0x0002 each segment properly aligned according to the aligner + 0x0004 segment unmapped + 0x0008 next segment in the template unmapped + 0x0010 SEQ being reverse complemented + 0x0020 SEQ of the next segment in the template being reverse complemented + 0x0040 the first segment in the template + 0x0080 the last segment in the template + 0x0100 secondary alignment + 0x0200 not passing filters, such as platform/vendor quality controls + 0x0400 PCR or optical duplicate + 0x0800 supplementary alignment +""" + +function flag end + +# Bitwise flags (or FLAG). +for (name, bits, doc) in [ + (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), + (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), + (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with FLAG_PROPER_PAIR" ), + (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), + (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), + (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), + (:READ1, UInt16(0x040), "this is read1" ), + (:READ2, UInt16(0x080), "this is read2" ), + (:SECONDARY, UInt16(0x100), "not primary alignment" ), + (:QCFAIL, UInt16(0x200), "QC failure" ), + (:DUP, UInt16(0x400), "optical or PCR duplicate" ), + (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] + @assert bits isa UInt16 "The bits must be of type UInt16." + sym = Symbol("FLAG_", name) + docstring = """ $sym + SAM/BAM flag: $doc + + See also: [`flag`](@ref) + """ + @eval begin + @doc $(docstring) const $(sym) = $(bits) + end +end + +""" + ispaired(record::XAMRecord)::Bool + +Query whether the `record`'s template has multiple segments in sequencing. +""" +function ispaired(record::XAMRecord)::Bool + return flag(record) & FLAG_PAIRED == FLAG_PAIRED +end + +""" + isproperpair(record::XAMRecord)::Bool + +Query whether each segment of the `record`'s template properly aligned according to the aligner. +""" +function isproperpair(record::XAMRecord)::Bool + return flag(record) & PROPER_PAIR == PROPER_PAIR +end + +""" + isunmapped(record::XAMRecord)::Bool + +Query whether the `record` is unmapped. +""" +function isunmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_UNMAP == FLAG_UNMAP +end + +""" + ismapped(record::XAMRecord)::Bool + +Query whether the `record` is mapped. +""" +function ismapped(record::XAMRecord)::Bool + # return flag(record) & FLAG_UNMAP == 0 + return isfilled(record) && (flag(record) & FLAG_UNMAP == 0) +end + +""" + ismateunmapped(record::XAMRecord)::Bool + +Query whether the `record`'s mate is unmapped. +""" +function ismateunmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_MUNMAP == FLAG_MUNMAP +end + +""" + isnextmapped(record::XAMRecord)::Bool + +Test if the mate/next read of `record` is mapped. +""" +function isnextmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_MUNMAP == 0 +end + +""" + isreverse(record::XAMRecord)::Bool + +Query whether the `record` is mapped to the reverse strand. +""" +function isreverse(record::XAMRecord)::Bool + return flag(record) & FLAG_REVERSE == FLAG_REVERSE +end + +""" + isforward(record::XAMRecord)::Bool + +Query whether the `record` is mapped to the forward strand. +""" +function isforward(record::XAMRecord)::Bool + return flag(record) & FLAG_REVERSE == 0 +end + +""" + ispositivestrand(record::XAMRecord)::Bool + +Query whether `record` is aligned to the positive strand. +""" +function ispositivestrand(record::XAMRecord)::Bool + return isforward(record) +end + +""" + ispositivestrand(record::XAMRecord)::Bool + +Query whether `record` is aligned to the negative strand. +""" +function isnegativestrand(record::XAMRecord)::Bool + return isreverse(record) +end + +""" + ismatereverse(record::XAMRecord)::Bool + +Query whether the `record`'s mate is mapped to the reverse strand. +""" +function ismatereverse(record::XAMRecord)::Bool + return flag(record) & FLAG_MREVERSE == FLAG_MREVERSE +end + +""" + isread1(record::XAMRecord)::Bool + +Query whether the `record` is read1. +""" +function isread1(record::XAMRecord)::Bool + return flag(record) & FLAG_READ1 == FLAG_READ1 +end + +""" + isread2(record::XAMRecord)::Bool + +Query whether the `record` is read2. +""" +function isread2(record::XAMRecord)::Bool + return flag(record) & FLAG_READ2 == FLAG_READ2 +end + +""" + issecondaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is a secondary alignment. +""" +function issecondaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SECONDARY == FLAG_SECONDARY +end + +""" + isprimaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is the primary alignment. +""" +function isprimaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SECONDARY == 0 +end + +""" + isqcfail(record::XAMRecord)::Bool + +Query whether the `record` did not pass filters, such as platform/vendor quality controls. +""" +function isqcfail(record::XAMRecord)::Bool + return flag(record) & FLAG_QCFAIL == FLAG_QCFAIL +end + +""" + isduplicate(record::XAMRecord)::Bool + +Query whether the `record` is a PCR or optical duplicate. +""" +function isduplicate(record::XAMRecord)::Bool + return flag(record) & FLAG_DUP == FLAG_DUP +end + +""" + issupplementaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is a supplementary alignment. +""" +function issupplementaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY +end + +""" + isprimary(record::XAMRecord)::Bool + +Query whether `record` is a primary line of the read. + +This is equivalent to `flag(record) & 0x900 == 0`. +""" +function isprimary(record::XAMRecord)::Bool + return flag(record) & 0x900 == 0 +end diff --git a/src/sam/flags.jl b/src/sam/flags.jl deleted file mode 100644 index ded0df1..0000000 --- a/src/sam/flags.jl +++ /dev/null @@ -1,29 +0,0 @@ -# SAM Flags -# ========= -# -# Bitwise flags (or FLAG). - -for (name, bits, doc) in [ - (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), - (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), - (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with SAM.FLAG_PROPER_PAIR" ), - (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), - (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), - (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), - (:READ1, UInt16(0x040), "this is read1" ), - (:READ2, UInt16(0x080), "this is read2" ), - (:SECONDARY, UInt16(0x100), "not primary alignment" ), - (:QCFAIL, UInt16(0x200), "QC failure" ), - (:DUP, UInt16(0x400), "optical or PCR duplicate" ), - (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] - @assert bits isa UInt16 "The bits must be of type UInt16." - sym = Symbol("FLAG_", name) - docstring = """ $sym - SAM/BAM flag: $doc - - See also: [`flag`](@ref) - """ - @eval begin - @doc $(docstring) const $(sym) = $(bits) - end -end diff --git a/src/sam/record.jl b/src/sam/record.jl index befa967..7a2a22b 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -159,26 +159,6 @@ function hasflag(record::Record) return isfilled(record) end -""" - ismapped(record::Record)::Bool - -Test if `record` is mapped. -""" -function ismapped(record::Record)::Bool - return isfilled(record) && (flag(record) & FLAG_UNMAP == 0) -end - -""" - isprimary(record::Record)::Bool - -Test if `record` is a primary line of the read. - -This is equivalent to `flag(record) & 0x900 == 0`. -""" -function isprimary(record::Record)::Bool - return flag(record) & 0x900 == 0 -end - """ refname(record::Record)::String @@ -227,15 +207,6 @@ function hasrightposition(record::Record) return hasposition(record) && hasalignment(record) end -""" - isnextmapped(record::Record)::Bool - -Test if the mate/next read of `record` is mapped. -""" -function isnextmapped(record::Record)::Bool - return isfilled(record) && (flag(record) & FLAG_MUNMAP == 0) -end - """ nextrefname(record::Record)::String diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 70cf2de..f3b73e6 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,7 +11,8 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, + ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. using Printf: @sprintf @@ -48,7 +49,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I end -include("flags.jl") include("metainfo.jl") include("record.jl") include("header.jl")