From 480dba455eaa14eec62e205858f6775184356a1e Mon Sep 17 00:00:00 2001 From: zeptodoctor <44736852+zeptodoctor@users.noreply.github.com> Date: Mon, 20 Jan 2020 22:21:02 +0000 Subject: [PATCH] build based on de4f93e --- dev/api/api/index.html | 6 +++--- dev/hts-files/index.html | 16 ++++++++-------- dev/index.html | 2 +- dev/search/index.html | 2 +- dev/search_index.js | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dev/api/api/index.html b/dev/api/api/index.html index 218941a..4f48435 100644 --- a/dev/api/api/index.html +++ b/dev/api/api/index.html @@ -1,5 +1,5 @@ -API Reference · XAM.jl

Public API Reference

Contents

Index

SAM API

The following methods and types are provided by the SAM submodule for public use.

XAM.SAM.MetaInfoMethod
MetaInfo(tag::AbstractString, value)

Create a SAM metainfo with tag and value.

tag is a two-byte ASCII string. If tag is "CO", value must be a string; otherwise, value is an iterable object with key and value pairs.

Examples

julia> SAM.MetaInfo("CO", "some comment")
+API Reference · XAM.jl

Public API Reference

Contents

Index

SAM API

The following methods and types are provided by the SAM submodule for public use.

XAM.SAM.MetaInfoMethod
MetaInfo(tag::AbstractString, value)

Create a SAM metainfo with tag and value.

tag is a two-byte ASCII string. If tag is "CO", value must be a string; otherwise, value is an iterable object with key and value pairs.

Examples

julia> SAM.MetaInfo("CO", "some comment")
 BioAlignments.SAM.MetaInfo:
     tag: CO
   value: some comment
@@ -13,7 +13,7 @@ BioAlignments.SAM.MetaInfo:
   value: SN=chr1 LN=12345
 
 julia> string(ans)
-"@SQ	SN:chr1	LN:12345"
source
XAM.SAM.MetaInfoMethod
MetaInfo(str::AbstractString)

Create a SAM metainfo from str.

Examples

julia> SAM.MetaInfo("@CO	some comment")
+"@SQ	SN:chr1	LN:12345"
source
XAM.SAM.MetaInfoMethod
MetaInfo(str::AbstractString)

Create a SAM metainfo from str.

Examples

julia> SAM.MetaInfo("@CO	some comment")
 BioAlignments.SAM.MetaInfo:
     tag: CO
   value: some comment
@@ -21,4 +21,4 @@ BioAlignments.SAM.MetaInfo:
 julia> SAM.MetaInfo("@SQ	SN:chr1	LN:12345")
 BioAlignments.SAM.MetaInfo:
     tag: SQ
-  value: SN=chr1 LN=12345
source
XAM.SAM.ReaderMethod
SAM.Reader(input::IO)

Create a data reader of the SAM file format.

Arguments

  • input: data source
source
XAM.SAM.RecordMethod
SAM.Record(str::AbstractString)

Create a SAM record from str. This function verifies the format and indexes fields for accessors.

source
XAM.SAM.RecordMethod
SAM.Record(data::Vector{UInt8})

Create a SAM record from data. This function verifies the format and indexes fields for accessors. Note that the ownership of data is transferred to a new record object.

source
XAM.SAM.WriterType
Writer(output::IO, header::Header=Header())

Create a data writer of the SAM file format.

Arguments

  • output: data sink
  • header=Header(): SAM header object
source
Base.findallMethod
find(header::Header, key::AbstractString)::Vector{MetaInfo}

Find metainfo objects satisfying SAM.tag(metainfo) == key.

source
XAM.SAM.auxdataMethod
auxdata(record::Record)::Dict{String,Any}

Get the auxiliary data (optional fields) of record.

source
XAM.SAM.iscommentMethod
iscomment(metainfo::MetaInfo)::Bool

Test if metainfo is a comment (i.e. its tag is "CO").

source
XAM.SAM.isprimaryMethod
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.SAM.keyvaluesMethod
keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}}

Get the values of metainfo as string pairs.

source
XAM.SAM.nextrefnameMethod
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.SAM.positionMethod
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.SAM.qualityMethod
quality(::Type{String}, record::Record)::String

Get the ASCII-encoded base quality of record.

source
XAM.SAM.qualityMethod
quality(record::Record)::Vector{UInt8}

Get the Phred-scaled base quality of record.

source
XAM.SAM.refnameMethod
refname(record::Record)::String

Get the reference sequence name of record.

source
XAM.SAM.sequenceMethod
sequence(::Type{String}, record::Record)::String

Get the segment sequence of record as String.

source
XAM.SAM.sequenceMethod
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
XAM.SAM.valueMethod
value(metainfo::MetaInfo)::String

Get the value of metainfo as a string.

source

BAM API

The following methods and types are provided by the BAM submodule for public use.

XAM.BAM.BAIMethod
BAI(filename::AbstractString)

Load a BAI index from filename.

source
XAM.BAM.ReaderType
BAM.Reader(input::IO; index=nothing)

Create a data reader of the BAM file format.

Arguments

  • input: data source
  • index=nothing: filepath to a random access index (currently bai is supported)
source
XAM.BAM.WriterType
BAM.Writer(output::BGZFStream, header::SAM.Header)

Create a data writer of the BAM file format.

Arguments

  • output: data sink
  • header: SAM header object
source
BioGenerics.headerMethod
header(reader::Reader; fillSQ::Bool=false)::SAM.Header

Get the header of reader.

If fillSQ is true, this function fills missing "SQ" metainfo in the header.

source
XAM.BAM.cigarFunction
cigar(record::Record)::String

Get the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.

Calling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar_rle.

source
XAM.BAM.cigar_rleFunction
cigar_rle(record::Record, checkCG::Bool = true)::Tuple{Vector{BioAlignments.Operation},Vector{Int}}

Get a run-length encoded tuple (ops, lens) of the CIGAR string in record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.

Calling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar.

source
XAM.BAM.ispositivestrandMethod
ispositivestrand(record::Record)::Bool

Test if record is aligned to the positive strand.

This is equivalent to flag(record) & 0x10 == 0.

source
XAM.BAM.isprimaryMethod
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.BAM.n_cigar_opFunction
n_cigar_op(record::Record, checkCG::Bool = true)

Return the number of operations in the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.

Calling this method with checkCG set to true (default) this method will always yield the number of operations in the true cigar string, because this is probably what you want, the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to get the number of operations in the cigar field of the BAM record, then set checkCG to false.

source
XAM.BAM.nextpositionMethod
nextposition(record::Record)::Int

Get the 1-based leftmost mapping position of the next/mate read of record.

source
XAM.BAM.nextrefnameMethod
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.BAM.positionMethod
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.BAM.refidMethod
refid(record::Record)::Int

Get the reference sequence ID of record.

The ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.

See also: BAM.rname

source
XAM.BAM.reflenMethod
reflen(record::Record)::Int

Get the length of the reference sequence this record applies to.

source
XAM.BAM.refnameMethod
refname(record::Record)::String

Get the reference sequence name of record.

See also: BAM.refid

source
XAM.BAM.sequenceMethod
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
+ value: SN=chr1 LN=12345
source
XAM.SAM.ReaderMethod
SAM.Reader(input::IO)

Create a data reader of the SAM file format.

Arguments

  • input: data source
source
XAM.SAM.RecordMethod
SAM.Record(str::AbstractString)

Create a SAM record from str. This function verifies the format and indexes fields for accessors.

source
XAM.SAM.RecordMethod
SAM.Record(data::Vector{UInt8})

Create a SAM record from data. This function verifies the format and indexes fields for accessors. Note that the ownership of data is transferred to a new record object.

source
XAM.SAM.WriterType
Writer(output::IO, header::Header=Header())

Create a data writer of the SAM file format.

Arguments

  • output: data sink
  • header=Header(): SAM header object
source
Base.findallMethod
findall(header::Header, key::AbstractString)::Vector{MetaInfo}

Find metainfo objects satisfying SAM.tag(metainfo) == key.

source
XAM.SAM.auxdataMethod
auxdata(record::Record)::Dict{String,Any}

Get the auxiliary data (optional fields) of record.

source
XAM.SAM.iscommentMethod
iscomment(metainfo::MetaInfo)::Bool

Test if metainfo is a comment (i.e. its tag is "CO").

source
XAM.SAM.isprimaryMethod
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.SAM.keyvaluesMethod
keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}}

Get the values of metainfo as string pairs.

source
XAM.SAM.nextrefnameMethod
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.SAM.positionMethod
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.SAM.qualityMethod
quality(::Type{String}, record::Record)::String

Get the ASCII-encoded base quality of record.

source
XAM.SAM.qualityMethod
quality(record::Record)::Vector{UInt8}

Get the Phred-scaled base quality of record.

source
XAM.SAM.refnameMethod
refname(record::Record)::String

Get the reference sequence name of record.

source
XAM.SAM.sequenceMethod
sequence(::Type{String}, record::Record)::String

Get the segment sequence of record as String.

source
XAM.SAM.sequenceMethod
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
XAM.SAM.valueMethod
value(metainfo::MetaInfo)::String

Get the value of metainfo as a string.

source

BAM API

The following methods and types are provided by the BAM submodule for public use.

XAM.BAM.BAIMethod
BAI(filename::AbstractString)

Load a BAI index from filename.

source
XAM.BAM.ReaderType
BAM.Reader(input::IO; index=nothing)

Create a data reader of the BAM file format.

Arguments

  • input: data source
  • index=nothing: filepath to a random access index (currently bai is supported)
source
XAM.BAM.WriterType
BAM.Writer(output::BGZFStream, header::SAM.Header)

Create a data writer of the BAM file format.

Arguments

  • output: data sink
  • header: SAM header object
source
BioGenerics.headerMethod
header(reader::Reader; fillSQ::Bool=false)::SAM.Header

Get the header of reader.

If fillSQ is true, this function fills missing "SQ" metainfo in the header.

source
XAM.BAM.cigarFunction
cigar(record::Record, checkCG::Bool = true)::String

Get the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true: when the true cigar is very long, constraints of the BAM format mean that the actual cigar string is stored in an extra tag, CG:B,I, and the cigar field stores a pseudo-cigar string.

When called with checkCG set to true (the default), this method always yields the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar_rle.

source
XAM.BAM.cigar_rleFunction
cigar_rle(record::Record, checkCG::Bool = true)::Tuple{Vector{BioAlignments.Operation},Vector{Int}}

Get a run-length encoded tuple (ops, lens) of the CIGAR string in record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true: when the true cigar is very long, constraints of the BAM format mean that the actual cigar string is stored in an extra tag, CG:B,I, and the cigar field stores a pseudo-cigar string.

When called with checkCG set to true (the default), this method always yields the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar.

source
XAM.BAM.ispositivestrandMethod
ispositivestrand(record::Record)::Bool

Test if record is aligned to the positive strand.

This is equivalent to flag(record) & 0x10 == 0.

source
XAM.BAM.isprimaryMethod
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.BAM.n_cigar_opFunction
n_cigar_op(record::Record, checkCG::Bool = true)

Return the number of operations in the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true: when the true cigar is very long, constraints of the BAM format mean that the actual cigar string is stored in an extra tag, CG:B,I, and the cigar field stores a pseudo-cigar string.

When called with checkCG set to true (the default), this method always yields the number of operations in the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to get the number of operations in the cigar field of the BAM record, then set checkCG to false.

source
XAM.BAM.nextpositionMethod
nextposition(record::Record)::Int

Get the 1-based leftmost mapping position of the next/mate read of record.

source
XAM.BAM.nextrefnameMethod
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.BAM.positionMethod
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.BAM.refidMethod
refid(record::Record)::Int

Get the reference sequence ID of record.

The ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.

See also: BAM.refname

source
XAM.BAM.reflenMethod
reflen(record::Record)::Int

Get the length of the reference sequence this record applies to.

source
XAM.BAM.refnameMethod
refname(record::Record)::String

Get the reference sequence name of record.

See also: BAM.refid

source
XAM.BAM.sequenceMethod
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
diff --git a/dev/hts-files/index.html b/dev/hts-files/index.html index 13984e4..65a8b7e 100644 --- a/dev/hts-files/index.html +++ b/dev/hts-files/index.html @@ -1,12 +1,12 @@ -SAM and BAM · XAM.jl

SAM and BAM

Introduction

High-throughput sequencing (HTS) technologies generate a large amount of data in the form of a large number of nucleotide sequencing reads. One of the most common tasks in bioinformatics is to align these reads against known reference genomes, chromosomes, or contigs. BioAlignments provides several data formats commonly used for this kind of task.

BioAlignments offers high-performance tools for SAM and BAM file formats, which are the most popular file formats.

If you have questions about the SAM and BAM formats or any of the terminology used when discussing these formats, see the published [specification][samtools-spec], which is maintained by the [samtools group][samtools].

A very very simple SAM file looks like the following:

@HD VN:1.6 SO:coordinate
+SAM and BAM · XAM.jl

SAM and BAM

Introduction

High-throughput sequencing (HTS) technologies generate a large amount of data in the form of a large number of nucleotide sequencing reads. One of the most common tasks in bioinformatics is to align these reads against known reference genomes, chromosomes, or contigs. BioAlignments provides several data formats commonly used for this kind of task.

BioAlignments offers high-performance tools for SAM and BAM file formats, which are the most popular file formats.

If you have questions about the SAM and BAM formats or any of the terminology used when discussing these formats, see the published specification, which is maintained by the samtools group.

A very very simple SAM file looks like the following:

@HD VN:1.6 SO:coordinate
 @SQ SN:ref LN:45
 r001   99 ref  7 30 8M2I4M1D3M = 37  39 TTAGATAAAGGATACTG *
 r002    0 ref  9 30 3S6M1P1I4M *  0   0 AAAAGATAAGGATA    *
 r003    0 ref  9 30 5S6M       *  0   0 GCCTAAGCTAA       * SA:Z:ref,29,-,6H5M,17,0;
 r004    0 ref 16 30 6M14N5M    *  0   0 ATAGCTTCAGC       *
 r003 2064 ref 29 17 6H5M       *  0   0 TAGGC             * SA:Z:ref,9,+,5S6M,30,1;
-r001  147 ref 37 30 9M         =  7 -39 CAGCGGCAT         * NM:i:1

Where the first two lines are part of the "header", and the following lines are "records". Each record describes how a read aligns to some reference sequence. Sometimes one record describes one read, but there are other cases like chimeric reads and split alignments, where multiple records apply to one read. In the example above, r003 is a chimeric read, and r004 is a split alignment, and r001 are mate pair reads. Again, we refer you to the official [specification][samtools-spec] for more details.

A BAM file stores this same information but in a binary and compressible format that does not make for pretty printing here!

Reading SAM and BAM files

A typical script iterating over all records in a file looks like below:

using BioAlignments
+r001  147 ref 37 30 9M         =  7 -39 CAGCGGCAT         * NM:i:1

Where the first two lines are part of the "header", and the following lines are "records". Each record describes how a read aligns to some reference sequence. Sometimes one record describes one read, but there are other cases like chimeric reads and split alignments, where multiple records apply to one read. In the example above, r003 is a chimeric read, r004 is a split alignment, and the two r001 records are mate pair reads. Again, we refer you to the official specification for more details.

A BAM file stores this same information but in a binary and compressible format that does not make for pretty printing here!

Reading SAM and BAM files

A typical script iterating over all records in a file looks like below:

using BioAlignments
 
 # Open a BAM file.
 reader = open(BAM.Reader, "data.bam")
@@ -26,7 +26,7 @@ record = BAM.Record()
 while !eof(reader)
     read!(reader, record)
     # do something
-end

SAM and BAM Headers

Both SAM.Reader and BAM.Reader implement the header function, which returns a SAM.Header object. To extract certain information out of the headers, you can use the find method on the header to extract information according to SAM/BAM tag. Again we refer you to the [specification][samtools-spec] for full details of all the different tags that can occur in headers, and what they mean.

Below is an example of extracting all the info about the reference sequences from the BAM header. In SAM/BAM, any description of a reference sequence is stored in the header, under a tag denoted SQ (think reference SeQuence!).

julia> reader = open(SAM.Reader, "data.sam");
+end

SAM and BAM Headers

Both SAM.Reader and BAM.Reader implement the header function, which returns a SAM.Header object. To extract certain information out of the headers, you can use the find method on the header to extract information according to SAM/BAM tag. Again we refer you to the specification for full details of all the different tags that can occur in headers, and what they mean.

Below is an example of extracting all the info about the reference sequences from the BAM header. In SAM/BAM, any description of a reference sequence is stored in the header, under a tag denoted SQ (think reference SeQuence!).

julia> reader = open(SAM.Reader, "data.sam");
 
 julia> find(header(reader), "SQ")
 7-element Array{Bio.Align.SAM.MetaInfo,1}:
@@ -51,17 +51,17 @@ julia> find(header(reader), "SQ")
  Bio.Align.SAM.MetaInfo:
     tag: SQ
   value: SN=mitochondria LN=366924
-

In the above we can see there were 7 sequences in the reference: 5 chromosomes, one chloroplast sequence, and one mitochondrial sequence.

SAM and BAM Records

BioAlignments supports the following accessors for SAM.Record types.

XAM.SAM.flagFunction
flag(record::Record)::UInt16

Get the bitwise flag of record.

source
XAM.SAM.isprimaryFunction
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.SAM.refnameFunction
refname(record::Record)::String

Get the reference sequence name of record.

source
XAM.SAM.positionFunction
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.SAM.nextrefnameFunction
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.SAM.sequenceFunction
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
sequence(::Type{String}, record::Record)::String

Get the segment sequence of record as String.

source
XAM.SAM.qualityFunction
quality(record::Record)::Vector{UInt8}

Get the Phred-scaled base quality of record.

source
quality(::Type{String}, record::Record)::String

Get the ASCII-encoded base quality of record.

source
XAM.SAM.auxdataFunction
auxdata(record::Record)::Dict{String,Any}

Get the auxiliary data (optional fields) of record.

source

BioAlignments supports the following accessors for BAM.Record types.

XAM.BAM.flagFunction
flag(record::Record)::UInt16

Get the bitwise flag of record.

source
XAM.BAM.isprimaryFunction
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.BAM.refidFunction
refid(record::Record)::Int

Get the reference sequence ID of record.

The ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.

See also: BAM.rname

source
XAM.BAM.refnameFunction
refname(record::Record)::String

Get the reference sequence name of record.

See also: BAM.refid

source
XAM.BAM.reflenFunction
reflen(record::Record)::Int

Get the length of the reference sequence this record applies to.

source
XAM.BAM.positionFunction
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.BAM.nextrefidFunction
nextrefid(record::Record)::Int

Get the next/mate reference sequence ID of record.

source
XAM.BAM.nextrefnameFunction
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.BAM.nextpositionFunction
nextposition(record::Record)::Int

Get the 1-based leftmost mapping position of the next/mate read of record.

source
XAM.BAM.cigarFunction
cigar(record::Record)::String

Get the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.

Calling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar_rle.

source
XAM.BAM.sequenceFunction
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source

Accessing auxiliary data

SAM and BAM records support the storing of optional data fields associated with tags.

Tagged auxiliary data follows a format of TAG:TYPE:VALUE. TAG is a two-letter string, and each tag can only appear once per record. TYPE is a single case-sensetive letter which defined the format of VALUE.

TypeDescription
'A'Printable character
'i'Signed integer
'f'Single-precision floating number
'Z'Printable string, including space
'H'Byte array in Hex format
'B'Integer of numeric array

For more information about these tags and their types we refer you to the [SAM/BAM specification][samtools-spec] and the additional [optional fields specification][samtags] document.

There are some tags that are reserved, predefined standard tags, for specific uses.

To access optional fields stored in tags, you use getindex indexing syntax on the record object. Note that accessing optional tag fields will result in type instability in Julia. This is because the type of the optional data is not known until run-time, as the tag is being read. This can have a significant impact on performance. To limit this, if the user knows the type of a value in advance, specifying it as a type annotation will alleviate the problem:

Below is an example of looping over records in a bam file and using indexing syntax to get the data stored in the "NM" tag. Note the UInt8 type assertion to alleviate type instability.

for record in open(BAM.Reader, "data.bam")
+

In the above we can see there were 7 sequences in the reference: 5 chromosomes, one chloroplast sequence, and one mitochondrial sequence.

SAM and BAM Records

BioAlignments supports the following accessors for SAM.Record types.

XAM.SAM.flagFunction
flag(record::Record)::UInt16

Get the bitwise flag of record.

source
XAM.SAM.isprimaryFunction
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.SAM.refnameFunction
refname(record::Record)::String

Get the reference sequence name of record.

source
XAM.SAM.positionFunction
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.SAM.nextrefnameFunction
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.SAM.sequenceFunction
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source
sequence(::Type{String}, record::Record)::String

Get the segment sequence of record as String.

source
XAM.SAM.qualityFunction
quality(record::Record)::Vector{UInt8}

Get the Phred-scaled base quality of record.

source
quality(::Type{String}, record::Record)::String

Get the ASCII-encoded base quality of record.

source
XAM.SAM.auxdataFunction
auxdata(record::Record)::Dict{String,Any}

Get the auxiliary data (optional fields) of record.

source

BioAlignments supports the following accessors for BAM.Record types.

XAM.BAM.flagFunction
flag(record::Record)::UInt16

Get the bitwise flag of record.

source
XAM.BAM.isprimaryFunction
isprimary(record::Record)::Bool

Test if record is a primary line of the read.

This is equivalent to flag(record) & 0x900 == 0.

source
XAM.BAM.refidFunction
refid(record::Record)::Int

Get the reference sequence ID of record.

The ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.

See also: BAM.refname

source
XAM.BAM.refnameFunction
refname(record::Record)::String

Get the reference sequence name of record.

See also: BAM.refid

source
XAM.BAM.reflenFunction
reflen(record::Record)::Int

Get the length of the reference sequence this record applies to.

source
XAM.BAM.positionFunction
position(record::Record)::Int

Get the 1-based leftmost mapping position of record.

source
XAM.BAM.nextrefidFunction
nextrefid(record::Record)::Int

Get the next/mate reference sequence ID of record.

source
XAM.BAM.nextrefnameFunction
nextrefname(record::Record)::String

Get the reference name of the mate/next read of record.

source
XAM.BAM.nextpositionFunction
nextposition(record::Record)::Int

Get the 1-based leftmost mapping position of the next/mate read of record.

source
XAM.BAM.cigarFunction
cigar(record::Record, checkCG::Bool = true)::String

Get the CIGAR string of record.

Note that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true: when the true cigar is very long, constraints of the BAM format mean that the actual cigar string is stored in an extra tag, CG:B,I, and the cigar field stores a pseudo-cigar string.

When called with checkCG set to true (the default), this method always yields the true cigar string, because this is probably what you want the vast majority of the time.

If you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.

See also BAM.cigar_rle.

source
XAM.BAM.sequenceFunction
sequence(record::Record)::BioSequences.DNASequence

Get the segment sequence of record.

source

Accessing auxiliary data

SAM and BAM records support the storing of optional data fields associated with tags.

Tagged auxiliary data follows a format of TAG:TYPE:VALUE. TAG is a two-letter string, and each tag can only appear once per record. TYPE is a single case-sensitive letter which defines the format of VALUE.

| Type | Description                       |
|------|-----------------------------------|
| 'A'  | Printable character               |
| 'i'  | Signed integer                    |
| 'f'  | Single-precision floating number  |
| 'Z'  | Printable string, including space |
| 'H'  | Byte array in Hex format          |
| 'B'  | Integer or numeric array          |

For more information about these tags and their types we refer you to the SAM/BAM specification and the additional optional fields specification document.

There are some tags that are reserved, predefined standard tags, for specific uses.

To access optional fields stored in tags, you use getindex indexing syntax on the record object. Note that accessing optional tag fields will result in type instability in Julia. This is because the type of the optional data is not known until run-time, as the tag is being read. This can have a significant impact on performance. To limit this, if the user knows the type of a value in advance, specifying it as a type annotation will alleviate the problem:

Below is an example of looping over records in a bam file and using indexing syntax to get the data stored in the "NM" tag. Note the UInt8 type assertion to alleviate type instability.

for record in open(BAM.Reader, "data.bam")
     nm = record["NM"]::UInt8
     # do something
-end

Getting records in a range

BioAlignments supports the BAI index to fetch records in a specific range from a BAM file. from a BAM file. [Samtools][samtools] provides index subcommand to create an index file (.bai) from a sorted BAM file.

$ samtools index -b SRR1238088.sort.bam
+end

Getting records in a range

BioAlignments supports the BAI index to fetch records in a specific range from a BAM file. [Samtools](https://samtools.github.io/) provides the index subcommand to create an index file (.bai) from a sorted BAM file.

$ samtools index -b SRR1238088.sort.bam
 $ ls SRR1238088.sort.bam*
 SRR1238088.sort.bam     SRR1238088.sort.bam.bai

eachoverlap(reader, chrom, range) returns an iterator of BAM records overlapping the query interval:

reader = open(BAM.Reader, "SRR1238088.sort.bam", index="SRR1238088.sort.bam.bai")
 for record in eachoverlap(reader, "Chr2", 10000:11000)
     # `record` is a BAM.Record object
     # ...
 end
-close(reader)

Getting records overlapping genomic features

eachoverlap also accepts the Interval type defined in [GenomicFeatures.jl][genomicfeatures].

This allows you to do things like first read in the genomic features from a GFF3 file, and then for each feature, iterate over all the BAM records that overlap with that feature.

# Load GFF3 module.
+close(reader)

Getting records overlapping genomic features

eachoverlap also accepts the Interval type defined in GenomicFeatures.jl.

This allows you to do things like first read in the genomic features from a GFF3 file, and then for each feature, iterate over all the BAM records that overlap with that feature.

# Load GFF3 module.
 using GenomicFeatures
 using BioAlignments
 
@@ -98,9 +98,9 @@ SAM.Header(SAM.MetaInfo[SAM.MetaInfo:
   value: SN=ref LN=45])
 

Then to create the writer for a SAM file, construct a SAM.Writer using the header and an IO type:

julia> samw = SAM.Writer(open("my-data.sam", "w"), h)
 SAM.Writer(IOStream(<file my-data.sam>))
-

Making a BAM writer is slightly different, as you need to use a specific stream type from the [BGZFStreams][bgzfstreams] package:

julia> using BGZFStreams
+

Making a BAM writer is slightly different, as you need to use a specific stream type from the [BGZFStreams](https://github.com/BioJulia/BGZFStreams.jl) package:

julia> using BGZFStreams
 
 julia> bamw = BAM.Writer(BGZFStream(open("my-data.bam", "w"), "w"))
 BAM.Writer(BGZFStreams.BGZFStream{IOStream}(<mode=write>))
 

Once you have a BAM or SAM writer, you can use the write method to write BAM.Records or SAM.Records to file:

julia> write(bamw, rec) # Here rec is a `BAM.Record`
-330780

[samtools]: https://samtools.github.io/ [samtools-spec]: https://samtools.github.io/hts-specs/SAMv1.pdf [samtags]: https://samtools.github.io/hts-specs/SAMtags.pdf [bgzfstreams]: https://github.com/BioJulia/BGZFStreams.jl [genomicfeatures]: https://github.com/BioJulia/GenomicFeatures.jl

+330780
diff --git a/dev/index.html b/dev/index.html index d17fef0..9cd5281 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,2 +1,2 @@ -Home · XAM.jl

XAM.jl

Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public. Latest Release MIT license Join the chat at https://gitter.im/BioJulia/XAM.jl

Description

XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.

Installation

XAM is made available to install through BioJulia's package registry. Julia's package manager only uses the "General" package registry by default. Your Julia configuration needs to include the BioJulia registry to be able to install the latest version of XAM.

To add the BioJulia registry from the Julia REPL, press ] to enter pkg mode, then enter the following command:

registry add https://github.com/BioJulia/BioJuliaRegistry.git

Once the registry is added, you can install XAM while in pkg mode with the following command:

add XAM

If you are interested in the cutting edge of the development, please check out the develop branch to try new features before release.

+Home · XAM.jl

XAM.jl

Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public. Latest Release MIT license Join the chat at https://gitter.im/BioJulia/XAM.jl

Description

XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.

Installation

XAM is made available to install through BioJulia's package registry. Julia's package manager only uses the "General" package registry by default. Your Julia configuration needs to include the BioJulia registry to be able to install the latest version of XAM.

To add the BioJulia registry from the Julia REPL, press ] to enter pkg mode, then enter the following command:

registry add https://github.com/BioJulia/BioJuliaRegistry.git

Once the registry is added, you can install XAM while in pkg mode with the following command:

add XAM

If you are interested in the cutting edge of the development, please check out the develop branch to try new features before release.

diff --git a/dev/search/index.html b/dev/search/index.html index 319f86e..c1e5564 100644 --- a/dev/search/index.html +++ b/dev/search/index.html @@ -1,2 +1,2 @@ -Search · XAM.jl

Loading search...

    +Search · XAM.jl

    Loading search...

      diff --git a/dev/search_index.js b/dev/search_index.js index d2d0e45..bdfae09 100644 --- a/dev/search_index.js +++ b/dev/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"#XAM.jl-1","page":"Home","title":"XAM.jl","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"(Image: Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.) (Image: Latest Release) (Image: MIT license) (Image: Join the chat at https://gitter.im/BioJulia/XAM.jl)","category":"page"},{"location":"#Description-1","page":"Home","title":"Description","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.","category":"page"},{"location":"#Installation-1","page":"Home","title":"Installation","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"XAM is made available to install through BioJulia's package registry. Julia's package manager only uses the \"General\" package registry by default. 
Your Julia configuration needs to include the BioJulia registry to be able to install the latest version of XAM.","category":"page"},{"location":"#","page":"Home","title":"Home","text":"To add the BioJulia registry from the Julia REPL, press ] to enter pkg mode, then enter the following command:","category":"page"},{"location":"#","page":"Home","title":"Home","text":"registry add https://github.com/BioJulia/BioJuliaRegistry.git","category":"page"},{"location":"#","page":"Home","title":"Home","text":"Once the registry is added, you can install XAM while in pkg mode with the following command:","category":"page"},{"location":"#","page":"Home","title":"Home","text":"add XAM","category":"page"},{"location":"#","page":"Home","title":"Home","text":"If you are interested in the cutting edge of the development, please check out the develop branch to try new features before release.","category":"page"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"CurrentModule = XAM\nDocTestSetup = quote\n using XAM\nend","category":"page"},{"location":"api/api/#Public-API-Reference-1","page":"API Reference","title":"Public API Reference","text":"","category":"section"},{"location":"api/api/#Contents-1","page":"API Reference","title":"Contents","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Pages = [\"api.md\"]","category":"page"},{"location":"api/api/#Index-1","page":"API Reference","title":"Index","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Pages = [\"api.md\"]","category":"page"},{"location":"api/api/#SAM-API-1","page":"API Reference","title":"SAM API","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"The following methods and types are provided by the SAM submodule for public use.","category":"page"},{"location":"api/api/#","page":"API Reference","title":"API 
Reference","text":"Modules = [XAM.SAM]\nprivate = false","category":"page"},{"location":"api/api/#XAM.SAM.FLAG_DUP","page":"API Reference","title":"XAM.SAM.FLAG_DUP","text":"0x0400: optical or PCR duplicate\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_MREVERSE","page":"API Reference","title":"XAM.SAM.FLAG_MREVERSE","text":"0x0020: the mate is mapped to the reverse strand\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_MUNMAP","page":"API Reference","title":"XAM.SAM.FLAG_MUNMAP","text":"0x0008: the mate is unmapped\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_PAIRED","page":"API Reference","title":"XAM.SAM.FLAG_PAIRED","text":"0x0001: the read is paired in sequencing, no matter whether it is mapped in a pair\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_PROPER_PAIR","page":"API Reference","title":"XAM.SAM.FLAG_PROPER_PAIR","text":"0x0002: the read is mapped in a proper pair\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_QCFAIL","page":"API Reference","title":"XAM.SAM.FLAG_QCFAIL","text":"0x0200: QC failure\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_READ1","page":"API Reference","title":"XAM.SAM.FLAG_READ1","text":"0x0040: this is read1\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_READ2","page":"API Reference","title":"XAM.SAM.FLAG_READ2","text":"0x0080: this is read2\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_REVERSE","page":"API Reference","title":"XAM.SAM.FLAG_REVERSE","text":"0x0010: the read is mapped to the reverse strand\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_SECONDARY","page":"API Reference","title":"XAM.SAM.FLAG_SECONDARY","text":"0x0100: not primary alignment\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_SUPPLEMENTARY","page":"API Reference","title":"XAM.SAM.FLAG_SUPPLEMENTARY","text":"0x0800: 
supplementary alignment\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_UNMAP","page":"API Reference","title":"XAM.SAM.FLAG_UNMAP","text":"0x0004: the read itself is unmapped; conflictive with SAM.FLAGPROPERPAIR\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.Header-Tuple{}","page":"API Reference","title":"XAM.SAM.Header","text":"SAM.Header()\n\nCreate an empty header.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.MetaInfo-Tuple{AbstractString,Any}","page":"API Reference","title":"XAM.SAM.MetaInfo","text":"MetaInfo(tag::AbstractString, value)\n\nCreate a SAM metainfo with tag and value.\n\ntag is a two-byte ASCII string. If tag is \"CO\", value must be a string; otherwise, value is an iterable object with key and value pairs.\n\nExamples\n\njulia> SAM.MetaInfo(\"CO\", \"some comment\")\nBioAlignments.SAM.MetaInfo:\n tag: CO\n value: some comment\n\njulia> string(ans)\n\"@CO\tsome comment\"\n\njulia> SAM.MetaInfo(\"SQ\", [\"SN\" => \"chr1\", \"LN\" => 12345])\nBioAlignments.SAM.MetaInfo:\n tag: SQ\n value: SN=chr1 LN=12345\n\njulia> string(ans)\n\"@SQ\tSN:chr1\tLN:12345\"\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.MetaInfo-Tuple{AbstractString}","page":"API Reference","title":"XAM.SAM.MetaInfo","text":"MetaInfo(str::AbstractString)\n\nCreate a SAM metainfo from str.\n\nExamples\n\njulia> SAM.MetaInfo(\"@CO\tsome comment\")\nBioAlignments.SAM.MetaInfo:\n tag: CO\n value: some comment\n\njulia> SAM.MetaInfo(\"@SQ\tSN:chr1\tLN:12345\")\nBioAlignments.SAM.MetaInfo:\n tag: SQ\n value: SN=chr1 LN=12345\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Reader-Tuple{IO}","page":"API Reference","title":"XAM.SAM.Reader","text":"SAM.Reader(input::IO)\n\nCreate a data reader of the SAM file format.\n\nArguments\n\ninput: data source\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{AbstractString}","page":"API 
Reference","title":"XAM.SAM.Record","text":"SAM.Record(str::AbstractString)\n\nCreate a SAM record from str. This function verifies the format and indexes fields for accessors.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{Array{UInt8,1}}","page":"API Reference","title":"XAM.SAM.Record","text":"SAM.Record(data::Vector{UInt8})\n\nCreate a SAM record from data. This function verifies the format and indexes fields for accessors. Note that the ownership of data is transferred to a new record object.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{}","page":"API Reference","title":"XAM.SAM.Record","text":"SAM.Record()\n\nCreate an unfilled SAM record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Writer","page":"API Reference","title":"XAM.SAM.Writer","text":"Writer(output::IO, header::Header=Header())\n\nCreate a data writer of the SAM file format.\n\nArguments\n\noutput: data sink\nheader=Header(): SAM header object\n\n\n\n\n\n","category":"type"},{"location":"api/api/#Base.findall-Tuple{XAM.SAM.Header,AbstractString}","page":"API Reference","title":"Base.findall","text":"find(header::Header, key::AbstractString)::Vector{MetaInfo}\n\nFind metainfo objects satisfying SAM.tag(metainfo) == key.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#BioGenerics.header-Tuple{XAM.SAM.Reader}","page":"API Reference","title":"BioGenerics.header","text":"header(reader::Reader)::Header\n\nGet the header of reader.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.alignlength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.alignment-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.auxdata-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.auxdata","text":"auxdata(record::Record)::Dict{String,Any}\n\nGet the auxiliary data (optional fields) of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.cigar-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.flag-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.iscomment-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.iscomment","text":"iscomment(metainfo::MetaInfo)::Bool\n\nTest if metainfo is a comment (i.e. its tag is \"CO\").\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.ismapped-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.isnextmapped-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.isprimary-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.keyvalues-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.keyvalues","text":"keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}}\n\nGet the values of metainfo as string 
pairs.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.mappingquality-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.nextposition-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the position of the mate/next read of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.nextrefname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.position-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.quality-Tuple{Type{String},XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.quality","text":"quality(::Type{String}, record::Record)::String\n\nGet the ASCII-encoded base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.quality-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the Phred-scaled base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.refname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.rightposition-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.seqlength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.sequence-Tuple{Type{String},XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.sequence","text":"sequence(::Type{String}, record::Record)::String\n\nGet the segment sequence of record as String.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.sequence-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.tag-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.tag","text":"tag(metainfo::MetaInfo)::String\n\nGet the tag of metainfo.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.templength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.tempname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.value-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.value","text":"value(metainfo::MetaInfo)::String\n\nGet the value of metainfo as a string.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#BAM-API-1","page":"API Reference","title":"BAM API","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"The following methods and types are provided by the BAM submodule for public use.","category":"page"},{"location":"api/api/#","page":"API 
Reference","title":"API Reference","text":"Modules = [XAM.BAM]\nprivate = false","category":"page"},{"location":"api/api/#XAM.BAM.BAI-Tuple{AbstractString}","page":"API Reference","title":"XAM.BAM.BAI","text":"BAI(filename::AbstractString)\n\nLoad a BAI index from filename.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.BAI-Tuple{IO}","page":"API Reference","title":"XAM.BAM.BAI","text":"BAI(input::IO)\n\nLoad a BAI index from input.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.Reader","page":"API Reference","title":"XAM.BAM.Reader","text":"BAM.Reader(input::IO; index=nothing)\n\nCreate a data reader of the BAM file format.\n\nArguments\n\ninput: data source\nindex=nothing: filepath to a random access index (currently bai is supported)\n\n\n\n\n\n","category":"type"},{"location":"api/api/#XAM.BAM.Record","page":"API Reference","title":"XAM.BAM.Record","text":"BAM.Record()\n\nCreate an unfilled BAM record.\n\n\n\n\n\n","category":"type"},{"location":"api/api/#XAM.BAM.Writer","page":"API Reference","title":"XAM.BAM.Writer","text":"BAM.Writer(output::BGZFStream, header::SAM.Header)\n\nCreate a data writer of the BAM file format.\n\nArguments\n\noutput: data sink\nheader: SAM header object\n\n\n\n\n\n","category":"type"},{"location":"api/api/#BioGenerics.header-Tuple{XAM.BAM.Reader}","page":"API Reference","title":"BioGenerics.header","text":"header(reader::Reader; fillSQ::Bool=false)::SAM.Header\n\nGet the header of reader.\n\nIf fillSQ is true, this function fills missing \"SQ\" metainfo in the header.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.alignlength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.alignment-Tuple{XAM.BAM.Record}","page":"API 
Reference","title":"XAM.BAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.auxdata-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.auxdata","text":"auxdata(record::Record)::BAM.AuxData\n\nGet the auxiliary data of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.cigar","page":"API Reference","title":"XAM.BAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar_rle.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.cigar_rle","page":"API Reference","title":"XAM.BAM.cigar_rle","text":"cigar_rle(record::Record, checkCG::Bool = true)::Tuple{Vector{BioAlignments.Operation},Vector{Int}}\n\nGet a run-length encoded tuple (ops, lens) of the CIGAR string in record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. 
However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.flag-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.ismapped-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.isnextmapped-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.ispositivestrand-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.ispositivestrand","text":"ispositivestrand(record::Record)::Bool\n\nTest if record is aligned to the positive strand.\n\nThis is equivalent to flag(record) & 0x10 == 0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.isprimary-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 
0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.mappingquality-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.n_cigar_op","page":"API Reference","title":"XAM.BAM.n_cigar_op","text":"n_cigar_op(record::Record, checkCG::Bool = true)\n\nReturn the number of operations in the CIGAR string of record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the number of operations in the true cigar string, because this is probably what you want, the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to get the number of operations in the cigar field of the BAM record, then set checkCG to false.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.nextposition-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the 1-based leftmost mapping position of the next/mate read of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.nextrefid-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextrefid","text":"nextrefid(record::Record)::Int\n\nGet the next/mate reference sequence ID of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.nextrefname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.position-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.quality-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.refid-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.refid","text":"refid(record::Record)::Int\n\nGet the reference sequence ID of record.\n\nThe ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.\n\nSee also: BAM.rname\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.reflen-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.reflen","text":"reflen(record::Record)::Int\n\nGet the length of the reference sequence this record applies to.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.refname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\nSee also: BAM.refid\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.rightposition-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.seqlength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.sequence-Tuple{XAM.BAM.Record}","page":"API 
Reference","title":"XAM.BAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.templength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.tempname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"method"},{"location":"hts-files/#SAM-and-BAM-1","page":"SAM and BAM","title":"SAM and BAM","text":"","category":"section"},{"location":"hts-files/#Introduction-1","page":"SAM and BAM","title":"Introduction","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"High-throughput sequencing (HTS) technologies generate a large amount of data in the form of a large number of nucleotide sequencing reads. One of the most common tasks in bioinformatics is to align these reads against known reference genomes, chromosomes, or contigs. 
BioAlignments provides several data formats commonly used for this kind of task.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments offers high-performance tools for SAM and BAM file formats, which are the most popular file formats.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"If you have questions about the SAM and BAM formats or any of the terminology used when discussing these formats, see the published [specification][samtools-spec], which is maintained by the [samtools group][samtools].","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A very very simple SAM file looks like the following:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"@HD VN:1.6 SO:coordinate\n@SQ SN:ref LN:45\nr001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *\nr002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *\nr003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;\nr004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *\nr003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;\nr001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Where the first two lines are part of the \"header\", and the following lines are \"records\". Each record describes how a read aligns to some reference sequence. Sometimes one record describes one read, but there are other cases like chimeric reads and split alignments, where multiple records apply to one read. In the example above, r003 is a chimeric read, and r004 is a split alignment, and r001 are mate pair reads. 
Again, we refer you to the official [specification][samtools-spec] for more details.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A BAM file stores this same information but in a binary and compressible format that does not make for pretty printing here!","category":"page"},{"location":"hts-files/#Reading-SAM-and-BAM-files-1","page":"SAM and BAM","title":"Reading SAM and BAM files","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A typical script iterating over all records in a file looks like below:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"using BioAlignments\n\n# Open a BAM file.\nreader = open(BAM.Reader, \"data.bam\")\n\n# Iterate over BAM records.\nfor record in reader\n # `record` is a BAM.Record object.\n if BAM.ismapped(record)\n # Print the mapped position.\n println(BAM.refname(record), ':', BAM.position(record))\n end\nend\n\n# Close the BAM file.\nclose(reader)","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"The size of a BAM file is often extremely large. The iterator interface demonstrated above allocates an object for each record and that may be a bottleneck of reading data from a BAM file. 
In-place reading reuses a pre-allocated object for every record and less memory allocation happens in reading:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"reader = open(BAM.Reader, \"data.bam\")\nrecord = BAM.Record()\nwhile !eof(reader)\n read!(reader, record)\n # do something\nend","category":"page"},{"location":"hts-files/#SAM-and-BAM-Headers-1","page":"SAM and BAM","title":"SAM and BAM Headers","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Both SAM.Reader and BAM.Reader implement the header function, which returns a SAM.Header object. To extract certain information out of the headers, you can use the find method on the header to extract information according to SAM/BAM tag. Again we refer you to the [specification][samtools-spec] for full details of all the different tags that can occur in headers, and what they mean.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Below is an example of extracting all the info about the reference sequences from the BAM header. 
In SAM/BAM, any description of a reference sequence is stored in the header, under a tag denoted SQ (think reference SeQuence!).","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> reader = open(SAM.Reader, \"data.sam\");\n\njulia> find(header(reader), \"SQ\")\n7-element Array{Bio.Align.SAM.MetaInfo,1}:\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr1 LN=30427671\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr2 LN=19698289\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr3 LN=23459830\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr4 LN=18585056\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr5 LN=26975502\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=chloroplast LN=154478\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=mitochondria LN=366924\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"In the above we can see there were 7 sequences in the reference: 5 chromosomes, one chloroplast sequence, and one mitochondrial sequence.","category":"page"},{"location":"hts-files/#SAM-and-BAM-Records-1","page":"SAM and BAM","title":"SAM and BAM Records","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the following accessors for SAM.Record types.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"XAM.SAM.flag\nXAM.SAM.ismapped\nXAM.SAM.isprimary\nXAM.SAM.refname\nXAM.SAM.position\nXAM.SAM.rightposition\nXAM.SAM.isnextmapped\nXAM.SAM.nextrefname\nXAM.SAM.nextposition\nXAM.SAM.mappingquality\nXAM.SAM.cigar\nXAM.SAM.alignment\nXAM.SAM.alignlength\nXAM.SAM.tempname\nXAM.SAM.templength\nXAM.SAM.sequence\nXAM.SAM.seqlength\nXAM.SAM.quality\nXAM.SAM.auxdata","category":"page"},{"location":"hts-files/#XAM.SAM.flag","page":"SAM and BAM","title":"XAM.SAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of 
record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.ismapped","page":"SAM and BAM","title":"XAM.SAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.isprimary","page":"SAM and BAM","title":"XAM.SAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.refname","page":"SAM and BAM","title":"XAM.SAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.position","page":"SAM and BAM","title":"XAM.SAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.rightposition","page":"SAM and BAM","title":"XAM.SAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.isnextmapped","page":"SAM and BAM","title":"XAM.SAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.nextrefname","page":"SAM and BAM","title":"XAM.SAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.nextposition","page":"SAM and BAM","title":"XAM.SAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the position of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.mappingquality","page":"SAM and 
BAM","title":"XAM.SAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.cigar","page":"SAM and BAM","title":"XAM.SAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.alignment","page":"SAM and BAM","title":"XAM.SAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.alignlength","page":"SAM and BAM","title":"XAM.SAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.tempname","page":"SAM and BAM","title":"XAM.SAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.templength","page":"SAM and BAM","title":"XAM.SAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.sequence","page":"SAM and BAM","title":"XAM.SAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\nsequence(::Type{String}, record::Record)::String\n\nGet the segment sequence of record as String.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.seqlength","page":"SAM and BAM","title":"XAM.SAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.quality","page":"SAM and BAM","title":"XAM.SAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the Phred-scaled base quality of record.\n\n\n\n\n\nquality(::Type{String}, record::Record)::String\n\nGet the 
ASCII-encoded base quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.auxdata","page":"SAM and BAM","title":"XAM.SAM.auxdata","text":"auxdata(record::Record)::Dict{String,Any}\n\nGet the auxiliary data (optional fields) of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the following accessors for BAM.Record types.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"XAM.BAM.flag\nXAM.BAM.ismapped\nXAM.BAM.isprimary\nXAM.BAM.refid\nXAM.BAM.refname\nXAM.BAM.reflen\nXAM.BAM.position\nXAM.BAM.rightposition\nXAM.BAM.isnextmapped\nXAM.BAM.nextrefid\nXAM.BAM.nextrefname\nXAM.BAM.nextposition\nXAM.BAM.mappingquality\nXAM.BAM.cigar\nXAM.BAM.alignment\nXAM.BAM.alignlength\nXAM.BAM.tempname\nXAM.BAM.templength\nXAM.BAM.sequence\nXAM.BAM.seqlength\nXAM.BAM.quality\nXAM.BAM.auxdata","category":"page"},{"location":"hts-files/#XAM.BAM.flag","page":"SAM and BAM","title":"XAM.BAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.ismapped","page":"SAM and BAM","title":"XAM.BAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.isprimary","page":"SAM and BAM","title":"XAM.BAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.refid","page":"SAM and BAM","title":"XAM.BAM.refid","text":"refid(record::Record)::Int\n\nGet the reference sequence ID of record.\n\nThe ID is 1-based (i.e. 
the first sequence is 1) and is 0 for a record without a mapping position.\n\nSee also: BAM.refname\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.refname","page":"SAM and BAM","title":"XAM.BAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\nSee also: BAM.refid\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.reflen","page":"SAM and BAM","title":"XAM.BAM.reflen","text":"reflen(record::Record)::Int\n\nGet the length of the reference sequence this record applies to.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.position","page":"SAM and BAM","title":"XAM.BAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.rightposition","page":"SAM and BAM","title":"XAM.BAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.isnextmapped","page":"SAM and BAM","title":"XAM.BAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextrefid","page":"SAM and BAM","title":"XAM.BAM.nextrefid","text":"nextrefid(record::Record)::Int\n\nGet the next/mate reference sequence ID of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextrefname","page":"SAM and BAM","title":"XAM.BAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextposition","page":"SAM and BAM","title":"XAM.BAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the 1-based leftmost mapping position of the next/mate read of 
record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.mappingquality","page":"SAM and BAM","title":"XAM.BAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.cigar","page":"SAM and BAM","title":"XAM.BAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar_rle.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.alignment","page":"SAM and BAM","title":"XAM.BAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.alignlength","page":"SAM and BAM","title":"XAM.BAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.tempname","page":"SAM and BAM","title":"XAM.BAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.templength","page":"SAM and BAM","title":"XAM.BAM.templength","text":"templength(record::Record)::Int\n\nGet the 
template length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.sequence","page":"SAM and BAM","title":"XAM.BAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.seqlength","page":"SAM and BAM","title":"XAM.BAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.quality","page":"SAM and BAM","title":"XAM.BAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the base quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.auxdata","page":"SAM and BAM","title":"XAM.BAM.auxdata","text":"auxdata(record::Record)::BAM.AuxData\n\nGet the auxiliary data of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#Accessing-auxiliary-data-1","page":"SAM and BAM","title":"Accessing auxiliary data","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"SAM and BAM records support the storing of optional data fields associated with tags.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Tagged auxiliary data follows a format of TAG:TYPE:VALUE. TAG is a two-letter string, and each tag can only appear once per record. 
TYPE is a single case-sensetive letter which defined the format of VALUE.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Type Description\n'A' Printable character\n'i' Signed integer\n'f' Single-precision floating number\n'Z' Printable string, including space\n'H' Byte array in Hex format\n'B' Integer of numeric array","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"For more information about these tags and their types we refer you to the [SAM/BAM specification][samtools-spec] and the additional [optional fields specification][samtags] document.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"There are some tags that are reserved, predefined standard tags, for specific uses.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"To access optional fields stored in tags, you use getindex indexing syntax on the record object. Note that accessing optional tag fields will result in type instability in Julia. This is because the type of the optional data is not known until run-time, as the tag is being read. This can have a significant impact on performance. To limit this, if the user knows the type of a value in advance, specifying it as a type annotation will alleviate the problem:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Below is an example of looping over records in a bam file and using indexing syntax to get the data stored in the \"NM\" tag. 
Note the UInt8 type assertion to alleviate type instability.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"for record in open(BAM.Reader, \"data.bam\")\n nm = record[\"NM\"]::UInt8\n # do something\nend","category":"page"},{"location":"hts-files/#Getting-records-in-a-range-1","page":"SAM and BAM","title":"Getting records in a range","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the BAI index to fetch records in a specific range from a BAM file. from a BAM file. [Samtools][samtools] provides index subcommand to create an index file (.bai) from a sorted BAM file.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"$ samtools index -b SRR1238088.sort.bam\n$ ls SRR1238088.sort.bam*\nSRR1238088.sort.bam SRR1238088.sort.bam.bai","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"eachoverlap(reader, chrom, range) returns an iterator of BAM records overlapping the query interval:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"reader = open(BAM.Reader, \"SRR1238088.sort.bam\", index=\"SRR1238088.sort.bam.bai\")\nfor record in eachoverlap(reader, \"Chr2\", 10000:11000)\n # `record` is a BAM.Record object\n # ...\nend\nclose(reader)","category":"page"},{"location":"hts-files/#Getting-records-overlapping-genomic-features-1","page":"SAM and BAM","title":"Getting records overlapping genomic features","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"eachoverlap also accepts the Interval type defined in [GenomicFeatures.jl][genomicfeatures].","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"This allows you to do things like first read in the genomic features from a GFF3 file, and then for each feature, 
iterate over all the BAM records that overlap with that feature.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"# Load GFF3 module.\nusing GenomicFeatures\nusing BioAlignments\n\n# Load genomic features from a GFF3 file.\nfeatures = open(collect, GFF3.Reader, \"TAIR10_GFF3_genes.gff\")\n\n# Keep mRNA features.\nfilter!(x -> GFF3.featuretype(x) == \"mRNA\", features)\n\n# Open a BAM file and iterate over records overlapping mRNA transcripts.\nreader = open(BAM.Reader, \"SRR1238088.sort.bam\", index = \"SRR1238088.sort.bam.bai\")\nfor feature in features\n for record in eachoverlap(reader, feature)\n # `record` overlaps `feature`.\n # ...\n end\nend\nclose(reader)","category":"page"},{"location":"hts-files/#Writing-files-1","page":"SAM and BAM","title":"Writing files","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"In order to write a BAM or SAM file, you must first create a SAM.Header.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A SAM.Header is constructed from a vector of SAM.MetaInfo objects.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"For example, to create the following simple header:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"@HD VN:1.6 SO:coordinate\n@SQ SN:ref LN:45","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> a = SAM.MetaInfo(\"HD\", [\"VN\" => 1.6, \"SO\" => \"coordinate\"])\nSAM.MetaInfo:\n tag: HD\n value: VN=1.6 SO=coordinate\n\njulia> b = SAM.MetaInfo(\"SQ\", [\"SN\" => \"ref\", \"LN\" => 45])\nSAM.MetaInfo:\n tag: SQ\n value: SN=ref LN=45\n\njulia> h = SAM.Header([a, b])\nSAM.Header(SAM.MetaInfo[SAM.MetaInfo:\n tag: HD\n value: VN=1.6 SO=coordinate, SAM.MetaInfo:\n tag: SQ\n value: SN=ref 
LN=45])\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Then to create the writer for a SAM file, construct a SAM.Writer using the header and an IO type:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> samw = SAM.Writer(open(\"my-data.sam\", \"w\"), h)\nSAM.Writer(IOStream())\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"To make a BAM Writer is slightly different, as you need to use a specific stream type from the [BGZFStreams][bgzfstreams] package:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> using BGZFStreams\n\njulia> bamw = BAM.Writer(BGZFStream(open(\"my-data.bam\", \"w\"), \"w\"))\nBAM.Writer(BGZFStreams.BGZFStream{IOStream}())\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Once you have a BAM or SAM writer, you can use the write method to write BAM.Records or SAM.Records to file:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> write(bamw, rec) # Here rec is a `BAM.Record`\n330780","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"[samtools]: https://samtools.github.io/ [samtools-spec]: https://samtools.github.io/hts-specs/SAMv1.pdf [samtags]: https://samtools.github.io/hts-specs/SAMtags.pdf [bgzfstreams]: https://github.com/BioJulia/BGZFStreams.jl [genomicfeatures]: https://github.com/BioJulia/GenomicFeatures.jl","category":"page"}] +[{"location":"#XAM.jl-1","page":"Home","title":"XAM.jl","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"(Image: Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.) 
(Image: Latest Release) (Image: MIT license) (Image: Join the chat at https://gitter.im/BioJulia/XAM.jl)","category":"page"},{"location":"#Description-1","page":"Home","title":"Description","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"XAM provides I/O and utilities for manipulating SAM and BAM formatted alignment map files.","category":"page"},{"location":"#Installation-1","page":"Home","title":"Installation","text":"","category":"section"},{"location":"#","page":"Home","title":"Home","text":"XAM is made available to install through BioJulia's package registry. Julia's package manager only uses the \"General\" package registry by default. Your Julia configuration needs to include the BioJulia registry to be able to install the latest version of XAM.","category":"page"},{"location":"#","page":"Home","title":"Home","text":"To add the BioJulia registry from the Julia REPL, press ] to enter pkg mode, then enter the following command:","category":"page"},{"location":"#","page":"Home","title":"Home","text":"registry add https://github.com/BioJulia/BioJuliaRegistry.git","category":"page"},{"location":"#","page":"Home","title":"Home","text":"Once the registry is added, you can install XAM while in pkg mode with the following command:","category":"page"},{"location":"#","page":"Home","title":"Home","text":"add XAM","category":"page"},{"location":"#","page":"Home","title":"Home","text":"If you are interested in the cutting edge of the development, please check out the develop branch to try new features before release.","category":"page"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"CurrentModule = XAM\nDocTestSetup = quote\n using XAM\nend","category":"page"},{"location":"api/api/#Public-API-Reference-1","page":"API Reference","title":"Public API Reference","text":"","category":"section"},{"location":"api/api/#Contents-1","page":"API 
Reference","title":"Contents","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Pages = [\"api.md\"]","category":"page"},{"location":"api/api/#Index-1","page":"API Reference","title":"Index","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Pages = [\"api.md\"]","category":"page"},{"location":"api/api/#SAM-API-1","page":"API Reference","title":"SAM API","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"The following methods and types are provided by the SAM submodule for public use.","category":"page"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Modules = [XAM.SAM]\nprivate = false","category":"page"},{"location":"api/api/#XAM.SAM.FLAG_DUP","page":"API Reference","title":"XAM.SAM.FLAG_DUP","text":"0x0400: optical or PCR duplicate\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_MREVERSE","page":"API Reference","title":"XAM.SAM.FLAG_MREVERSE","text":"0x0020: the mate is mapped to the reverse strand\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_MUNMAP","page":"API Reference","title":"XAM.SAM.FLAG_MUNMAP","text":"0x0008: the mate is unmapped\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_PAIRED","page":"API Reference","title":"XAM.SAM.FLAG_PAIRED","text":"0x0001: the read is paired in sequencing, no matter whether it is mapped in a pair\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_PROPER_PAIR","page":"API Reference","title":"XAM.SAM.FLAG_PROPER_PAIR","text":"0x0002: the read is mapped in a proper pair\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_QCFAIL","page":"API Reference","title":"XAM.SAM.FLAG_QCFAIL","text":"0x0200: QC failure\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_READ1","page":"API 
Reference","title":"XAM.SAM.FLAG_READ1","text":"0x0040: this is read1\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_READ2","page":"API Reference","title":"XAM.SAM.FLAG_READ2","text":"0x0080: this is read2\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_REVERSE","page":"API Reference","title":"XAM.SAM.FLAG_REVERSE","text":"0x0010: the read is mapped to the reverse strand\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_SECONDARY","page":"API Reference","title":"XAM.SAM.FLAG_SECONDARY","text":"0x0100: not primary alignment\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_SUPPLEMENTARY","page":"API Reference","title":"XAM.SAM.FLAG_SUPPLEMENTARY","text":"0x0800: supplementary alignment\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.FLAG_UNMAP","page":"API Reference","title":"XAM.SAM.FLAG_UNMAP","text":"0x0004: the read itself is unmapped; conflictive with SAM.FLAGPROPERPAIR\n\n\n\n\n\n","category":"constant"},{"location":"api/api/#XAM.SAM.Header-Tuple{}","page":"API Reference","title":"XAM.SAM.Header","text":"SAM.Header()\n\nCreate an empty header.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.MetaInfo-Tuple{AbstractString,Any}","page":"API Reference","title":"XAM.SAM.MetaInfo","text":"MetaInfo(tag::AbstractString, value)\n\nCreate a SAM metainfo with tag and value.\n\ntag is a two-byte ASCII string. 
If tag is \"CO\", value must be a string; otherwise, value is an iterable object with key and value pairs.\n\nExamples\n\njulia> SAM.MetaInfo(\"CO\", \"some comment\")\nBioAlignments.SAM.MetaInfo:\n tag: CO\n value: some comment\n\njulia> string(ans)\n\"@CO\tsome comment\"\n\njulia> SAM.MetaInfo(\"SQ\", [\"SN\" => \"chr1\", \"LN\" => 12345])\nBioAlignments.SAM.MetaInfo:\n tag: SQ\n value: SN=chr1 LN=12345\n\njulia> string(ans)\n\"@SQ\tSN:chr1\tLN:12345\"\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.MetaInfo-Tuple{AbstractString}","page":"API Reference","title":"XAM.SAM.MetaInfo","text":"MetaInfo(str::AbstractString)\n\nCreate a SAM metainfo from str.\n\nExamples\n\njulia> SAM.MetaInfo(\"@CO\tsome comment\")\nBioAlignments.SAM.MetaInfo:\n tag: CO\n value: some comment\n\njulia> SAM.MetaInfo(\"@SQ\tSN:chr1\tLN:12345\")\nBioAlignments.SAM.MetaInfo:\n tag: SQ\n value: SN=chr1 LN=12345\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Reader-Tuple{IO}","page":"API Reference","title":"XAM.SAM.Reader","text":"SAM.Reader(input::IO)\n\nCreate a data reader of the SAM file format.\n\nArguments\n\ninput: data source\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{AbstractString}","page":"API Reference","title":"XAM.SAM.Record","text":"SAM.Record(str::AbstractString)\n\nCreate a SAM record from str. This function verifies the format and indexes fields for accessors.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{Array{UInt8,1}}","page":"API Reference","title":"XAM.SAM.Record","text":"SAM.Record(data::Vector{UInt8})\n\nCreate a SAM record from data. This function verifies the format and indexes fields for accessors. 
Note that the ownership of data is transferred to a new record object.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Record-Tuple{}","page":"API Reference","title":"XAM.SAM.Record","text":"SAM.Record()\n\nCreate an unfilled SAM record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.Writer","page":"API Reference","title":"XAM.SAM.Writer","text":"Writer(output::IO, header::Header=Header())\n\nCreate a data writer of the SAM file format.\n\nArguments\n\noutput: data sink\nheader=Header(): SAM header object\n\n\n\n\n\n","category":"type"},{"location":"api/api/#Base.findall-Tuple{XAM.SAM.Header,AbstractString}","page":"API Reference","title":"Base.findall","text":"find(header::Header, key::AbstractString)::Vector{MetaInfo}\n\nFind metainfo objects satisfying SAM.tag(metainfo) == key.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#BioGenerics.header-Tuple{XAM.SAM.Reader}","page":"API Reference","title":"BioGenerics.header","text":"header(reader::Reader)::Header\n\nGet the header of reader.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.alignlength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.alignment-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.auxdata-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.auxdata","text":"auxdata(record::Record)::Dict{String,Any}\n\nGet the auxiliary data (optional fields) of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.cigar-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.flag-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.iscomment-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.iscomment","text":"iscomment(metainfo::MetaInfo)::Bool\n\nTest if metainfo is a comment (i.e. its tag is \"CO\").\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.ismapped-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.isnextmapped-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.isprimary-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.keyvalues-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.keyvalues","text":"keyvalues(metainfo::MetaInfo)::Vector{Pair{String,String}}\n\nGet the values of metainfo as string pairs.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.mappingquality-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.nextposition-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the position of the mate/next read of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.nextrefname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.position-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.quality-Tuple{Type{String},XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.quality","text":"quality(::Type{String}, record::Record)::String\n\nGet the ASCII-encoded base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.quality-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the Phred-scaled base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.refname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.rightposition-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.seqlength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.sequence-Tuple{Type{String},XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.sequence","text":"sequence(::Type{String}, record::Record)::String\n\nGet the segment sequence of record as 
String.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.sequence-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.tag-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.tag","text":"tag(metainfo::MetaInfo)::String\n\nGet the tag of metainfo.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.templength-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.tempname-Tuple{XAM.SAM.Record}","page":"API Reference","title":"XAM.SAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.SAM.value-Tuple{XAM.SAM.MetaInfo}","page":"API Reference","title":"XAM.SAM.value","text":"value(metainfo::MetaInfo)::String\n\nGet the value of metainfo as a string.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#BAM-API-1","page":"API Reference","title":"BAM API","text":"","category":"section"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"The following methods and types are provided by the BAM submodule for public use.","category":"page"},{"location":"api/api/#","page":"API Reference","title":"API Reference","text":"Modules = [XAM.BAM]\nprivate = false","category":"page"},{"location":"api/api/#XAM.BAM.BAI-Tuple{AbstractString}","page":"API Reference","title":"XAM.BAM.BAI","text":"BAI(filename::AbstractString)\n\nLoad a BAI index from filename.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.BAI-Tuple{IO}","page":"API Reference","title":"XAM.BAM.BAI","text":"BAI(input::IO)\n\nLoad a BAI index from 
input.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.Reader","page":"API Reference","title":"XAM.BAM.Reader","text":"BAM.Reader(input::IO; index=nothing)\n\nCreate a data reader of the BAM file format.\n\nArguments\n\ninput: data source\nindex=nothing: filepath to a random access index (currently bai is supported)\n\n\n\n\n\n","category":"type"},{"location":"api/api/#XAM.BAM.Record","page":"API Reference","title":"XAM.BAM.Record","text":"BAM.Record()\n\nCreate an unfilled BAM record.\n\n\n\n\n\n","category":"type"},{"location":"api/api/#XAM.BAM.Writer","page":"API Reference","title":"XAM.BAM.Writer","text":"BAM.Writer(output::BGZFStream, header::SAM.Header)\n\nCreate a data writer of the BAM file format.\n\nArguments\n\noutput: data sink\nheader: SAM header object\n\n\n\n\n\n","category":"type"},{"location":"api/api/#BioGenerics.header-Tuple{XAM.BAM.Reader}","page":"API Reference","title":"BioGenerics.header","text":"header(reader::Reader; fillSQ::Bool=false)::SAM.Header\n\nGet the header of reader.\n\nIf fillSQ is true, this function fills missing \"SQ\" metainfo in the header.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.alignlength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.alignment-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.auxdata-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.auxdata","text":"auxdata(record::Record)::BAM.AuxData\n\nGet the auxiliary data of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.cigar","page":"API Reference","title":"XAM.BAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of 
record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar_rle.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.cigar_rle","page":"API Reference","title":"XAM.BAM.cigar_rle","text":"cigar_rle(record::Record, checkCG::Bool = true)::Tuple{Vector{BioAlignments.Operation},Vector{Int}}\n\nGet a run-length encoded tuple (ops, lens) of the CIGAR string in record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. 
However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.flag-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.ismapped-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.isnextmapped-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.ispositivestrand-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.ispositivestrand","text":"ispositivestrand(record::Record)::Bool\n\nTest if record is aligned to the positive strand.\n\nThis is equivalent to flag(record) & 0x10 == 0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.isprimary-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 
0.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.mappingquality-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.n_cigar_op","page":"API Reference","title":"XAM.BAM.n_cigar_op","text":"n_cigar_op(record::Record, checkCG::Bool = true)\n\nReturn the number of operations in the CIGAR string of record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the number of operations in the true cigar string, because this is probably what you want, the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to get the number of operations in the cigar field of the BAM record, then set checkCG to false.\n\n\n\n\n\n","category":"function"},{"location":"api/api/#XAM.BAM.nextposition-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the 1-based leftmost mapping position of the next/mate read of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.nextrefid-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextrefid","text":"nextrefid(record::Record)::Int\n\nGet the next/mate reference sequence ID of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.nextrefname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of 
record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.position-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.quality-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the base quality of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.refid-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.refid","text":"refid(record::Record)::Int\n\nGet the reference sequence ID of record.\n\nThe ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.\n\nSee also: BAM.rname\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.reflen-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.reflen","text":"reflen(record::Record)::Int\n\nGet the length of the reference sequence this record applies to.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.refname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\nSee also: BAM.refid\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.rightposition-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.seqlength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.sequence-Tuple{XAM.BAM.Record}","page":"API 
Reference","title":"XAM.BAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.templength-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"method"},{"location":"api/api/#XAM.BAM.tempname-Tuple{XAM.BAM.Record}","page":"API Reference","title":"XAM.BAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"method"},{"location":"hts-files/#SAM-and-BAM-1","page":"SAM and BAM","title":"SAM and BAM","text":"","category":"section"},{"location":"hts-files/#Introduction-1","page":"SAM and BAM","title":"Introduction","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"High-throughput sequencing (HTS) technologies generate a large amount of data in the form of a large number of nucleotide sequencing reads. One of the most common tasks in bioinformatics is to align these reads against known reference genomes, chromosomes, or contigs. 
BioAlignments provides several data formats commonly used for this kind of task.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments offers high-performance tools for SAM and BAM file formats, which are the most popular file formats.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"If you have questions about the SAM and BAM formats or any of the terminology used when discussing these formats, see the published specification, which is maintained by the samtools group.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A very very simple SAM file looks like the following:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"@HD VN:1.6 SO:coordinate\n@SQ SN:ref LN:45\nr001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *\nr002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *\nr003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;\nr004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *\nr003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;\nr001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Where the first two lines are part of the \"header\", and the following lines are \"records\". Each record describes how a read aligns to some reference sequence. Sometimes one record describes one read, but there are other cases like chimeric reads and split alignments, where multiple records apply to one read. In the example above, r003 is a chimeric read, and r004 is a split alignment, and r001 are mate pair reads. 
Again, we refer you to the official specification for more details.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A BAM file stores this same information but in a binary and compressible format that does not make for pretty printing here!","category":"page"},{"location":"hts-files/#Reading-SAM-and-BAM-files-1","page":"SAM and BAM","title":"Reading SAM and BAM files","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A typical script iterating over all records in a file looks like below:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"using BioAlignments\n\n# Open a BAM file.\nreader = open(BAM.Reader, \"data.bam\")\n\n# Iterate over BAM records.\nfor record in reader\n # `record` is a BAM.Record object.\n if BAM.ismapped(record)\n # Print the mapped position.\n println(BAM.refname(record), ':', BAM.position(record))\n end\nend\n\n# Close the BAM file.\nclose(reader)","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"The size of a BAM file is often extremely large. The iterator interface demonstrated above allocates an object for each record and that may be a bottleneck of reading data from a BAM file. In-place reading reuses a pre-allocated object for every record and less memory allocation happens in reading:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"reader = open(BAM.Reader, \"data.bam\")\nrecord = BAM.Record()\nwhile !eof(reader)\n read!(reader, record)\n # do something\nend","category":"page"},{"location":"hts-files/#SAM-and-BAM-Headers-1","page":"SAM and BAM","title":"SAM and BAM Headers","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Both SAM.Reader and BAM.Reader implement the header function, which returns a SAM.Header object. 
To extract certain information out of the headers, you can use the find method on the header to extract information according to SAM/BAM tag. Again we refer you to the specification for full details of all the different tags that can occur in headers, and what they mean.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Below is an example of extracting all the info about the reference sequences from the BAM header. In SAM/BAM, any description of a reference sequence is stored in the header, under a tag denoted SQ (think reference SeQuence!).","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> reader = open(SAM.Reader, \"data.sam\");\n\njulia> find(header(reader), \"SQ\")\n7-element Array{Bio.Align.SAM.MetaInfo,1}:\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr1 LN=30427671\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr2 LN=19698289\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr3 LN=23459830\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr4 LN=18585056\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=Chr5 LN=26975502\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=chloroplast LN=154478\n Bio.Align.SAM.MetaInfo:\n tag: SQ\n value: SN=mitochondria LN=366924\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"In the above we can see there were 7 sequences in the reference: 5 chromosomes, one chloroplast sequence, and one mitochondrial sequence.","category":"page"},{"location":"hts-files/#SAM-and-BAM-Records-1","page":"SAM and BAM","title":"SAM and BAM Records","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the following accessors for SAM.Record types.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and 
BAM","text":"XAM.SAM.flag\nXAM.SAM.ismapped\nXAM.SAM.isprimary\nXAM.SAM.refname\nXAM.SAM.position\nXAM.SAM.rightposition\nXAM.SAM.isnextmapped\nXAM.SAM.nextrefname\nXAM.SAM.nextposition\nXAM.SAM.mappingquality\nXAM.SAM.cigar\nXAM.SAM.alignment\nXAM.SAM.alignlength\nXAM.SAM.tempname\nXAM.SAM.templength\nXAM.SAM.sequence\nXAM.SAM.seqlength\nXAM.SAM.quality\nXAM.SAM.auxdata","category":"page"},{"location":"hts-files/#XAM.SAM.flag","page":"SAM and BAM","title":"XAM.SAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.ismapped","page":"SAM and BAM","title":"XAM.SAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.isprimary","page":"SAM and BAM","title":"XAM.SAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.refname","page":"SAM and BAM","title":"XAM.SAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.position","page":"SAM and BAM","title":"XAM.SAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.rightposition","page":"SAM and BAM","title":"XAM.SAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.isnextmapped","page":"SAM and BAM","title":"XAM.SAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.nextrefname","page":"SAM 
and BAM","title":"XAM.SAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.nextposition","page":"SAM and BAM","title":"XAM.SAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the position of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.mappingquality","page":"SAM and BAM","title":"XAM.SAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.cigar","page":"SAM and BAM","title":"XAM.SAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.alignment","page":"SAM and BAM","title":"XAM.SAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.alignlength","page":"SAM and BAM","title":"XAM.SAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.tempname","page":"SAM and BAM","title":"XAM.SAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.templength","page":"SAM and BAM","title":"XAM.SAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.sequence","page":"SAM and BAM","title":"XAM.SAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\nsequence(::Type{String}, record::Record)::String\n\nGet the segment sequence of record as 
String.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.seqlength","page":"SAM and BAM","title":"XAM.SAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.quality","page":"SAM and BAM","title":"XAM.SAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the Phred-scaled base quality of record.\n\n\n\n\n\nquality(::Type{String}, record::Record)::String\n\nGet the ASCII-encoded base quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.SAM.auxdata","page":"SAM and BAM","title":"XAM.SAM.auxdata","text":"auxdata(record::Record)::Dict{String,Any}\n\nGet the auxiliary data (optional fields) of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the following accessors for BAM.Record types.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"XAM.BAM.flag\nXAM.BAM.ismapped\nXAM.BAM.isprimary\nXAM.BAM.refid\nXAM.BAM.refname\nXAM.BAM.reflen\nXAM.BAM.position\nXAM.BAM.rightposition\nXAM.BAM.isnextmapped\nXAM.BAM.nextrefid\nXAM.BAM.nextrefname\nXAM.BAM.nextposition\nXAM.BAM.mappingquality\nXAM.BAM.cigar\nXAM.BAM.alignment\nXAM.BAM.alignlength\nXAM.BAM.tempname\nXAM.BAM.templength\nXAM.BAM.sequence\nXAM.BAM.seqlength\nXAM.BAM.quality\nXAM.BAM.auxdata","category":"page"},{"location":"hts-files/#XAM.BAM.flag","page":"SAM and BAM","title":"XAM.BAM.flag","text":"flag(record::Record)::UInt16\n\nGet the bitwise flag of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.ismapped","page":"SAM and BAM","title":"XAM.BAM.ismapped","text":"ismapped(record::Record)::Bool\n\nTest if record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.isprimary","page":"SAM and 
BAM","title":"XAM.BAM.isprimary","text":"isprimary(record::Record)::Bool\n\nTest if record is a primary line of the read.\n\nThis is equivalent to flag(record) & 0x900 == 0.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.refid","page":"SAM and BAM","title":"XAM.BAM.refid","text":"refid(record::Record)::Int\n\nGet the reference sequence ID of record.\n\nThe ID is 1-based (i.e. the first sequence is 1) and is 0 for a record without a mapping position.\n\nSee also: BAM.rname\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.refname","page":"SAM and BAM","title":"XAM.BAM.refname","text":"refname(record::Record)::String\n\nGet the reference sequence name of record.\n\nSee also: BAM.refid\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.reflen","page":"SAM and BAM","title":"XAM.BAM.reflen","text":"reflen(record::Record)::Int\n\nGet the length of the reference sequence this record applies to.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.position","page":"SAM and BAM","title":"XAM.BAM.position","text":"position(record::Record)::Int\n\nGet the 1-based leftmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.rightposition","page":"SAM and BAM","title":"XAM.BAM.rightposition","text":"rightposition(record::Record)::Int\n\nGet the 1-based rightmost mapping position of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.isnextmapped","page":"SAM and BAM","title":"XAM.BAM.isnextmapped","text":"isnextmapped(record::Record)::Bool\n\nTest if the mate/next read of record is mapped.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextrefid","page":"SAM and BAM","title":"XAM.BAM.nextrefid","text":"nextrefid(record::Record)::Int\n\nGet the next/mate reference sequence ID of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextrefname","page":"SAM and 
BAM","title":"XAM.BAM.nextrefname","text":"nextrefname(record::Record)::String\n\nGet the reference name of the mate/next read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.nextposition","page":"SAM and BAM","title":"XAM.BAM.nextposition","text":"nextposition(record::Record)::Int\n\nGet the 1-based leftmost mapping position of the next/mate read of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.mappingquality","page":"SAM and BAM","title":"XAM.BAM.mappingquality","text":"mappingquality(record::Record)::UInt8\n\nGet the mapping quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.cigar","page":"SAM and BAM","title":"XAM.BAM.cigar","text":"cigar(record::Record)::String\n\nGet the CIGAR string of record.\n\nNote that in the BAM specification, the field called cigar typically stores the cigar string of the record. However, this is not always true, sometimes the true cigar is very long, and due to some constraints of the BAM format, the actual cigar string is stored in an extra tag: CG:B,I, and the cigar field stores a pseudo-cigar string.\n\nCalling this method with checkCG set to true (default) this method will always yield the true cigar string, because this is probably what you want the vast majority of the time.\n\nIf you have a record that stores the true cigar in a CG:B,I tag, but you still want to access the pseudo-cigar that is stored in the cigar field of the BAM record, then you can set checkCG to false.\n\nSee also BAM.cigar_rle.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.alignment","page":"SAM and BAM","title":"XAM.BAM.alignment","text":"alignment(record::Record)::BioAlignments.Alignment\n\nGet the alignment of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.alignlength","page":"SAM and BAM","title":"XAM.BAM.alignlength","text":"alignlength(record::Record)::Int\n\nGet the alignment length of 
record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.tempname","page":"SAM and BAM","title":"XAM.BAM.tempname","text":"tempname(record::Record)::String\n\nGet the query template name of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.templength","page":"SAM and BAM","title":"XAM.BAM.templength","text":"templength(record::Record)::Int\n\nGet the template length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.sequence","page":"SAM and BAM","title":"XAM.BAM.sequence","text":"sequence(record::Record)::BioSequences.DNASequence\n\nGet the segment sequence of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.seqlength","page":"SAM and BAM","title":"XAM.BAM.seqlength","text":"seqlength(record::Record)::Int\n\nGet the sequence length of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.quality","page":"SAM and BAM","title":"XAM.BAM.quality","text":"quality(record::Record)::Vector{UInt8}\n\nGet the base quality of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#XAM.BAM.auxdata","page":"SAM and BAM","title":"XAM.BAM.auxdata","text":"auxdata(record::Record)::BAM.AuxData\n\nGet the auxiliary data of record.\n\n\n\n\n\n","category":"function"},{"location":"hts-files/#Accessing-auxiliary-data-1","page":"SAM and BAM","title":"Accessing auxiliary data","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"SAM and BAM records support the storing of optional data fields associated with tags.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Tagged auxiliary data follows a format of TAG:TYPE:VALUE. TAG is a two-letter string, and each tag can only appear once per record. 
TYPE is a single case-sensitive letter which defines the format of VALUE.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Type Description\n'A' Printable character\n'i' Signed integer\n'f' Single-precision floating number\n'Z' Printable string, including space\n'H' Byte array in Hex format\n'B' Integer or numeric array","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"For more information about these tags and their types we refer you to the SAM/BAM specification and the additional optional fields specification document.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"There are some tags that are reserved, predefined standard tags, for specific uses.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"To access optional fields stored in tags, you use getindex indexing syntax on the record object. Note that accessing optional tag fields will result in type instability in Julia. This is because the type of the optional data is not known until run-time, as the tag is being read. This can have a significant impact on performance. To limit this, if the user knows the type of a value in advance, specifying it as a type annotation will alleviate the problem:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Below is an example of looping over records in a BAM file and using indexing syntax to get the data stored in the \"NM\" tag. 
Note the UInt8 type assertion to alleviate type instability.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"for record in open(BAM.Reader, \"data.bam\")\n nm = record[\"NM\"]::UInt8\n # do something\nend","category":"page"},{"location":"hts-files/#Getting-records-in-a-range-1","page":"SAM and BAM","title":"Getting records in a range","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"BioAlignments supports the BAI index to fetch records in a specific range from a BAM file. Samtools (https://samtools.github.io/) provides the index subcommand to create an index file (.bai) from a sorted BAM file.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"$ samtools index -b SRR1238088.sort.bam\n$ ls SRR1238088.sort.bam*\nSRR1238088.sort.bam SRR1238088.sort.bam.bai","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"eachoverlap(reader, chrom, range) returns an iterator of BAM records overlapping the query interval:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"reader = open(BAM.Reader, \"SRR1238088.sort.bam\", index=\"SRR1238088.sort.bam.bai\")\nfor record in eachoverlap(reader, \"Chr2\", 10000:11000)\n # `record` is a BAM.Record object\n # ...\nend\nclose(reader)","category":"page"},{"location":"hts-files/#Getting-records-overlapping-genomic-features-1","page":"SAM and BAM","title":"Getting records overlapping genomic features","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"eachoverlap also accepts the Interval type defined in GenomicFeatures.jl.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"This allows you to do things like first read in the genomic features from a GFF3 file, and then 
for each feature, iterate over all the BAM records that overlap with that feature.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"# Load GFF3 module.\nusing GenomicFeatures\nusing BioAlignments\n\n# Load genomic features from a GFF3 file.\nfeatures = open(collect, GFF3.Reader, \"TAIR10_GFF3_genes.gff\")\n\n# Keep mRNA features.\nfilter!(x -> GFF3.featuretype(x) == \"mRNA\", features)\n\n# Open a BAM file and iterate over records overlapping mRNA transcripts.\nreader = open(BAM.Reader, \"SRR1238088.sort.bam\", index = \"SRR1238088.sort.bam.bai\")\nfor feature in features\n for record in eachoverlap(reader, feature)\n # `record` overlaps `feature`.\n # ...\n end\nend\nclose(reader)","category":"page"},{"location":"hts-files/#Writing-files-1","page":"SAM and BAM","title":"Writing files","text":"","category":"section"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"In order to write a BAM or SAM file, you must first create a SAM.Header.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"A SAM.Header is constructed from a vector of SAM.MetaInfo objects.","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"For example, to create the following simple header:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"@HD VN:1.6 SO:coordinate\n@SQ SN:ref LN:45","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> a = SAM.MetaInfo(\"HD\", [\"VN\" => 1.6, \"SO\" => \"coordinate\"])\nSAM.MetaInfo:\n tag: HD\n value: VN=1.6 SO=coordinate\n\njulia> b = SAM.MetaInfo(\"SQ\", [\"SN\" => \"ref\", \"LN\" => 45])\nSAM.MetaInfo:\n tag: SQ\n value: SN=ref LN=45\n\njulia> h = SAM.Header([a, b])\nSAM.Header(SAM.MetaInfo[SAM.MetaInfo:\n tag: HD\n value: VN=1.6 SO=coordinate, SAM.MetaInfo:\n tag: SQ\n value: SN=ref 
LN=45])\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Then to create the writer for a SAM file, construct a SAM.Writer using the header and an IO type:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> samw = SAM.Writer(open(\"my-data.sam\", \"w\"), h)\nSAM.Writer(IOStream())\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Making a BAM writer is slightly different, as you need to use a specific stream type from the BGZFStreams.jl (https://github.com/BioJulia/BGZFStreams.jl) package:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> using BGZFStreams\n\njulia> bamw = BAM.Writer(BGZFStream(open(\"my-data.bam\", \"w\"), \"w\"))\nBAM.Writer(BGZFStreams.BGZFStream{IOStream}())\n","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"Once you have a BAM or SAM writer, you can use the write method to write BAM.Records or SAM.Records to file:","category":"page"},{"location":"hts-files/#","page":"SAM and BAM","title":"SAM and BAM","text":"julia> write(bamw, rec) # Here rec is a `BAM.Record`\n330780","category":"page"}] }