Compare commits
10 commits
05bd78f94b
...
127164efc8
| Author | SHA1 | Date | |
|---|---|---|---|
| 127164efc8 | |||
| 676eb6a7eb | |||
| 862ddc35f1 | |||
| d82bf82818 | |||
| 3fcfccccc7 | |||
| 439541f0fc | |||
| fc52da832c | |||
| 50053498ad | |||
| 325940a62c | |||
| c60514550d |
21 changed files with 205 additions and 380 deletions
1
.actrc
1
.actrc
|
|
@ -1 +0,0 @@
|
|||
-P ubuntu-latest=ghcr.io/catthehacker/ubuntu:act-latest
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
bam
|
||||
haplotype
|
||||
haplotypes
|
||||
vcf
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
trunc
|
||||
jldoctest
|
||||
1
.envrc
1
.envrc
|
|
@ -1 +0,0 @@
|
|||
use nix
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
MD007:
|
||||
indent: 2
|
||||
MD024:
|
||||
allow_different_nesting: true
|
||||
MD026:
|
||||
punctuation: ",;:。,;:"
|
||||
MD030:
|
||||
ul_single: 1
|
||||
ul_multi: 1
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
repos:
|
||||
- repo: https://github.com/domluna/JuliaFormatter.jl
|
||||
rev: v1.0.22
|
||||
hooks:
|
||||
- id: julia-formatter
|
||||
- repo: https://github.com/DavidAnson/markdownlint-cli2
|
||||
rev: v0.6.0
|
||||
hooks:
|
||||
- id: markdownlint-cli2-fix
|
||||
exclude: "docs"
|
||||
- repo: https://github.com/pre-commit/mirrors-prettier
|
||||
rev: v2.7.1
|
||||
hooks:
|
||||
- id: prettier
|
||||
additional_dependencies:
|
||||
- prettier@2.7.1
|
||||
- prettier-plugin-sh@0.11.0
|
||||
- prettier-plugin-toml@0.3.1
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
### Julia gitignore ###
|
||||
# Files generated by invoking Julia with --code-coverage
|
||||
*.jl.cov
|
||||
*.jl.*.cov
|
||||
|
||||
# Files generated by invoking Julia with --track-allocation
|
||||
*.jl.mem
|
||||
|
||||
# System-specific files and directories generated by the BinaryProvider and BinDeps packages
|
||||
# They contain absolute paths specific to the host computer, and so should not be committed
|
||||
deps/deps.jl
|
||||
deps/build.log
|
||||
deps/downloads/
|
||||
deps/usr/
|
||||
deps/src/
|
||||
|
||||
# Build artifacts for creating documentation generated by the Documenter package
|
||||
docs/build/
|
||||
docs/site/
|
||||
docs/Manifest.toml
|
||||
|
||||
# File generated by Pkg, the package manager, based on a corresponding Project.toml
|
||||
# It records a fixed state of all packages used by the project. As such, it should not be
|
||||
# committed for packages, but should be committed for applications that require a static
|
||||
# environment.
|
||||
Manifest.toml
|
||||
|
||||
# Files generated during compilation/testing
|
||||
build
|
||||
example/output.*
|
||||
example/reference.fasta.fai
|
||||
|
||||
### Julia prettierignore ###
|
||||
# Project file managed and formatted by Pkg.jl
|
||||
Project.toml
|
||||
|
||||
# Prettier doesn't understand the nuance of Julia markdown, so ignore docs
|
||||
# entirely
|
||||
docs/src/*.md
|
||||
docs/src/*/*.md
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
tabWidth = 4
|
||||
proseWrap = "always"
|
||||
|
||||
[[overrides]]
|
||||
files = "*.{md,yml,yaml,json,Dockerfile,sh,svg}"
|
||||
|
||||
[overrides.options]
|
||||
tabWidth = 2
|
||||
|
||||
[[overrides]]
|
||||
files = "*.md"
|
||||
proseWrap = "always"
|
||||
14
CHANGELOG.md
14
CHANGELOG.md
|
|
@ -8,8 +8,17 @@ and this project adheres to
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [1.1.0] - 2023-12-22
|
||||
|
||||
### Changed
|
||||
|
||||
- Documentation improved ([#62](https://github.com/ksumngs/HapLink.jl/pull/62))
|
||||
- Haplotype calling uses smallest possible bit depth
|
||||
([#61](https://github.com/ksumngs/HapLink.jl/pull/61))
|
||||
- Haplotype calling now uses sparse matrices
|
||||
([#60](https://github.com/ksumngs/HapLink.jl/pull/60))
|
||||
- FASTX.jl downgraded to v1
|
||||
([#56](https://github.com/ksumng/HapLink.jl/pull/56))
|
||||
([#56](https://github.com/ksumngs/HapLink.jl/pull/56))
|
||||
|
||||
## [1.0.0] - 2023-06-04
|
||||
|
||||
|
|
@ -209,7 +218,8 @@ and this project adheres to
|
|||
- `Haplotype`
|
||||
- `Variant`
|
||||
|
||||
[unreleased]: https://github.com/ksumngs/HapLink.jl/compare/v1.0.0...HEAD
|
||||
[unreleased]: https://github.com/ksumngs/HapLink.jl/compare/v1.1.0...HEAD
|
||||
[1.1.0]: https://github.com/ksumngs/HapLink.jl/compare/v1.0.0...v1.1.0
|
||||
[1.0.0]: https://github.com/ksumngs/HapLink.jl/compare/v0.7.1...v1.0.0
|
||||
[0.7.1]: https://github.com/ksumngs/HapLink.jl/compare/v0.7.0...v0.7.1
|
||||
[0.7.0]: https://github.com/ksumngs/HapLink.jl/compare/v0.6.0...v0.7.0
|
||||
|
|
|
|||
41
Dockerfile
41
Dockerfile
|
|
@ -1,41 +0,0 @@
|
|||
FROM ubuntu:focal
|
||||
|
||||
ENV JULIA_VERSION 1.6.5
|
||||
ENV JULIA_DEPOT_PATH /.julia
|
||||
|
||||
# Install the build dependencies
|
||||
RUN \
|
||||
apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
git \
|
||||
ca-certificates \
|
||||
build-essential
|
||||
|
||||
# Install Julia
|
||||
RUN \
|
||||
cd / \
|
||||
&& curl -L "https://julialang-s3.julialang.org/bin/linux/x64/1.6/julia-${JULIA_VERSION}-linux-x86_64.tar.gz" | tar xvz \
|
||||
&& ln -s /julia-${JULIA_VERSION}/bin/julia /usr/bin/julia
|
||||
|
||||
# Install PackageCompiler.jl
|
||||
RUN \
|
||||
julia -e 'using Pkg; Pkg.add("PackageCompiler")'
|
||||
|
||||
# Copy HapLink.jl
|
||||
COPY . /HapLink.jl
|
||||
|
||||
# Clone and build HapLink.jl
|
||||
RUN \
|
||||
cd /HapLink.jl \
|
||||
&& git clean -dfx \
|
||||
&& julia -e 'using Pkg; Pkg.activate("."); Pkg.instantiate()' \
|
||||
&& julia -e 'using PackageCompiler; create_app(".", "build", precompile_execution_file="precompile_app.jl", executables=["haplink" => "haplink"], cpu_target="x86-64")'
|
||||
|
||||
FROM ubuntu:focal
|
||||
|
||||
COPY --from=0 /HapLink.jl/build/bin /usr/bin
|
||||
COPY --from=0 /HapLink.jl/build/lib /usr/lib
|
||||
COPY --from=0 /HapLink.jl/build/share /usr/share
|
||||
|
||||
ENTRYPOINT ["/usr/bin/haplink"]
|
||||
85
Earthfile
85
Earthfile
|
|
@ -1,85 +0,0 @@
|
|||
VERSION 0.7
|
||||
FROM alpine:3.17
|
||||
|
||||
docs:
|
||||
FROM julia:alpine3.18
|
||||
COPY --dir src .
|
||||
COPY --dir docs .
|
||||
COPY Project.toml .
|
||||
RUN apk add --update --no-cache git
|
||||
RUN julia --color=yes --project=docs -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
|
||||
RUN julia --color=yes --project=docs -e 'using Documenter: DocMeta, doctest; using HapLink; DocMeta.setdocmeta!(HapLink, :DocTestSetup, :(using HapLink); recursive=true); doctest(HapLink)'
|
||||
RUN julia --color=yes --project=docs docs/make.jl
|
||||
SAVE ARTIFACT docs/build AS LOCAL local-output/site
|
||||
|
||||
test-all:
|
||||
FOR JULIA_VERSION IN 1 rc 1.6
|
||||
BUILD +test --version $JULIA_VERSION
|
||||
END
|
||||
|
||||
test:
|
||||
ARG version=latest
|
||||
ARG project=@.
|
||||
ARG precompile=no
|
||||
ARG check_bounds=yes
|
||||
ARG coverage=true
|
||||
ARG depwarn=yes
|
||||
ARG force_latest_compatible_version=auto
|
||||
ARG inline=yes
|
||||
ARG prefix=''
|
||||
ARG annotate=false
|
||||
ARG runtest_version=1.9.3
|
||||
FROM julia:$version
|
||||
COPY --dir src .
|
||||
COPY --dir docs .
|
||||
COPY --dir test .
|
||||
COPY Project.toml .
|
||||
ENV JULIA_PKG_PRECOMPILE_AUTO=$precompile
|
||||
ENV ANNOTATE=$annotate
|
||||
ENV COVERAGE=$coverage
|
||||
ENV FORCE_LATEST_COMPATIBLE_VERSION=$force_latest_compatible_version
|
||||
ENV CHECK_BOUNDS=$check_bounds
|
||||
ENV INPUT_DIRECTORIES='src,ext'
|
||||
RUN julia --color=yes --project=$project -e ' \
|
||||
import Pkg; \
|
||||
VERSION >= v"1.5-" && Pkg.Registry.add("General"); \
|
||||
VERSION >= v"1.1.0-rc1" ? Pkg.build(verbose=true) : Pkg.build()'
|
||||
RUN julia --color=yes -e '\
|
||||
if v"1.8pre" < VERSION < v"1.9.0-beta3"; \
|
||||
using Pkg; \
|
||||
Pkg.activate("tests-logger-env"; shared=true); \
|
||||
Pkg.add(Pkg.PackageSpec(name="GitHubActions", version="0.1")); \
|
||||
end'
|
||||
RUN curl -L https://github.com/julia-actions/julia-runtest/archive/refs/tags/v1.9.3.tar.gz | tar xvz
|
||||
RUN julia --color=yes \
|
||||
--depwarn=yes \
|
||||
--inline=yes \
|
||||
--project=@. \
|
||||
-e 'include(joinpath("julia-runtest-1.9.3", "test_harness.jl"))'
|
||||
RUN curl -L https://github.com/julia-actions/julia-processcoverage/archive/refs/tags/v1.2.2.tar.gz | tar xvz
|
||||
RUN julia --color=yes julia-processcoverage-1.2.2/main.jl
|
||||
SAVE ARTIFACT lcov.info AS LOCAL local-output/lcov.$version.info
|
||||
|
||||
compiler:
|
||||
FROM julia:latest
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends curl git ca-certificates build-essential
|
||||
|
||||
build:
|
||||
FROM +compiler
|
||||
COPY --dir src .
|
||||
COPY --dir deps .
|
||||
COPY --dir example .
|
||||
COPY Project.toml .
|
||||
COPY Comonicon.toml .
|
||||
RUN julia --project -e 'using Pkg; Pkg.instantiate()'
|
||||
RUN julia --project deps/build.jl app
|
||||
SAVE ARTIFACT build AS LOCAL build
|
||||
|
||||
docker:
|
||||
FROM ubuntu:focal
|
||||
ARG TAG=latest
|
||||
COPY +build/build/haplink/bin /usr/bin
|
||||
COPY +build/build/haplink/lib /usr/lib
|
||||
COPY +build/build/haplink/share /usr/share
|
||||
ENTRYPOINT ["/usr/bin/haplink"]
|
||||
SAVE IMAGE --push millironx/haplink:${TAG}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
name = "HapLink"
|
||||
uuid = "8ca39d33-de0d-4205-9b21-13a80f2b7eed"
|
||||
authors = ["Thomas A. Christensen II, Kansas State University, and contributors"]
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
|
||||
[deps]
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
|
|
@ -38,14 +38,19 @@ BioSequences = "3"
|
|||
BioSymbols = "5"
|
||||
Combinatorics = "1"
|
||||
Comonicon = "1"
|
||||
Dates = "1.6"
|
||||
Distributions = "0.25"
|
||||
FASTX = "1.3"
|
||||
FilePaths = "0.8"
|
||||
GenomicFeatures = "2"
|
||||
HypothesisTests = "0.10"
|
||||
OrderedCollections = "1.4"
|
||||
Pkg = "1.6"
|
||||
Random = "1.6"
|
||||
SHA = "0.7, 1"
|
||||
SequenceVariation = "0.2.2"
|
||||
SparseArrayKit = "0.2.1"
|
||||
Statistics = "1.6"
|
||||
VariantCallFormat = "0.5"
|
||||
XAM = "0.3.1"
|
||||
YAML = "0.4"
|
||||
|
|
|
|||
156
README.md
156
README.md
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
# <img src="./docs/src/assets/logo.png" style="border: 3px solid; float: left; margin: auto 2.5% auto 0" width="30%" > HapLink
|
||||
|
||||
[](https://www.repostatus.org/#wip)
|
||||
[](https://www.repostatus.org/#active)
|
||||
[](https://github.com/ksumngs/HapLink.jl/blob/master/CHANGELOG.md)
|
||||
[](https://github.com/ksumngs/HapLink.jl/)
|
||||
[](https://ksumngs.github.io/HapLink.jl/stable)
|
||||
|
|
@ -10,6 +10,8 @@
|
|||
[](https://github.com/ksumngs/HapLink.jl/actions)
|
||||
[](https://codecov.io/gh/ksumngs/HapLink.jl)
|
||||
[](https://github.com/invenia/BlueStyle)
|
||||
[](https://anaconda.org/bioconda/haplink)
|
||||
[](https://pkgs.genieframework.com?packages=HapLink)
|
||||
|
||||
<!-- markdownlint-enable -->
|
||||
|
||||
|
|
@ -24,43 +26,46 @@ the entire genome. Comes with its own variant caller.
|
|||
|
||||
## Installation
|
||||
|
||||
To run HapLink, your system must meet the following requirements
|
||||
### :snake: Via Bioconda
|
||||
|
||||
- Linux OS
|
||||
- glibc
|
||||
- x86-64 CPU
|
||||
**:warning::penguin: Linux-only!**
|
||||
|
||||
These restrictions apply even when using the package from within Julia.
|
||||
|
||||
If you need to use HapLink somewhere else, everything needed is available in a
|
||||
[Docker image over on Quay].
|
||||
|
||||
### Prebuilt Binaries
|
||||
|
||||
To install our Hot-and-Ready binaries, run the following command:
|
||||
|
||||
<!-- markdownlint-disable -->
|
||||
_Recommended for running HapLink on the **command line**_
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.local/opt/HapLink-0.7.1
|
||||
curl -L https://github.com/ksumngs/HapLink.jl/releases/download/v0.7.1/HapLink-v0.7.1_linux.x86_64.tar.gz | tar xzv -C ~/.local/opt/HapLink-0.7.1
|
||||
ln -s ~/.local/opt/HapLink-0.7.1/bin/haplink ~/.local/bin
|
||||
conda create -n haplink -c bioconda -c conda-forge haplink -y
|
||||
conda activate haplink
|
||||
```
|
||||
|
||||
<!-- markdownlint-enable -->
|
||||
### ∴ Via Julia REPL
|
||||
|
||||
### Julia Package
|
||||
_Recommended for running HapLink within a **Julia session**_
|
||||
|
||||
HapLink is not in the General Registry (yet!), so install using the `URL#tag`
|
||||
syntax to use in the REPL.
|
||||
```julia-repl
|
||||
julia> ]
|
||||
(@v1.6) pkg> add HapLink
|
||||
```
|
||||
|
||||
```julia
|
||||
using Pkg; Pkg.add("https://github.com/ksumngs/HapLink#v0.7.1")
|
||||
To use this install of HapLink from the command line, you will need to add
|
||||
`$HOME/.julia/bin` to your `$PATH`.
|
||||
|
||||
### :package: Via Apptainer
|
||||
|
||||
_Recommended for running HapLink on an **HPC**_
|
||||
|
||||
```bash
|
||||
apptainer pull docker://ghcr.io/ksumngs/haplink.jl
|
||||
```
|
||||
|
||||
### :whale: Via Docker
|
||||
|
||||
```bash
|
||||
docker pull ghcr.io/ksumngs/haplink.jl:latest
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Please check [the docs] for actually useful instructions on how to use HapLink,
|
||||
Please check [the docs] for more detailed instructions on how to use HapLink,
|
||||
both on the command line and in the REPL.
|
||||
|
||||
The basic flow of HapLink is
|
||||
|
|
@ -79,19 +84,20 @@ You can see how this works using the files in the [example directory]:
|
|||
|
||||
```bash
|
||||
haplink variants \
|
||||
--bam example/sample.bam \
|
||||
--reference example/reference.fasta
|
||||
--output sample.vcf
|
||||
example/reference.fasta \
|
||||
example/sample.bam \
|
||||
> sample.vcf
|
||||
|
||||
haplink haplotypes \
|
||||
--bam example/sample.bam \
|
||||
--variants sample.vcf \
|
||||
--output sample.yaml
|
||||
example/reference.fasta \
|
||||
sample.vcf \
|
||||
example/sample.bam \
|
||||
> sample.yaml
|
||||
|
||||
haplink sequences \
|
||||
--haplotypes sample.yaml \
|
||||
--reference example/reference.fasta \
|
||||
--output sample.fasta
|
||||
example/reference.fasta \
|
||||
sample.yaml \
|
||||
sample.fasta
|
||||
```
|
||||
|
||||
## Development
|
||||
|
|
@ -100,12 +106,23 @@ HapLink is written in [Julia]. While the focus of the program is the command
|
|||
line interface (CLI), it also exposes a nearly identical API in the form of a
|
||||
Julia Package, which is described in [the docs].
|
||||
|
||||
### Development environment
|
||||
|
||||
For consistency, the recommended version of Julia as well as all the recommended
|
||||
formatters and commit hooks are listed in a Nix file. If you have [direnv] and
|
||||
[Nix] installed, then simply run
|
||||
|
||||
```bash
|
||||
direnv allow .
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
to setup Julia and the commit hook tools.
|
||||
|
||||
### Editing the package
|
||||
|
||||
HapLink.jl is a self-contained Julia package, and its development process is
|
||||
identical to any other package as discussed in the [Pkg documentation].
|
||||
Personally, I tend to avoid the `dev` mode, and work straight from the cloned
|
||||
package directory.
|
||||
|
||||
```shellsession
|
||||
$ git clone https://github.com/ksumngs/HapLink.jl.git
|
||||
|
|
@ -117,75 +134,22 @@ julia> using HapLink
|
|||
julia> ...
|
||||
```
|
||||
|
||||
### Creating the CLI application
|
||||
To test your changes on the command line application, ensure that
|
||||
`$HOME/.julia/bin` is on your `$PATH`, then from the Julia REPL
|
||||
|
||||
To work with the CLI directly, you can do one (or both) of the following
|
||||
|
||||
#### 1. Create a shim
|
||||
|
||||
> - _Fast to implement_
|
||||
> - _Changes are reflected immediately_
|
||||
> - _Slow execution time ([TTFP])_
|
||||
|
||||
In my `~/bin` directory, I have an executable file named `haplink` with the
|
||||
following contents:
|
||||
|
||||
```bash
|
||||
#!/bin/sh
|
||||
julia --project=$HOME/src/HapLink.jl -e 'using HapLink.haplink()' "$@"
|
||||
```julia-repl
|
||||
julia> ]
|
||||
(@v1.6) pkg> activate .
|
||||
(HapLink) pkg> build
|
||||
```
|
||||
|
||||
#### 2. Compile the binary
|
||||
|
||||
> - _More involved implementation_
|
||||
> - _Updates must be recompiled_
|
||||
> - _Fast execution time_
|
||||
|
||||
Binaries are compiled using [PackageCompiler.jl], using the recipe in
|
||||
[.github/workflows/build.yml].
|
||||
|
||||
1. Get the [official Julia release] (disto packages generally don't work)
|
||||
2. Install PackageCompiler into that Julia depot
|
||||
|
||||
```shellsession
|
||||
(@v1.6) pkg> install PackageCompiler
|
||||
```
|
||||
|
||||
3. Run `PackageCompiler.create_app()` with the following options
|
||||
|
||||
```julia
|
||||
using PackageCompiler
|
||||
create_app(
|
||||
"/path/to/HapLink.jl",
|
||||
"/path/to/output",
|
||||
precompile_execution_file="precompile_app.jl",
|
||||
executables=["haplink" => "haplink"],
|
||||
cpu_target="x86-64",
|
||||
)
|
||||
```
|
||||
|
||||
Compilation can take over 15 minutes to complete, so be patient!
|
||||
|
||||
## Contributors
|
||||
|
||||
It's pretty lonely here: HapLink was solely made by Thomas Christensen while
|
||||
working at Kansas State University. Why don't you [open a pull request] and fix
|
||||
that?
|
||||
This will update the application shim to include your changes.
|
||||
|
||||
[semver]: https://semver.org
|
||||
[oneflow]:
|
||||
https://www.endoflineblog.com/oneflow-a-git-branching-model-and-workflow
|
||||
[docker image over on quay]:
|
||||
https://quay.io/repository/millironx/julia_bam-readcounts
|
||||
[the docs]: https://ksumngs.github.io/HapLink.jl/stable
|
||||
[example directory]: https://github.com/ksumngs/HapLink.jl/tree/master/example
|
||||
[julia]: https://julialang.org
|
||||
[pkg documentation]:
|
||||
https://pkgdocs.julialang.org/v1/managing-packages/#developing
|
||||
[ttfp]: https://viralinstruction.com/posts/badjulia/#compile_time_latency
|
||||
[packagecompiler.jl]:
|
||||
https://julialang.github.io/PackageCompiler.jl/stable/apps.html
|
||||
[.github/workflows/build.yml]:
|
||||
https://github.com/ksumngs/HapLink.jl/blob/master/.github/workflows/build.yml
|
||||
[official julia release]: https://julialang.org/downloads/
|
||||
[open a pull request]: https://github.com/ksumngs/HapLink.jl/compare
|
||||
|
|
|
|||
11
cspell.json
11
cspell.json
|
|
@ -1,11 +0,0 @@
|
|||
{
|
||||
"version": "0.2",
|
||||
"language": "en",
|
||||
"allowCompoundWords": true,
|
||||
"dictionaryDefinitions": [
|
||||
{ "name": "bioinformatics", "path": "./.cspell/bioinformatics.txt" },
|
||||
{ "name": "julia", "path": "./.cspell/julia.txt" }
|
||||
],
|
||||
"dictionaries": ["bioinformatics", "julia"],
|
||||
"includeRegExpList": ["#.*", "string"]
|
||||
}
|
||||
10
default.nix
10
default.nix
|
|
@ -1,10 +0,0 @@
|
|||
{ pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/2f82431c7fdfa641f9816011286a2fa2c489eedb.tar.gz") {} }:
|
||||
|
||||
pkgs.mkShell {
|
||||
buildInputs = [
|
||||
pkgs.julia
|
||||
pkgs.pre-commit
|
||||
pkgs.nodejs
|
||||
];
|
||||
|
||||
}
|
||||
|
|
@ -14,22 +14,16 @@ HapLink
|
|||
## Welcome
|
||||
|
||||
Howdy! 🤠 And welcome to HapLink! 👋 HapLink is a command-line suite of tools to
|
||||
enable the exploration of viral quasispecies within a single metagenomic sample.
|
||||
Every piece eventually builds up to our viral haplotype caller, which uses
|
||||
linkage disequilibrium on long sequencing reads (💡 think
|
||||
enable the exploration of viral quasispecies within a single sample.
|
||||
Our viral haplotype caller uses
|
||||
linkage disequilibrium on long sequencing reads (think
|
||||
[Oxford Nanopore](https://nanoporetech.com/) or
|
||||
[PacBio HiFi](https://www.pacb.com/)) to identify genetic mutations that are
|
||||
conserved within a single virus particle.
|
||||
likely conserved within a single virus particle.
|
||||
|
||||
This manual will cover the different ways of using HapLink, starting with a few
|
||||
tutorials before diving into the details of our reference section.
|
||||
|
||||
### Contents
|
||||
|
||||
```@contents
|
||||
|
||||
```
|
||||
|
||||
## Getting started
|
||||
|
||||
Ready to dive in? 🤿 Here's a 30,000-foot view
|
||||
|
|
@ -45,9 +39,9 @@ julia \
|
|||
echo 'export PATH=$HOME/.julia/bin:$PATH' >> $HOME/.bashrc
|
||||
source ~/.bashrc
|
||||
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/reference.fasta
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/sample.bam
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/sample.bam.bai
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/reference.fasta
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/sample.bam
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/sample.bam.bai
|
||||
|
||||
haplink variants \
|
||||
reference.fasta \
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
# [In the beginning](@id install-tutorial)
|
||||
|
||||
There are many different ways to install HapLink. Here we walk you through two
|
||||
of the most common. If you're one of the 0.01% who needs a different method,
|
||||
then we trust you can extrapolate from these instructions. Note that all of
|
||||
these tutorials assume you have a Unix-type system (MacOS, BSD, Linux). Windows
|
||||
command-line support is basically non-existant!
|
||||
There are many different ways to install HapLink. Note that some of these
|
||||
install methods are platform-specific.
|
||||
|
||||
```@contents
|
||||
Pages = ["1-install.md"]
|
||||
|
|
@ -18,6 +15,10 @@ here to judge. 👩⚖️ It's easy and portable and is bundled on most HPCs.
|
|||
already have conda (or [mamba](https://mamba.readthedocs.io/en/latest/)), then
|
||||
this route is probably for you.
|
||||
|
||||
!!! warning
|
||||
|
||||
Bioconda install is only supported on Linux
|
||||
|
||||
### Install HapLink inside a conda environment
|
||||
|
||||
We'll make a new environment with the totally original name "haplink," to house
|
||||
|
|
@ -43,14 +44,83 @@ Next, cross your fingers 🤞 and run the following command:
|
|||
haplink --help
|
||||
```
|
||||
|
||||
Check for error messages, but otherwise you're done. You can reuse your
|
||||
The most common error is
|
||||
|
||||
```shellsession
|
||||
The following package could not be installed
|
||||
└─ haplink does not exist (perhaps a typo or a missing channel).
|
||||
```
|
||||
|
||||
If this happens,
|
||||
|
||||
1. Check your spelling in the install command
|
||||
2. Check that you are using an x86-64 version of conda on Linux
|
||||
|
||||
Another common error is
|
||||
|
||||
```shellsession
|
||||
bash: haplink: command not found
|
||||
bash: /bin/julia: No such file or directory
|
||||
```
|
||||
|
||||
If this happens, check that `CONDA_PREFIX` is set correctly by running
|
||||
`echo "$CONDA_PREFIX"`, and/or rerun `conda activate haplink`.
|
||||
|
||||
If there are no error messages, you're done. You can reuse your
|
||||
`haplink` environment for the [next tutorial](@ref cli-tutorial).
|
||||
|
||||
## Comonicon
|
||||
## Container install
|
||||
|
||||
One option for installing HapLink is to not install HapLink at all. Or rather, pull a
|
||||
[container](https://apptainer.org/docs/user/1.2/introduction.html#why-use-containers)
|
||||
that already has HapLink installed, and process files inside of it. HapLink
|
||||
provides a Docker container that has been tested on [Apptainer](https://apptainer.org).
|
||||
You should be able to use nearly any container software to run HapLink, but we
|
||||
recommend Apptainer, due to its ubiquity on HPCs, simple file permissions, and
|
||||
increased security.
|
||||
|
||||
### Download the container
|
||||
|
||||
With Apptainer installed, run
|
||||
|
||||
```bash
|
||||
apptainer pull docker://ghcr.io/ksumngs/haplink.jl
|
||||
```
|
||||
|
||||
!!! info "Output"
|
||||
|
||||
- haplink.jl_latest.sif
|
||||
|
||||
### Run the container as a one-off
|
||||
|
||||
You can check to see if the container downloaded correctly by using the
|
||||
`apptainer exec` command.
|
||||
|
||||
```bash
|
||||
apptainer exec haplink.jl_latest.sif haplink --version
|
||||
```
|
||||
|
||||
### Enter the container to run multiple commands
|
||||
|
||||
For more complex commands, it is often better to enter the container's shell
|
||||
environment and execute commands within the container. Apptainer will include
|
||||
all files in your working directory as part of the container when doing this.
|
||||
|
||||
```shellsession
|
||||
$ apptainer shell haplink.jl_latest.sif
|
||||
Apptainer> haplink --version
|
||||
```
|
||||
|
||||
## Julia dependent-install
|
||||
|
||||
HapLink is unashamedly a Julia program. If you already have Julia installed,
|
||||
then you can leverage that existing Julia install to install HapLink thanks to
|
||||
the power of [Comonicon.jl](https://comonicon.org/).
|
||||
then you can leverage that existing Julia install to install HapLink.
|
||||
|
||||
!!! tip
|
||||
|
||||
Under the hood, HapLink can self-install thanks to the power of
|
||||
[Comonicon.jl](https://comonicon.org/). Check out their docs if you want to
|
||||
learn more, or want to troubleshoot a direct install.
|
||||
|
||||
### Check your Julia version
|
||||
|
||||
|
|
@ -63,9 +133,9 @@ julia --version
|
|||
|
||||
### Add HapLink to a temporary environment and install
|
||||
|
||||
Using the magic 🪄 of Julia's environments, we can do a "temp install" of the
|
||||
Using the magic of Julia's environments, we can do a "temp install" of the
|
||||
HapLink package to a temporary directory environment. Because this is a fresh
|
||||
install, though, it will trigger Comonicon to install the application to a
|
||||
install, though, it will trigger an installation of the application to a
|
||||
brand-new isolated environment.
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# [Kicking the tires](@id cli-tutorial)
|
||||
# [Starting at the command line](@id cli-tutorial)
|
||||
|
||||
At this point, we'll play with the example sequences included _gratis_ 💰 with
|
||||
HapLink. No, they don't represent anything ☁️, and they aren't particularly
|
||||
HapLink. No, they don't represent anything, and they aren't particularly
|
||||
interesting 🥱, but they **do** run fast 🏇, so we can get a handle on how the
|
||||
interface and workflow operate.
|
||||
|
||||
|
|
@ -9,14 +9,14 @@ interface and workflow operate.
|
|||
Pages = ["2-examples.md"]
|
||||
```
|
||||
|
||||
## Getting the goods
|
||||
## Getting the goods: extracting example files
|
||||
|
||||
Let's get the example files from the code repository. In your terminal, run
|
||||
|
||||
```bash
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/reference.fasta
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/sample.bam
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/sample.bam.bai
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/reference.fasta
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/sample.bam
|
||||
wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0/example/sample.bam.bai
|
||||
```
|
||||
|
||||
!!! info "Output"
|
||||
|
|
@ -25,7 +25,7 @@ wget https://github.com/ksumngs/HapLink.jl/raw/v1.0.0-rc1/example/sample.bam.bai
|
|||
- sample.bam
|
||||
- sample.bam.bai
|
||||
|
||||
## Spot the difference
|
||||
## Spot the difference: differentiating between sequencing errors and mutations
|
||||
|
||||
In order for HapLink to call haplotypes, it needs to know which sequence
|
||||
differences are due to sequencing errors, and which are due to genetic mutation.
|
||||
|
|
@ -43,11 +43,11 @@ haplink variants reference.fasta sample.bam
|
|||
_None_
|
||||
|
||||
HapLink by default outputs to standard output, so the variant calls were printed
|
||||
on your screen instead of saved 😡. That's okay, though 😌. It's often good to
|
||||
visually check your variant calls, and it this case we absolutely needed to.
|
||||
on your screen instead of saved. That's okay, though. It's often good to
|
||||
visually check your variant calls, and in this case, we absolutely needed to.
|
||||
Notice that none of the variants got a `PASS` filter. In fact, all of them were
|
||||
weeded out by too high of thresholds for depth (remember we only have 10
|
||||
sequences) and significance. Let's readjust (and save our results this time).
|
||||
weeded out by the depth threshold (remember we only have 10 sequences) and
|
||||
significance. Let's readjust (and save our results this time).
|
||||
|
||||
```bash
|
||||
haplink \
|
||||
|
|
@ -65,7 +65,7 @@ haplink \
|
|||
|
||||
These settings seemed to work out well. Let's stick with them and move on.
|
||||
|
||||
## The general lay of the land
|
||||
## The general lay of the land: generating consensus sequences from variant calls
|
||||
|
||||
At this point, we're going to take a break from haplotype calling and convert
|
||||
those variant calls into a useful summary: the consensus sequence. HapLink can
|
||||
|
|
@ -79,10 +79,10 @@ haplink consensus reference.fasta sample.vcf | tee sample.consensus.fasta
|
|||
|
||||
- sample.consensus.fasta
|
||||
|
||||
## The star attraction
|
||||
## The star attraction: calling haplotypes from sequence data
|
||||
|
||||
And now it's time for haplotype calling. Before you get your hopes up, there are
|
||||
no _true_ haplotypes in this file. If 10 reads could yield subconsenus
|
||||
no _true_ haplotypes in this file. If 10 reads could manifest subconsensus
|
||||
mysteries, then bioinformatics would be a super easy job. Alas, we live in the
|
||||
real world, and we'll have to stretch mathematical constructs to get anything
|
||||
out of these reads.
|
||||
|
|
@ -106,7 +106,7 @@ You can see that HapLink found only one haplotype in this alignment, but
|
|||
formatted in HapLink's haplotype scheme. The first haplotype in any output file
|
||||
is always the consensus sequence.
|
||||
|
||||
## Haplotypes in the Matrix
|
||||
## Haplotypes in the Matrix: simulating additional reads during haplotype calling
|
||||
|
||||
If you have reads that don't span the entire genome (like we have here), you can
|
||||
use HapLink's maximum likelihood simulator to "create" full-length reads by
|
||||
|
|
@ -136,7 +136,7 @@ Still nothing, huh? Like I said, no haplotypes here, and simulation can't change
|
|||
that. Note that simulating full-length reads used _a lot_ more computational
|
||||
power, so you should try to stick with full-length reads when you can!
|
||||
|
||||
## But, what does it mean?
|
||||
## But, what does it mean? Reformatting into fasta format
|
||||
|
||||
HapLink's haplotype YAML files contain everything needed to recreate the
|
||||
haplotype computation, but they can't really be used by any other programs.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# [Playing well with others](@id integration-tutorial)
|
||||
# [Playing well with others: Combining HapLink with external tools](@id integration-tutorial)
|
||||
|
||||
HapLink is not a one-man show: it definitely knows how to cooperate with other
|
||||
HapLink is not a one-man show: it knows how to cooperate with other
|
||||
tools! In this tutorial, we'll let HapLink do the haplotype calling, but use
|
||||
other tools to go from reads to variant calls, and from haplotypes to
|
||||
phylogenies.
|
||||
|
|
@ -53,7 +53,7 @@ esearch \
|
|||
Next, we'll download one of the pools from the validation set from SRA.
|
||||
|
||||
```bash
|
||||
fasterq-dump "SUB13489216"
|
||||
fasterq-dump --concatenate-reads -X "SRR24796010" | gzip > "IDV-Aug2022-P2.fastq.gz"
|
||||
```
|
||||
|
||||
!!! info "Output"
|
||||
|
|
@ -66,7 +66,7 @@ fasterq-dump "SUB13489216"
|
|||
We have a set of Nanopore reads and a reference genome to go with them. We'll
|
||||
use [minimap2](https://doi.org/10.1093/bioinformatics/bty191) to align the reads
|
||||
to reference. minimap2 requires the `-a` flag to output in SAM format, and uses
|
||||
the `-x` flag to tweak the settings for optimal Nanoore alignment. We then run
|
||||
the `-x` flag to tweak the settings for optimal Nanopore alignment. We then run
|
||||
those reads through `samtools sort` and `samtools index` to reduce the
|
||||
computational load needed to find reads by our downstream tools, and
|
||||
`samtools view -b` to convert the SAM file into a compressed BAM file.
|
||||
|
|
|
|||
|
|
@ -4,9 +4,16 @@ Julia is a just-in-time compiled language. Practically, that means that every
|
|||
time you restart Julia, you have to recompile all the code you were running.
|
||||
Using HapLink on the command line involves up to four different commands.
|
||||
Translation: up to four cases where you lose time to recompiling code that was
|
||||
just running. Surely there's a better way, right? Yep, you can stay within a
|
||||
just running. Surely there's a better way, right? Well, you can stay within a
|
||||
single Julia session by using HapLink's REPL mode.
|
||||
|
||||
!!! tip
|
||||
|
||||
Julia's latency (aka, Time-to-first-plot or TTFP) is a big deal among Julia
|
||||
programmers. Although there's no "definitive" place to learn about TTFP,
|
||||
[Jakob Nissen's blog](https://viralinstruction.com/posts/badjulia/#compile_time_latency)
|
||||
provides some great explanations and actionable advice for reducing latency.
|
||||
|
||||
```@contents
|
||||
Pages = ["4-repl.md"]
|
||||
```
|
||||
|
|
@ -137,7 +144,7 @@ map!(
|
|||
|
||||
Now that we have a consensus sequence, we can properly import the reads for
|
||||
haplotype calling into HapLink's specialized [`Pseudoread`](@ref) class.
|
||||
There is a convient [`pseudoreads`](@ref) function that can directly convert a
|
||||
There is a convenient [`pseudoreads`](@ref) function that can directly convert a
|
||||
BAM file for us.
|
||||
|
||||
```@repl main
|
||||
|
|
|
|||
|
|
@ -253,7 +253,16 @@ dimensional matrix.
|
|||
function occurrence_matrix(
|
||||
haplotype::AbstractArray{Variation{S,T}}, reads::AbstractArray{Haplotype{S,T}}
|
||||
) where {S<:BioSequence,T<:BioSymbol}
|
||||
hapcounts = SparseArray{UInt}(undef, Tuple(repeat([2], length(haplotype))))
|
||||
Q = UInt
|
||||
for int_type in [UInt8, UInt16, UInt32, UInt64, UInt128]
|
||||
if length(reads) < typemax(int_type)
|
||||
Q = int_type
|
||||
break
|
||||
end #if
|
||||
error("Too many reads to represent in memory")
|
||||
end #for
|
||||
|
||||
hapcounts = SparseArray{Q}(undef, Tuple(repeat([2], length(haplotype))))
|
||||
|
||||
for read in reads
|
||||
coordinates = zeros(Int, size(haplotype))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue