Compare commits
4 commits
0c1ba0470b
...
3130e92395
| Author | SHA1 | Date | |
|---|---|---|---|
| 3130e92395 | |||
| 9df8163123 | |||
| 81f49a8ad8 | |||
| 9a91fed82f |
7 changed files with 274 additions and 1 deletions
1
.JuliaFormatter.toml
Normal file
1
.JuliaFormatter.toml
Normal file
|
|
@ -0,0 +1 @@
|
|||
style="blue"
|
||||
28
.gitignore
vendored
28
.gitignore
vendored
|
|
@ -2,4 +2,30 @@
|
|||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/extensions.json
|
||||
|
||||
### Julia gitignore ###
|
||||
## Files generated by invoking Julia with --code-coverage
|
||||
*.jl.cov
|
||||
*.jl.*.cov
|
||||
|
||||
# Files generated by invoking Julia with --track-allocation
|
||||
*.jl.mem
|
||||
|
||||
# System-specific files and directories generated by the BinaryProvider and BinDeps packages
|
||||
# They contain absolute paths specific to the host computer, and so should not be committed
|
||||
deps/deps.jl
|
||||
deps/build.log
|
||||
deps/downloads/
|
||||
deps/usr/
|
||||
deps/src/
|
||||
|
||||
# Build artifacts for creating documentation generated by the Documenter package
|
||||
docs/build/
|
||||
docs/site/
|
||||
|
||||
# File generated by Pkg, the package manager, based on a corresponding Project.toml
|
||||
# It records a fixed state of all packages used by the project. As such, it should not be
|
||||
# committed for packages, but should be committed for applications that require a static
|
||||
# environment.
|
||||
Manifest.toml
|
||||
|
|
|
|||
35
Project.toml
Normal file
35
Project.toml
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
name = "Cowcalf_rumen_metagenomic_pipeline"
|
||||
uuid = "7d0d08ee-6932-474e-8e49-cd3f4679ce2d"
|
||||
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> and contributors"]
|
||||
version = "0.1.0"
|
||||
|
||||
[deps]
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
|
||||
Cowsay = "b6370f49-8ad1-4651-ad9e-3639b35da0e9"
|
||||
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
|
||||
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
|
||||
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
|
||||
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
|
||||
|
||||
[compat]
|
||||
CSV = "0.10.15"
|
||||
Conda = "1.10.2"
|
||||
Cowsay = "1.0.0"
|
||||
Dagger = "0.18.14"
|
||||
DataFrames = "1.7.0"
|
||||
Distributed = "1.11.0"
|
||||
Glob = "1.3.1"
|
||||
HTTP = "1.10.16"
|
||||
URIs = "1.5.2"
|
||||
YAML = "0.4.13"
|
||||
julia = "1.11"
|
||||
|
||||
[extras]
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[targets]
|
||||
test = ["Test"]
|
||||
8
conda_envs/metaxa2.yml
Normal file
8
conda_envs/metaxa2.yml
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
channels:
|
||||
- bioconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- metaxa=2.2
|
||||
- blast-legacy=2.2.26
|
||||
- hmmer=3.1
|
||||
- mafft=7.525
|
||||
90
src/Cowcalf_rumen_metagenomic_pipeline.jl
Normal file
90
src/Cowcalf_rumen_metagenomic_pipeline.jl
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
module Cowcalf_rumen_metagenomic_pipeline

using Conda: Conda, runconda
using Cowsay: cowsay
using CSV: CSV
using DataFrames: DataFrame, rename!
using Distributed: pmap, @everywhere
using Glob: GlobMatch, glob
using HTTP: HTTP
using URIs: URI
using YAML: YAML

export main

include("Metaxa.jl")
using .Metaxa: Metaxa

# Parse a conda environment definition, either from a local file path or from the body of
# an HTTP GET on a URI. Dispatch on the argument type selects the source.
_fetch_yaml_contents(yaml::AbstractString) = YAML.load_file(yaml)
_fetch_yaml_contents(yaml::URI) = YAML.load(String(HTTP.get(yaml).body))

"""
    setup_remote_conda_environment(yaml, env_name::Symbol) -> Symbol

Populate the Conda.jl environment `env_name` from an environment definition.

`yaml` is either a path to a local YAML file or a `URI` that is downloaded first. Every
entry under the definition's `channels` key is added as a channel of `env_name`, and the
entries under `dependencies` are installed into it.

On macOS, `CONDA_SUBDIR` is set to `osx-64` and the same subdir is written to the
environment's configuration before anything is installed.

Returns `env_name`.
"""
function setup_remote_conda_environment(yaml::Union{AbstractString,URI}, env_name::Symbol)
    ENV["CONDA_JL_USE_MINIFORGE"] = "1"

    # Install x86 packages via Rosetta 2 on macOS
    if Sys.isapple()
        ENV["CONDA_SUBDIR"] = "osx-64"
        runconda(`config --env --set subdir osx-64`, env_name)
    end #if

    conda_definition = _fetch_yaml_contents(yaml)

    # Channels are added purely for their side effect, so `foreach` rather than `map`
    foreach(c -> Conda.add_channel(c, env_name), conda_definition["channels"])
    Conda.add(conda_definition["dependencies"], env_name)

    return env_name
end #function

"""
    import_metadata(metadata_tsv::AbstractString) -> DataFrame

Read `metadata_tsv` with `CSV.File` into a `DataFrame` and rename its first column to
`sample_name`, whatever that column was called in the file.
"""
function import_metadata(metadata_tsv::AbstractString)
    df = DataFrame(CSV.File(metadata_tsv))
    rename!(df, 1 => :sample_name)
    return df
end #function

# Absolute path of the first file matching `pattern` relative to the working directory.
# Fails with a message naming the pattern instead of the bare `BoundsError` that calling
# `first` on an empty match list would raise.
function _first_match(pattern::AbstractString)
    # Use explicit GlobMatch constructor b/c we need to interpolate values
    matches = glob(GlobMatch(pattern))
    isempty(matches) && error("No file matching '$pattern' was found in $(pwd())")
    return abspath(first(matches))
end #function

"""
    sample_files(samplenames::AbstractVector{<:AbstractString})

Locate the paired FASTQ files of every sample in the current working directory.

For each sample name, the first file matching `<samplename>*1*.fastq.gz` and the first file
matching `<samplename>*2*.fastq.gz` are taken as the pair.

Returns a vector with one `(samplename, (fastq1, fastq2))` tuple per sample, where both
paths are absolute. Throws an error if either glob pattern matches no file.
"""
function sample_files(samplenames::AbstractVector{<:AbstractString})
    function _s(samplename::AbstractString)
        return (
            samplename,
            (
                _first_match("$(samplename)*1*.fastq.gz"),
                _first_match("$(samplename)*2*.fastq.gz"),
            ),
        )
    end #function

    return map(_s, samplenames)
end #function

# Program entry point. The last element of `ARGS` is the path to the sample metadata table.
# Sets up the `qiime2` and `metaxa2` conda environments, resolves the FASTQ pair of every
# sample in the working directory, runs the Metaxa taxonomy step for each sample through
# `pmap`, collects the per-sample results into one feature table, and copies that table into
# the working directory. Returns 0.
function (@main)(ARGS)
    isempty(ARGS) && throw(
        ArgumentError("expected the path to the sample metadata file as the last argument"),
    )
    metadata_file = pop!(ARGS)

    # NOTE(review): this URL names the osx build of the QIIME 2 environment and is used on
    # every platform -- confirm that is intended.
    setup_remote_conda_environment(
        URI(
            "https://data.qiime2.org/distro/metagenome/qiime2-metagenome-2024.10-py310-osx-conda.yml",
        ),
        :qiime2,
    )
    setup_remote_conda_environment(
        joinpath(@__DIR__, "..", "conda_envs", "metaxa2.yml"),
        :metaxa2,
    )

    metadata = import_metadata(metadata_file)
    fastq_files = sample_files(metadata[!, :sample_name])

    # `@everywhere` must be evaluated at top level, hence the `@eval` wrapper
    @eval begin
        @everywhere begin
            include(joinpath(@__DIR__, "Metaxa.jl"))
            using .Metaxa: Metaxa
        end #@everywhere
    end #@eval
    taxonomy_files = pmap(x -> Metaxa.taxonomy(first(x), last(x)), fastq_files)
    feature_table = Metaxa.data_collector(taxonomy_files...)

    # `cp` needs a destination *file* path: passing the (already existing) working
    # directory itself makes it throw. `force = true` lets a re-run replace an earlier
    # copy of the table.
    cp(feature_table, joinpath(pwd(), basename(feature_table)); force=true)

    cowsay("Hello from Cowcalf_rumen_metagenomic_pipeline")
    return 0
end #function

end #module
|
||||
77
src/Metaxa.jl
Normal file
77
src/Metaxa.jl
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
module Metaxa

using Conda: runconda

include("ProcessHelper.jl")

using .ProcessHelper: exec_in_temp_dir

export taxonomy

"""
    _classifier(samplename, fastq1, fastq2) -> String

Run `metaxa2` from the `:metaxa2` conda environment on the read pair `fastq1`/`fastq2`,
using `samplename` as the output prefix. Output is written relative to the current working
directory.

Returns the absolute path of `<samplename>.taxonomy.txt`, and throws an error if that file
does not exist after the run.
"""
function _classifier(
    samplename::AbstractString,
    fastq1::AbstractString,
    fastq2::AbstractString,
)
    runconda(
        `run metaxa2 \
            -1 $fastq1 \
            -2 $fastq2 \
            -o $samplename \
            --format fastq \
            --cpu 4 \
            --summary F \
            --graphical F \
            --fasta F \
            --taxonomy T
        `,
        :metaxa2,
    )
    ispath("$samplename.taxonomy.txt") ||
        error("metaxa2 ran, but $samplename.taxonomy.txt was not found!")
    return abspath("$samplename.taxonomy.txt")
end #function

"""
    _taxonomy_traversal(samplename, taxonomy) -> String

Run `metaxa2_ttt` from the `:metaxa2` conda environment on the taxonomy file `taxonomy`,
using `samplename` as the output prefix. Output is written relative to the current working
directory.

Returns the absolute path of `<samplename>.level_7.txt`, and throws an error if that file
does not exist after the run.
"""
function _taxonomy_traversal(samplename::AbstractString, taxonomy::AbstractString)
    runconda(
        `run metaxa2_ttt \
            -i $taxonomy \
            -o $samplename \
            -m 7 \
            -n 7 \
            --summary F
        `,
        :metaxa2,
    )
    ispath("$samplename.level_7.txt") ||
        error("metaxa2 ran, but $samplename.level_7.txt was not found!")
    return abspath("$samplename.level_7.txt")
end #function

"""
    taxonomy(samplename, fastq::Tuple{<:AbstractString,<:AbstractString}) -> String

Classify the paired reads in `fastq` and summarise the result at taxonomic level 7.

Each of the two steps (`metaxa2`, then `metaxa2_ttt`) is executed inside its own temporary
directory. Returns the absolute path of the resulting `<samplename>.level_7.txt` file.
"""
function taxonomy(
    samplename::AbstractString,
    fastq::Tuple{<:AbstractString,<:AbstractString},
)
    taxonomy_file = exec_in_temp_dir(_classifier, samplename, fastq...)
    level_7_taxonomy_file = exec_in_temp_dir(_taxonomy_traversal, samplename, taxonomy_file)
    return level_7_taxonomy_file
end #function

"""
    _dc(taxonomies::AbstractString...) -> String

Run `metaxa2_dc` from the `:metaxa2` conda environment on all `taxonomies`, writing
`feature-table.tsv` into the current working directory.

Returns the absolute path of `feature-table.tsv`, and throws an error if that file does not
exist after the run.
"""
function _dc(taxonomies::AbstractString...)
    # `run` is required so that conda executes the tool inside the environment, exactly
    # like the other Metaxa2 invocations in this module. Interpolating the tuple itself
    # (not a space-joined string) makes every path a separate command-line argument.
    runconda(
        `run metaxa2_dc \
            -o feature-table.tsv \
            $taxonomies
        `,
        :metaxa2,
    )
    ispath("feature-table.tsv") ||
        error("metaxa2 ran, but feature-table.tsv was not found!")
    return abspath("feature-table.tsv")
end #function

"""
    data_collector(taxonomies::AbstractString...) -> String

Combine the per-sample taxonomy files `taxonomies` into a single feature table by running
`metaxa2_dc` inside a temporary directory.

Returns the absolute path of the generated `feature-table.tsv`. Throws an `ArgumentError`
when no taxonomy file is given.
"""
function data_collector(taxonomies::AbstractString...)
    isempty(taxonomies) &&
        throw(ArgumentError("at least one taxonomy file is required"))
    return exec_in_temp_dir(_dc, taxonomies...)
end #function

end #module
|
||||
36
src/ProcessHelper.jl
Normal file
36
src/ProcessHelper.jl
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
module ProcessHelper

export sym_temp

"""
    sym_temp(files::AbstractString...) -> (tmp_dir, links)

Create a new temporary directory `tmp_dir` and, for each path in `files`, create a symbolic
link inside it that points at the fully resolved (`realpath`) original and carries the
original's base name. Nothing is copied.

Returns a tuple of the directory path and a tuple of the link paths, in the order of
`files`. The directory is created with `cleanup = false`.
"""
function sym_temp(files::AbstractString...)
    tmp_dir = mktempdir(; cleanup=false)
    @info "Creating temporary directory $tmp_dir"

    links = map(files) do file
        symlink_path = joinpath(tmp_dir, basename(file))
        symlink(realpath(file), symlink_path)
        @info "Symlinked $file to $symlink_path"
        symlink_path
    end #do

    return (tmp_dir, links)
end #function

"""
    exec_in_temp_dir(f::Function, samplename::AbstractString, files::AbstractString...)
    exec_in_temp_dir(f::Function, files::AbstractString...)

Link `files` into a fresh temporary directory (see [`sym_temp`](@ref)), make that directory
the working directory for the duration of the call, and return the result of
`f(samplename, links...)` (first form) or `f(links...)` (second form).

Because the first form is the more specific method, any call that passes at least one
string after `f` dispatches to it: the first string is handed to `f` unchanged and is not
linked. The second form is only selected when no strings are passed at all.
"""
function exec_in_temp_dir(f::Function, samplename::AbstractString, files::AbstractString...)
    workdir, links = sym_temp(files...)
    return cd(workdir) do
        f(samplename, links...)
    end #do
end #function

function exec_in_temp_dir(f::Function, files::AbstractString...)
    workdir, links = sym_temp(files...)
    return cd(workdir) do
        f(links...)
    end #do
end #function

end #module
|
||||
Loading…
Add table
Add a link
Reference in a new issue