Compare commits

..

No commits in common. "3130e92395724b9115854b3019678dad20615d4d" and "0c1ba0470b87907a5a56219fe02b23de26e2725a" have entirely different histories.

7 changed files with 1 additions and 274 deletions

View file

@ -1 +0,0 @@
style="blue"

28
.gitignore vendored
View file

@ -2,30 +2,4 @@
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
### Julia gitignore ###
## Files generated by invoking Julia with --code-coverage
*.jl.cov
*.jl.*.cov
# Files generated by invoking Julia with --track-allocation
*.jl.mem
# System-specific files and directories generated by the BinaryProvider and BinDeps packages
# They contain absolute paths specific to the host computer, and so should not be committed
deps/deps.jl
deps/build.log
deps/downloads/
deps/usr/
deps/src/
# Build artifacts for creating documentation generated by the Documenter package
docs/build/
docs/site/
# File generated by Pkg, the package manager, based on a corresponding Project.toml
# It records a fixed state of all packages used by the project. As such, it should not be
# committed for packages, but should be committed for applications that require a static
# environment.
Manifest.toml
!.vscode/extensions.json

View file

@ -1,35 +0,0 @@
name = "Cowcalf_rumen_metagenomic_pipeline"
uuid = "7d0d08ee-6932-474e-8e49-cd3f4679ce2d"
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> and contributors"]
version = "0.1.0"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
Cowsay = "b6370f49-8ad1-4651-ad9e-3639b35da0e9"
Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
[compat]
CSV = "0.10.15"
Conda = "1.10.2"
Cowsay = "1.0.0"
Dagger = "0.18.14"
DataFrames = "1.7.0"
Distributed = "1.11.0"
Glob = "1.3.1"
HTTP = "1.10.16"
URIs = "1.5.2"
YAML = "0.4.13"
julia = "1.11"
[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
test = ["Test"]

View file

@ -1,8 +0,0 @@
channels:
- bioconda
- conda-forge
dependencies:
- metaxa=2.2
- blast-legacy=2.2.26
- hmmer=3.1
- mafft=7.525

View file

@ -1,90 +0,0 @@
module Cowcalf_rumen_metagenomic_pipeline
using Conda: Conda, runconda
using Cowsay: cowsay
using CSV: CSV
using DataFrames: DataFrame, rename!
using Distributed: pmap, @everywhere
using Glob: GlobMatch, glob
using HTTP: HTTP
using URIs: URI
using YAML: YAML
export main
include("Metaxa.jl")
using .Metaxa: Metaxa
"""
    _fetch_yaml_contents(yaml::AbstractString)
    _fetch_yaml_contents(yaml::URI)

Load and parse a YAML document. A string is treated as a path and read with
`YAML.load_file`; a `URI` is fetched with `HTTP.get` and its response body is parsed.
"""
function _fetch_yaml_contents(yaml::AbstractString)
    return YAML.load_file(yaml)
end #function

function _fetch_yaml_contents(yaml::URI)
    response = HTTP.get(yaml)
    return YAML.load(String(response.body))
end #function
"""
    setup_remote_conda_environment(yaml, env_name::Symbol) -> Symbol

Populate the Conda.jl environment `env_name` from a conda environment definition.

`yaml` is either a path to a local YAML file or a `URI` pointing at one. Every entry under
the definition's `channels` key is added as a channel of `env_name`, then the entries under
`dependencies` are installed into it. Returns `env_name`.
"""
function setup_remote_conda_environment(yaml::Union{AbstractString,URI}, env_name::Symbol)
    ENV["CONDA_JL_USE_MINIFORGE"] = "1"

    # Install x86 packages via Rosetta2 on MacOS
    if Sys.isapple()
        ENV["CONDA_SUBDIR"] = "osx-64"
        runconda(`config --env --set subdir osx-64`, env_name)
    end #if

    definition = _fetch_yaml_contents(yaml)

    # Channels are registered one by one, in the order the definition lists them
    for channel in definition["channels"]
        Conda.add_channel(channel, env_name)
    end #for
    Conda.add(definition["dependencies"], env_name)

    return env_name
end #function
"""
    import_metadata(metadata_tsv::AbstractString) -> DataFrame

Read the delimited file at `metadata_tsv` into a `DataFrame` and rename its first column to
`sample_name`, whatever it was called in the file.
"""
function import_metadata(metadata_tsv::AbstractString)
    metadata = metadata_tsv |> CSV.File |> DataFrame
    # `rename!` mutates and returns the same data frame
    return rename!(metadata, 1 => :sample_name)
end #function
"""
    sample_files(samplenames::AbstractVector{<:AbstractString})

Locate the paired FASTQ files of every sample, searching relative to the current working
directory.

For each sample name the first match of `<samplename>*1*.fastq.gz` and the first match of
`<samplename>*2*.fastq.gz` are used. Returns a vector with one
`(samplename, (fastq1, fastq2))` entry per sample, where both paths are absolute.

Throws an `ErrorException` naming the pattern when a pattern matches no file.
"""
function sample_files(samplenames::AbstractVector{<:AbstractString})
    # Resolve one glob pattern to an absolute path, failing loudly when nothing matches
    # instead of surfacing the generic "empty collection" error from `first`
    function _first_match(pattern::AbstractString)
        # Use explicit GlobMatch constructor b/c we need to interpolate values
        matches = glob(GlobMatch(pattern))
        isempty(matches) && error("No file matching $pattern was found in $(pwd())!")
        return abspath(first(matches))
    end #function

    function _s(samplename::AbstractString)
        return (
            samplename,
            (
                _first_match("$(samplename)*1*.fastq.gz"),
                _first_match("$(samplename)*2*.fastq.gz"),
            ),
        )
    end #function

    return map(_s, samplenames)
end #function
# Pipeline entry point. The last command-line argument is the path to the sample metadata
# table. The function sets up the `:qiime2` and `:metaxa2` conda environments, finds the
# FASTQ pair of every sample listed in the metadata, runs the Metaxa taxonomy step for each
# sample through `pmap`, merges the per-sample results into one feature table and copies
# that table into the current working directory. Returns `0` as the process exit code.
#
# Throws an `ArgumentError` when no argument was given.
function (@main)(ARGS)
    # Give a usage hint instead of `pop!`'s generic "array must be non-empty" error
    isempty(ARGS) && throw(
        ArgumentError("expected the path to the sample metadata file as the last argument"),
    )
    metadata_file = pop!(ARGS)

    # NOTE(review): this URL names the "osx" environment file regardless of the host
    # platform - confirm whether a platform-specific file should be chosen on other systems.
    setup_remote_conda_environment(
        URI(
            "https://data.qiime2.org/distro/metagenome/qiime2-metagenome-2024.10-py310-osx-conda.yml",
        ),
        :qiime2,
    )
    setup_remote_conda_environment(
        joinpath(@__DIR__, "..", "conda_envs", "metaxa2.yml"),
        :metaxa2,
    )

    metadata = import_metadata(metadata_file)
    fastq_files = sample_files(metadata[!, :sample_name])

    # Load the Metaxa module on every process before `pmap` calls into it
    @eval begin
        @everywhere begin
            include(joinpath(@__DIR__, "Metaxa.jl"))
            using .Metaxa: Metaxa
        end #@everywhere
    end #@eval

    taxonomy_files = pmap(x -> Metaxa.taxonomy(first(x), last(x)), fastq_files)
    feature_table = Metaxa.data_collector(taxonomy_files...)

    # `cp(src, dst)` treats `dst` as the destination path itself, not as a directory to copy
    # into, so passing `pwd()` always failed because that directory already exists. Copy to
    # a file of the same name inside the working directory instead; `force = true` replaces
    # a table left behind by an earlier run.
    cp(feature_table, joinpath(pwd(), basename(feature_table)); force = true)

    cowsay("Hello from Cowcalf_rumen_metagenomic_pipeline")
    return 0
end #function
end #module

View file

@ -1,77 +0,0 @@
module Metaxa
using Conda: runconda
include("ProcessHelper.jl")
using .ProcessHelper: exec_in_temp_dir
export taxonomy
# Run `metaxa2` from the `:metaxa2` conda environment on one pair of FASTQ files, writing
# output under the prefix `samplename` in the current working directory. Only the taxonomy
# output is switched on (`--taxonomy T`). Returns the absolute path of
# `<samplename>.taxonomy.txt` and raises an error when that file does not exist afterwards.
function _classifier(
    samplename::AbstractString,
    fastq1::AbstractString,
    fastq2::AbstractString,
)
    classify_cmd = `run metaxa2 \
        -1 $fastq1 \
        -2 $fastq2 \
        -o $samplename \
        --format fastq \
        --cpu 4 \
        --summary F \
        --graphical F \
        --fasta F \
        --taxonomy T
    `
    runconda(classify_cmd, :metaxa2)

    taxonomy_table = "$samplename.taxonomy.txt"
    if !ispath(taxonomy_table)
        error("metaxa2 ran, but $samplename.taxonomy.txt was not found!")
    end #if
    return abspath(taxonomy_table)
end #function
# Run `metaxa2_ttt` from the `:metaxa2` conda environment on the taxonomy table `taxonomy`,
# writing output under the prefix `samplename` in the current working directory. Returns
# the absolute path of `<samplename>.level_7.txt` and raises an error when that file does
# not exist afterwards.
function _taxonomy_traversal(samplename::AbstractString, taxonomy::AbstractString)
    runconda(
        `run metaxa2_ttt \
            -i $taxonomy \
            -o $samplename \
            -m 7 \
            -n 7 \
            --summary F
        `,
        :metaxa2,
    )
    # Name the tool that actually ran so a failure points at the right step
    ispath("$samplename.level_7.txt") ||
        error("metaxa2_ttt ran, but $samplename.level_7.txt was not found!")
    return abspath("$samplename.level_7.txt")
end #function
"""
    taxonomy(samplename::AbstractString, fastq::Tuple{<:AbstractString,<:AbstractString})

Classify one sample and return the path of its level 7 taxonomy table.

The two paths in `fastq` are passed to the classifier step, and the taxonomy file it
produces is passed to the taxonomy traversal step. Each step runs in its own temporary
directory.
"""
function taxonomy(
    samplename::AbstractString,
    fastq::Tuple{<:AbstractString,<:AbstractString},
)
    fastq1, fastq2 = fastq
    classified = exec_in_temp_dir(_classifier, samplename, fastq1, fastq2)
    return exec_in_temp_dir(_taxonomy_traversal, samplename, classified)
end #function
# Run `metaxa2_dc` from the `:metaxa2` conda environment on the given taxonomy tables,
# writing `feature-table.tsv` into the current working directory. Returns the absolute path
# of that file and raises an error when it does not exist afterwards.
function _dc(taxonomies::AbstractString...)
    # Two fixes over the previous command:
    # - the tool is started through `conda run`, like the other Metaxa steps; without `run`
    #   conda would be handed `metaxa2_dc` as one of its own subcommands.
    # - the tuple is interpolated directly so that every path becomes its own argument;
    #   a pre-joined string is passed as one single argument containing spaces.
    runconda(
        `run metaxa2_dc \
            -o feature-table.tsv \
            $taxonomies
        `,
        :metaxa2,
    )
    ispath("feature-table.tsv") ||
        error("metaxa2_dc ran, but feature-table.tsv was not found!")
    return abspath("feature-table.tsv")
end #function
"""
    data_collector(taxonomies::AbstractString...)

Merge the given taxonomy tables into a single feature table inside a fresh temporary
directory and return the absolute path of the resulting `feature-table.tsv`.
"""
function data_collector(taxonomies::AbstractString...)
    # `exec_in_temp_dir(f, taxonomies...)` with one or more paths dispatches to the more
    # specific `(f, samplename, files...)` method, which would consume the first taxonomy
    # file as the sample name and never link it into the temporary directory. Pass an
    # explicit placeholder label and drop it again so that every input is treated alike.
    collect_all = (_label, links...) -> _dc(links...)
    return exec_in_temp_dir(collect_all, "feature-table", taxonomies...)
end #function
end #module

View file

@ -1,36 +0,0 @@
module ProcessHelper
export sym_temp
"""
    sym_temp(files::AbstractString...) -> (tmp_dir, links)

Create a new temporary directory and place a symbolic link to each of `files` inside it.

Every link is named after the basename of its file and points at the file's resolved real
path. The files themselves are not copied. Returns the directory path together with a tuple
of the link paths, in the same order as `files`. The directory is created with
`cleanup = false`, so it is not removed automatically when the process exits.
"""
function sym_temp(files::AbstractString...)
    tmp_dir = mktempdir(; cleanup = false)
    @info "Creating temporary directory $tmp_dir"

    # Mapping over the argument tuple keeps the result a tuple
    links = map(files) do file
        symlink_path = joinpath(tmp_dir, basename(file))
        symlink(realpath(file), symlink_path)
        @info "Symlinked $file to $symlink_path"
        return symlink_path
    end #do

    return (tmp_dir, links)
end #function
"""
    exec_in_temp_dir(f::Function, samplename::AbstractString, files::AbstractString...)
    exec_in_temp_dir(f::Function, files::AbstractString...)

Link `files` into a fresh temporary directory (see `sym_temp`), call `f` with that
directory as the working directory and return whatever `f` returns. `f` receives the link
paths rather than the original paths; in the first form it also receives `samplename`,
unchanged, as its first argument.

Note that a call with at least one string argument always selects the first form, because
it is the more specific method: the first string is then taken as `samplename` and is not
linked. The second form is only reached when no string arguments are given.
"""
function exec_in_temp_dir(f::Function, samplename::AbstractString, files::AbstractString...)
    tmp_dir, tmp_files = sym_temp(files...)
    return cd(tmp_dir) do
        f(samplename, tmp_files...)
    end #do
end #function

function exec_in_temp_dir(f::Function, files::AbstractString...)
    tmp_dir, tmp_files = sym_temp(files...)
    return cd(tmp_dir) do
        f(tmp_files...)
    end #do
end #function
end #module