diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml deleted file mode 100644 index 1e72b50..0000000 --- a/.JuliaFormatter.toml +++ /dev/null @@ -1 +0,0 @@ -style="blue" diff --git a/.gitignore b/.gitignore index f9c1b1b..3b1733d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,30 +2,4 @@ !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json -!.vscode/extensions.json - -### Julia gitignore ### -## Files generated by invoking Julia with --code-coverage -*.jl.cov -*.jl.*.cov - -# Files generated by invoking Julia with --track-allocation -*.jl.mem - -# System-specific files and directories generated by the BinaryProvider and BinDeps packages -# They contain absolute paths specific to the host computer, and so should not be committed -deps/deps.jl -deps/build.log -deps/downloads/ -deps/usr/ -deps/src/ - -# Build artifacts for creating documentation generated by the Documenter package -docs/build/ -docs/site/ - -# File generated by Pkg, the package manager, based on a corresponding Project.toml -# It records a fixed state of all packages used by the project. As such, it should not be -# committed for packages, but should be committed for applications that require a static -# environment. -Manifest.toml +!.vscode/extensions.json \ No newline at end of file diff --git a/Project.toml b/Project.toml deleted file mode 100644 index ed6c11c..0000000 --- a/Project.toml +++ /dev/null @@ -1,35 +0,0 @@ -name = "Cowcalf_rumen_metagenomic_pipeline" -uuid = "7d0d08ee-6932-474e-8e49-cd3f4679ce2d" -authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com> and contributors"] -version = "0.1.0" - -[deps] -CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" -Cowsay = "b6370f49-8ad1-4651-ad9e-3639b35da0e9" -Dagger = "d58978e5-989f-55fb-8d15-ea34adc7bf54" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" -Glob = "c27321d9-0574-5035-807b-f59d2c89b15c" -HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" -URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" - -[compat] -CSV = "0.10.15" -Conda = "1.10.2" -Cowsay = "1.0.0" -Dagger = "0.18.14" -DataFrames = "1.7.0" -Distributed = "1.11.0" -Glob = "1.3.1" -HTTP = "1.10.16" -URIs = "1.5.2" -YAML = "0.4.13" -julia = "1.11" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test"] diff --git a/conda_envs/metaxa2.yml b/conda_envs/metaxa2.yml deleted file mode 100644 index 4a27d02..0000000 --- a/conda_envs/metaxa2.yml +++ /dev/null @@ -1,8 +0,0 @@ -channels: - - bioconda - - conda-forge -dependencies: - - metaxa=2.2 - - blast-legacy=2.2.26 - - hmmer=3.1 - - mafft=7.525 diff --git a/src/Cowcalf_rumen_metagenomic_pipeline.jl b/src/Cowcalf_rumen_metagenomic_pipeline.jl deleted file mode 100644 index eba501a..0000000 --- a/src/Cowcalf_rumen_metagenomic_pipeline.jl +++ /dev/null @@ -1,90 +0,0 @@ -module Cowcalf_rumen_metagenomic_pipeline - -using Conda: Conda, runconda -using Cowsay: cowsay -using CSV: CSV -using DataFrames: DataFrame, rename! -using Distributed: pmap, @everywhere -using Glob: GlobMatch, glob -using HTTP: HTTP -using URIs: URI -using YAML: YAML - -export main - -include("Metaxa.jl") -using .Metaxa: Metaxa - -_fetch_yaml_contents(yaml::AbstractString) = YAML.load_file(yaml) -_fetch_yaml_contents(yaml::URI) = YAML.load(String(HTTP.get(yaml).body)) - -function setup_remote_conda_environment(yaml::Union{AbstractString,URI}, env_name::Symbol) - ENV["CONDA_JL_USE_MINIFORGE"] = "1" - - # Install x86 packages via Rosetta2 on MacOS - if Sys.isapple() - ENV["CONDA_SUBDIR"] = "osx-64" - runconda(`config --env --set subdir osx-64`, env_name) - end #if - - conda_definition = _fetch_yaml_contents(yaml) - - map(c -> Conda.add_channel(c, env_name), conda_definition["channels"]) - Conda.add(conda_definition["dependencies"], env_name) - - return env_name -end #function - -function import_metadata(metadata_tsv::AbstractString) - df = DataFrame(CSV.File(metadata_tsv)) - rename!(df, 1 => :sample_name) - return df -end #function - -function sample_files(samplenames::Vector{<:AbstractString}) - function _s(samplename::AbstractString) - # Use explicit GlobMatch constructor b/c we need to interpolate values - return ( - samplename, - ( - abspath(first(glob(GlobMatch("$(samplename)*1*.fastq.gz")))), - abspath(first(glob(GlobMatch("$(samplename)*2*.fastq.gz")))), - ), - ) - end #function - - return map(_s, samplenames) -end #function - -function (@main)(ARGS) - metadata_file = pop!(ARGS) - - setup_remote_conda_environment( - URI( - "https://data.qiime2.org/distro/metagenome/qiime2-metagenome-2024.10-py310-osx-conda.yml", - ), - :qiime2, - ) - setup_remote_conda_environment( - joinpath(@__DIR__, "..", "conda_envs", "metaxa2.yml"), - :metaxa2, - ) - - metadata = import_metadata(metadata_file) - fastq_files = sample_files(metadata[!, :sample_name]) - - @eval begin - @everywhere begin - include(joinpath(@__DIR__, "Metaxa.jl")) - using .Metaxa: Metaxa - end #@everywhere - end #@eval - taxonomy_files = pmap(x -> Metaxa.taxonomy(first(x), last(x)), fastq_files) - feature_table = Metaxa.data_collector(taxonomy_files...) - cp(feature_table, pwd()) - - cowsay("Hello from Cowcalf_rumen_metagenomic_pipeline") - return 0 -end #function - -end #module diff --git a/src/Metaxa.jl b/src/Metaxa.jl deleted file mode 100644 index 4b93d19..0000000 --- a/src/Metaxa.jl +++ /dev/null @@ -1,77 +0,0 @@ -module Metaxa - -using Conda: runconda - -include("ProcessHelper.jl") - -using .ProcessHelper: exec_in_temp_dir - -export taxonomy - -function _classifier( - samplename::AbstractString, - fastq1::AbstractString, - fastq2::AbstractString, -) - runconda( - `run metaxa2 \ - -1 $fastq1 \ - -2 $fastq2 \ - -o $samplename \ - --format fastq \ - --cpu 4 \ - --summary F \ - --graphical F \ - --fasta F \ - --taxonomy T - `, - :metaxa2, - ) - ispath("$samplename.taxonomy.txt") || - error("metaxa2 ran, but $samplename.taxonomy.txt was not found!") - return abspath("$samplename.taxonomy.txt") -end #function - -function _taxonomy_traversal(samplename::AbstractString, taxonomy::AbstractString) - runconda( - `run metaxa2_ttt \ - -i $taxonomy \ - -o $samplename \ - -m 7 \ - -n 7 \ - --summary F - `, - :metaxa2, - ) - ispath("$samplename.level_7.txt") || - error("metaxa2 ran, but $samplename.level_7.txt was not found!") - return abspath("$samplename.level_7.txt") -end #function - -function taxonomy( - samplename::AbstractString, - fastq::Tuple{<:AbstractString,<:AbstractString}, -) - taxonomy_file = exec_in_temp_dir(_classifier, samplename, fastq...) - level_7_taxonomy_file = exec_in_temp_dir(_taxonomy_traversal, samplename, taxonomy_file) - return level_7_taxonomy_file -end #function - -function _dc(taxonomies::AbstractString...) - runconda( - `metaxa2_dc \ - -o feature-table.tsv \ - $(join(taxonomies, ' ')) - `, - :metaxa2, - ) - ispath("feature-table.tsv") || - error("metaxa2 ran, but feature-table.tsv was not found!") - return abspath("feature-table.tsv") -end #function - -function data_collector(taxonomies::AbstractString...) - return exec_in_temp_dir(_dc, taxonomies...) -end #function - -end #module diff --git a/src/ProcessHelper.jl b/src/ProcessHelper.jl deleted file mode 100644 index ca01370..0000000 --- a/src/ProcessHelper.jl +++ /dev/null @@ -1,36 +0,0 @@ -module ProcessHelper - -export sym_temp - -""" - sym_temp(file::AbstractString) -> (tmp_dir, link) - sym_temp(files::Tuple{<:AbstractString,<:AbstractString}) -> (tmp_dir, links) - -Copies `file(s)` to a new temporary directory named `tmp_dir` and symbolically links them -inside of that directory. Returns a tuple of the directory path and the path to the links. -""" -function sym_temp(files::AbstractString...) - tmp_dir = mktempdir(; cleanup = false) - @info "Creating temporary directory $tmp_dir" - - function _symlink(file::AbstractString) - symlink_path = joinpath(tmp_dir, basename(file)) - symlink(realpath(file), symlink_path) - @info "Symlinked $file to $symlink_path" - return symlink_path - end #function - - return (tmp_dir, map(_symlink, files)) -end #function - -function exec_in_temp_dir(f::Function, samplename::AbstractString, files::AbstractString...) - tmp_dir, tmp_files = sym_temp(files...) - return cd(() -> f(samplename, tmp_files...), tmp_dir) -end #function - -function exec_in_temp_dir(f::Function, files::AbstractString...) - tmp_dir, tmp_files = sym_temp(files...) - return cd(() -> f(tmp_files...), tmp_dir) -end #function - -end #module