diff --git a/.gitignore b/.gitignore index b7f3332..2ed722b 100644 --- a/.gitignore +++ b/.gitignore @@ -198,3 +198,35 @@ venv.bak/ .mypy_cache/ .dmypy.json dmypy.json + +# Ignore results files +Results.txt +results.txt + +### Julia.gitignore +# (https://github.com/github/gitignore/blob/master/Julia.gitignore) + +# Files generated by invoking Julia with --code-coverage +*.jl.cov +*.jl.*.cov + +# Files generated by invoking Julia with --track-allocation +*.jl.mem + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. +Manifest.toml \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..ad578ed --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,14 @@ +# [:cow:]: The Code of the West + +## Ten principles to live by + +1. Live each day with courage. +2. Take pride in your work. +3. Always finish what you start. +4. Do what has to be done. +5. Be tough, but fair. +6. When you make a promise, keep it. +7. Ride for the brand. +8. Talk less and say more. +9. Remember that some things aren't for sale. +10. Know where to draw the line. diff --git a/Excel/Master BLUP Worksheet.xlsx b/Excel/Master BLUP Worksheet.xlsx index cb2624f..41e2174 100644 Binary files a/Excel/Master BLUP Worksheet.xlsx and b/Excel/Master BLUP Worksheet.xlsx differ diff --git a/Excel/van der Werf.xlsx b/Excel/van der Werf.xlsx new file mode 100644 index 0000000..ad7bd78 Binary files /dev/null and b/Excel/van der Werf.xlsx differ diff --git a/Julia/beefblup.jl b/Julia/beefblup.jl new file mode 100644 index 0000000..5cf891b --- /dev/null +++ b/Julia/beefblup.jl @@ -0,0 +1,287 @@ +# beefblup +# Main script for performing single-variate BLUP to find beef cattle +# breeding values +# Usage: julia beefblup.jl +# (C) 2020 Thomas A. Christensen II +# Licensed under BSD-3-Clause License + +# Import the required packages +using XLSX +using LinearAlgebra +using Dates +using Gtk + +# Display stuff +println("beefblup v 0.1") +println("(C) 2020 Thomas A. Christensen II") +println("https://github.com/millironx/beefblup") +print("\n") + +### Prompt User +# Ask for an input spreadsheet +path = open_dialog_native( + "Select a beefblup worksheet", + GtkNullContainer(), + ("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet")) +) + +# Ask for an output text filename +savepath = save_dialog_native( + "Save your beefblup results", + GtkNullContainer(), + (GtkFileFilter("*.txt", name="Results file"), + "*.txt") +) + +# Ask for heritability +print("What is the heritability for this trait?> ") +h2 = parse(Float64, readline(stdin)) + +### Import input filename +print("[🐮]: Importing Excel file...") + +# Import data from a suitable spreadsheet +data = XLSX.readxlsx(path)[1][:] + +print("Done!\n") + +### Process input file +print("[🐮]: Processing and formatting data...") + +# Extract the headers into a separate array +headers = data[1,:] +data = data[2:end,:] + +# Sort the array by date +data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2])) + +# Define fields to hold id values for animals and their parents +ids = string.(data[:,1]) +damids = string.(data[:,3]) +sireids = string.(data[:,4]) +numanimals = length(ids) + +# Find the index values for animals and their parents +dam = indexin(damids, ids) +sire = indexin(sireids, ids) + +# Store column numbers that need to be deleted +# Column 6 contains an intermediate Excel calculation and always need to +# be deleted +colstokeep = [1, 2, 3, 4, 5] + +# Find any columns that need to be deleted +for i in 7:length(headers) + if length(unique(data[:,i])) <= 1 + colname = headers[i] + print("Column '") + print(colname) + print("' does not have any unique animals and will be removed from this analysis\n") + else + push!(colstokeep, i) + end +end + +# Delete the appropriate columns from the datasheet and the headers +data = data[:, colstokeep] +headers = headers[colstokeep] + +# Determine how many contemporary groups there are +numgroups = ones(1, length(headers)-5) +for i in 6:length(headers) + numgroups[i-5] = length(unique(data[:,i])) +end + +# If there are more groups than animals, then the analysis cannot continue +if sum(numgroups) >= numanimals + println("There are more contemporary groups than animals. The analysis will + now abort.") + exit() +end + +# Define a "normal" animal as one of the last in the groups, provided that +# all traits do not have null values +normal = Array{String}(undef,1,length(headers)-5) +for i in 6:length(headers) + for j in numanimals:-1:1 + if !ismissing(data[j,i]) + normal[i-5] = string(data[j,i]) + break + end + end +end + +print("Done!\n") + +### Create the fixed-effect matrix +print("[🐮]: Creating the fixed-effect matrix...") + +# Form the fixed-effect matrix +X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1) +X[:,1] = ones(Int8, 1, numanimals) + +# Create an external counter that will increment through both loops +counter = 2 + +# Store the traits in a string array +adjustedtraits = +Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups)) +# Iterate through each group +for i in 1:length(normal) + # Find the traits that are present in this trait + localdata = string.(data[:,i+5]) + traits = unique(localdata) + # Remove the normal version from the analysis + effecttraits = traits[findall(x -> x != normal[i], traits)] + # Iterate inside of the group + for j in 1:length(effecttraits) + matchedindex = findall(x -> x != effecttraits[j], localdata) + X[matchedindex, counter] .= 1 + # Add this trait to the string + adjustedtraits[counter - 1] = traits[j] + # Increment the big counter + global counter = counter + 1 + end +end + +print("Done!\n") + +### Additive relationship matrix +print("[🐮]: Creating additive relationship matrix...") + +# Create an empty matrix for the additive relationship matrix +A = zeros(numanimals, numanimals) + +# Create the additive relationship matrix by the FORTRAN method presented by +# Henderson +for i in 1:numanimals + if !isnothing(dam[i]) && !isnothing(sire[i]) + for j in 1:(i-1) + A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]]) + A[i,j] = A[j,i] + end + A[i,i] = 1 + 0.5*A[sire[i], dam[i]] + elseif !isnothing(dam[i]) && isnothing(sire[i]) + for j in 1:(i-1) + A[j,i] = 0.5*A[j,dam[i]] + A[i,j] = A[j,i] + end + A[i,i] = 1 + elseif isnothing(dam[i]) && !isnothing(sire[i]) + for j in 1:(i-1) + A[j,i] = 0.5*A[j,sire[i]] + A[i,j] = A[j,i] + end + A[i,i] = 1 + else + for j in 1:(i-1) + A[j,i] = 0 + A[i,j] = 0 + end + A[i,i] = 1 + end +end + +print("Done!\n") + +### Perform BLUP +print("[🐮]: Solving the mixed-model equations...") + +# Extract the observed data +Y = convert(Array{Float64}, data[:,5]) + +# The random effects matrix +Z = Matrix{Int}(I, numanimals, numanimals) + +# Remove items where there is no data +nullobs = findall(isnothing, Y) +Z[nullobs, nullobs] .= 0 + +# Calculate heritability +λ = (1-h2)/h2 + +# Use the mixed-model equations +MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)] +MMY = [X'*Y; Z'*Y] +solutions = MME\MMY + +# Find the accuracies +diaginv = diag(inv(MME)) +reliability = ones(Float64, length(diaginv)) - diaginv.*λ + +print("Done!\n") + +### Output the results +print("[🐮]: Saving results...") + +# Find how many traits we found BLUE for +numgroups = numgroups .- 1 + +# Start printing results to output +fileID = open(savepath, "w") +write(fileID, "beefblup Results Report\n") +write(fileID, "Produced using beefblup for Julia (") +write(fileID, "https://github.com/millironx/beefblup") +write(fileID, ")\n\n") +write(fileID, "Input:\t") +write(fileID, path) +write(fileID, "\nAnalysis performed:\t") +write(fileID, string(Dates.today())) +write(fileID, "\nTrait examined:\t") +write(fileID, headers[5]) +write(fileID, "\n\n") + +# Print base population stats +write(fileID, "Base Population:\n") +for i in 1:length(numgroups) + write(fileID, "\t") + write(fileID, headers[i+5]) + write(fileID, ":\t") + write(fileID, normal[i]) + write(fileID, "\n") +end +write(fileID, "\tMean ") +write(fileID, headers[5]) +write(fileID, ":\t") +write(fileID, string(solutions[1])) +write(fileID, "\n\n") + +# Contemporary group adjustments +counter = 2 +write(fileID, "Contemporary Group Effects:\n") +for i in 1:length(numgroups) + write(fileID, "\t") + write(fileID, headers[i+5]) + write(fileID, "\tEffect\tReliability\n") + for j in 1:numgroups[i] + write(fileID, "\t") + write(fileID, adjustedtraits[counter - 1]) + write(fileID, "\t") + write(fileID, string(solutions[counter])) + write(fileID, "\t") + write(fileID, string(reliability[counter])) + write(fileID, "\n") + + global counter = counter + 1 + end + write(fileID, "\n") +end +write(fileID, "\n") + +# Expected breeding values +write(fileID, "Expected Breeding Values:\n") +write(fileID, "\tID\tEBV\tReliability\n") +for i in 1:numanimals + write(fileID, "\t") + write(fileID, ids[i]) + write(fileID, "\t") + write(fileID, string(solutions[i+counter-1])) + write(fileID, "\t") + write(fileID, string(reliability[i+counter-1])) + write(fileID, "\n") +end + +write(fileID, "\n - END REPORT -") +close(fileID) + +print("Done!\n") diff --git a/Julia/install.jl b/Julia/install.jl new file mode 100644 index 0000000..2de4ccd --- /dev/null +++ b/Julia/install.jl @@ -0,0 +1,13 @@ +# beefblup install +# Prepares the Julia environment for using beefblup by installing the requisite +# packages +# Usage: julia install.jl +# (C) 2020 Thomas A. Christensen II +# Licensed under BSD-3-Clause License + +# Import the package manager +using Pkg + +# Install requisite packages +Pkg.add("XLSX") +Pkg.add("Gtk") \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md index e50a0eb..08e6c89 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2018, Thomas A. Christensen II +Copyright (c) 2020, Thomas A. Christensen II All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/MATLAB/beefblup.m b/MATLAB/beefblup.m index 242fd29..d26f887 100644 --- a/MATLAB/beefblup.m +++ b/MATLAB/beefblup.m @@ -2,7 +2,7 @@ % Main script for performing single-variate BLUP to find beef cattle % breeding values % Usage: beefblup -% (C) 2018 Thomas A. Christensen II +% (C) 2020 Thomas A. Christensen II % Licensed under BSD-3-Clause License % Prepare the workspace for computation @@ -11,8 +11,8 @@ clc close all %% Display stuff -disp('beefblup v. 0.0.0.1') -disp('(C) 2018 Thomas A. Christensen II') +disp('beefblup v. 0.1') +disp('(C) 2020 Thomas A. Christensen II') disp('https://github.com/millironx/beefblup') disp(' ') @@ -77,7 +77,7 @@ for i=1:numanimals dammatch = ismember(damids, ids(i,:), 'rows'); damindexes = find(dammatch == 1); dam(damindexes) = i; - + % Find all animals that this animal sired sirematch = ismember(sireids, ids(i,:), 'rows'); sireindexes = find(sirematch == 1); @@ -91,7 +91,7 @@ colstodelete = 6; % Coerce each group to string format for i = 7:length(headers) - data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false); + data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false); end % Find any columns that need to be deleted @@ -100,7 +100,7 @@ for i = 7:length(headers) colname = headers{i}; disp(['Column "' colname '" does not have any unique animals and will be removed']) disp('from this analysis'); - colstodelete = [colstodelete i]; + colstodelete = [colstodelete i]; end end @@ -167,19 +167,19 @@ adjustedtraits = cell(1, sum(numgroups)-length(numgroups)); for i = 1:length(normal) % Find the traits that are present in this trait traits = uniquecell(data(:,i+5)); - + % Remove the "normal" version from the analysis normalindex = find(strcmp(traits, normal{i})); traits(normalindex) = []; - + % Iterate inside of the group for j = 1:length(traits) matchedindex = find(strcmp(data(:,i+5), traits{j})); X(matchedindex, I) = 1; - + % Add this trait to the string adjustedtraits(I - 1) = traits(j); - + % Increment the big counter I = I + 1; end @@ -217,7 +217,7 @@ for i = 1:numanimals A(j,i) = 0.5*A(j,sire(i)); A(i,j) = A(j,i); end - A(i,i) = 1; + A(i,i) = 1; else for j = 1:(i-1) A(j,i) = 0; @@ -313,7 +313,7 @@ for i = 1:length(numgroups) fprintf(fileID, '\t'); fprintf(fileID, num2str(reliability(I))); fprintf(fileID, '\n'); - + I = I + 1; end fprintf(fileID, '\n'); diff --git a/README.md b/README.md index a9e14d9..6faef51 100644 --- a/README.md +++ b/README.md @@ -1,63 +1,82 @@ -# beefblup +# [:cow:]: beefblup +[![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md) [![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases) -:cow: :cow: :cow: +beefblup is a program for ranchers to calculate expected breeding +values (EBVs) for their own beef cattle. It is intended to be usable by anyone +without requiring any prior knowledge of computer programming or linear algebra. +Why? It's part of my effort to +**\#KeepEPDsReal** -#### \#KeepEPDsReal +## Installation -MATLAB and Python scripts and Excel spreadsheets that can be used in conjunction to find breeding values for beef cattle. +1. [Download and install Julia](https://julialang.org/downloads/platform/) +2. Open a new Julia window and type the `]` key +3. Type `add XLSX Gtk` and press **Enter** + +Alternatively, you can run the [install +script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from +Julia. ## How to Use -1. Download the [Excel template](https://github.com/MillironX/beefblup/raw/master/Excel/Master%20BLUP%20Worksheet.xlsx) -2. Place your data into the structure described by the spreadsheet -3. If you wish to add more contemporary group traits to your analysis, replace or add them to the right of the Purple section -4. Open MATLAB -5. Enter the following lines in the command window: +> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118). +> Although it's tempting to just open up beefblup in Juno and press the big play +> button, it won't work. Follow these instructions until it's fixed. If you +> don't know what Juno is: ignore this message. - ``` - websave('beefblup.zip','https://github.com/MillironX/beefblup/archive/master.zip'); - unzip('beefblup.zip'); - cd beefblup-master/MATLAB - beefblup - ``` - -6. Select the spreadsheet file you just placed your data into -7. Select a file that you would like to save your results to -8. Breeding values and contemporary group adjustments will be outputted to the file you selected +1. Download the [beefblup ZIP + file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it + someplace memorable +2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own +3. If you wish to add more contemporary group traits to your analysis, replace + or add them to the right of the Purple section +4. Save and close +5. In your file explorer, copy the address of the "Julia" folder +6. Launch Julia +7. Type `cd(" **Also Note:** beefblup was written on, and has only been tested with Julia +> v1.2.0 and higher. While this shouldn't affect most everyday users, it might +> affect you if you are stuck on the current LTS version of Julia (v1.0.5). -* Convert MATLAB scripts to Python - * The product must be able to be run from the native (non-python) terminal using only the default [Anaconda Python packages](https://anaconda.com/distribution) -* Optimizing code sections - * Use triagonal shortcuts to generate the additive relationship matrix - * Solve implicit forms of the mixed-model equation - * Perform cannonical transformations for missing values - * Other similar improvements that I might not be aware of -* Creation of scripts to handle additional forms of BLUP - * Mult-trait (MBLUP) - * Maternal-trait - * Genomic-enhanced (GBLUP) - this will require the creation of a standard SNP spreadsheet format -* Creation of spreadsheets for additional traits -* Creation of wiki pages to explain what each script does - * The general rule is that **every** wiki page should be understandable to anyone who's passed high school algebra, while still being correct and informative - +### Development Roadmap +| Version | Feature | +| ------- | ------------------------------------------------------------------- | +| v0.1 | Julia port of original MATLAB script | +| v0.2 | Spreadsheet format redesign | +| v0.3 | API rewrite (change to function calls and package format instead of script running) | +| v0.4 | Add GUI for all options | +| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | +| v0.6 | Repeated measurement BLUP (aka dairyblup) | +| v0.7 | Multiple trait BLUP | +| v0.8 | Maternal effects BLUP | +| v0.9 | Genomic BLUP | +| v0.10 | beefblup binaries | +| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | -Note that I intend to implement all of the items above eventually, but progress is slow since I'm learning as I go. +### Bug Reports -If you are writing code, please keep the code clean: +For every bug report, please include at least the following: -* Run "Smart Indent" in the MATLAB editor on the entire file before checking it in -* Name variables in full word English using all lowercase, unless the matrix name is generally agreed upon in academic papers (i.e. A is the additive relationship matrix) -* For MATLAB, functions go in a separate file -* Comments go before a code block: no inline comments - -Bug reports and suggestions will be gladly taken on the [issues](https://github.com/MillironX/beefblup/issues) page. There is no set format for issues, yet, but please at the minimum attach a filled-out spreadsheet that demonstrates your bug or how your suggestion would be useful. +- Platform (Windows, Mac, Fedora, etc) +- Julia version +- beefblup version +- How you are running Julia (From PowerShell, via the REPL, etc.) +- A beefblup spreadsheet that can be used to recreate the issue +- Description of the problem +- Expected behavior +- A screenshot and/or REPL printout ## License