1
0
Fork 0
mirror of https://github.com/MillironX/beefblup.git synced 2024-11-10 18:23:08 +00:00

Compare commits

..

75 commits

Author SHA1 Message Date
62f5c4ea65
Update report generation to match new fixed effect format 2021-08-31 20:02:17 -05:00
683c48aa45
Bug fix for not calling propertynames 2021-08-31 19:46:02 -05:00
d4bb72c458
Revamp fixed-effect algorithm 2021-08-31 19:24:07 -05:00
4b66ad8b1f
Make column names more robust 2021-08-31 18:27:16 -05:00
91fc553c4c
Fix documentation syntax bugs 2021-08-29 08:08:23 -05:00
16aae0469e
Fix tests 2021-08-28 19:28:25 -05:00
97bd43323d
Convert wiki to docs 2021-08-28 19:25:17 -05:00
b4e0591ed2
Update badges to work on 'develop' branch 2021-08-28 18:11:20 -05:00
a9ab1e8641
Push pedigree matrix to its own function 2021-08-28 18:07:22 -05:00
3782de85ac
Mark beefblup CLI as Julia file 2021-08-09 21:06:21 -05:00
9036ace524
Remove uneeded test script file 2021-08-09 21:02:20 -05:00
c318ded0a6
Make Julia version correct in docs CI 2021-08-09 20:47:26 -05:00
74f15be400
New documentation CI approach 2021-08-09 20:46:18 -05:00
091b757b00
Add headless display server to CI 2021-08-09 19:57:32 -05:00
a1d4e6b063
Fix documentation CI 2021-08-09 19:31:25 -05:00
79e158d83e
Merge tag 'v0.2.2' into develop
New methods for making GitHub recognize beefblup as a Julia program
2021-08-09 19:11:52 -05:00
94a2e2e124 Merge branch 'hotfix/v0.2.2'
Last GitHub fix didn't take
2021-08-09 19:10:04 -05:00
f7f09cec90
Declare beefblup.jl as Julia using Emacs modeline 2021-08-09 19:09:45 -05:00
12e95d4a7a
Declare all jl files as Julia regardless of shebang 2021-08-09 19:09:28 -05:00
7b7ab22123
Bump version number 2021-08-09 19:08:42 -05:00
cc97c99a68 Merge tag 'v0.2.1' into develop
Fix GitHub's language detection
2021-08-09 19:00:35 -05:00
a6bd3d7a29 Merge branch 'hotfix/v0.2.1'
Fix GitHub's language-detection for beefblup
2021-08-09 18:59:50 -05:00
489274d557
Bump version number 2021-08-09 18:59:09 -05:00
66fc4b902f
Mark beefblup as Julia on GitHub 2021-08-09 18:57:09 -05:00
5e1c5f7707 Merge branch 'master' into develop
CI from GitHub workflows must be performed in 'master' branch, even
though the work most directly affected the 'develop' branch.
2021-08-09 18:33:30 -05:00
27199ea748
Update Documentation.yml 2021-07-07 09:47:05 -05:00
e18afb68df
Update Documentation.yml 2021-07-07 09:46:08 -05:00
3f55072ecd
Fix type 2021-07-07 09:43:57 -05:00
15bdebb17d
Create Documentation.yml 2021-07-07 09:42:20 -05:00
d87ee85795
Set docs to correct place [ci skip] 2021-07-07 09:35:40 -05:00
b3162345ff
Make docs point to this repository 2021-07-07 09:33:10 -05:00
c3d905ff5e
More Travis GUI updates 2021-06-19 20:56:14 -05:00
615766901c
Try adding GUI to Travis config
https://stackoverflow.com/a/37330054/3922521
2021-06-19 20:34:51 -05:00
d9fde2af8f
Enable TravisCI 2021-06-19 20:15:40 -05:00
141cf56b07
Add basic test for correct fixed effect matrix 2021-06-19 20:11:25 -05:00
181819db28
Refactor fixed effect solver into its own function 2021-06-19 19:56:32 -05:00
f5f1dfad13
Remove reference to blue coding style 2021-06-19 19:28:05 -05:00
7ce74be4d4
Merge branch 'pkg-template' into develop 2021-06-19 19:24:46 -05:00
0a537f9928
Run Julia template engine for CI purposes 2021-06-19 19:16:34 -05:00
95fd34ce3a
Bootstrap testing framework 2021-06-19 18:49:00 -05:00
93564b247e
Reformat document 2021-06-19 18:27:52 -05:00
289984be2f
Fix running without arguments 2021-06-19 16:13:07 -05:00
6e311141ac
Create debug config 2021-06-19 11:54:51 -05:00
fa42f3635f
Remove debug info from cli 2021-06-19 11:54:34 -05:00
5c5ac1654c
Add argument parsing to cli 2021-06-19 11:50:26 -05:00
1885aa15ef
Fix results file name creation 2021-06-19 11:50:02 -05:00
530258a2c8
Qualify column deletion function 2021-06-19 11:07:13 -05:00
9b181d251c
Update deprecated column deletion syntax 2021-06-19 11:05:02 -05:00
610c80e7c9
Use Julia error/warn systems instead of printing to stdout 2021-06-19 10:01:51 -05:00
f985f4a700
Remove verbose text output 2021-06-18 15:27:18 -05:00
7cf650992d
Add results filename assumption 2021-06-18 15:25:34 -05:00
5048d3a1b8
Split into input and worker functions 2021-06-18 14:13:28 -05:00
2dd48631ea Merge branch 'develop'
Started developing new features on master
Need to keep commits but rollback master to usable state
2021-06-18 13:55:12 -05:00
6b5b775a83
Create bash wrapper for function call 2021-06-18 13:51:37 -05:00
b61eae3324
Change main script to module and function call 2021-06-18 13:51:26 -05:00
1928eb3c4f
Rename in accordance with Julia module standards 2021-06-18 13:45:57 -05:00
97ed1662a8
Add package info back into project manifest 2021-06-18 13:43:36 -05:00
80a21e42e0 Merge tag 'v0.2' into develop
Spreadsheet format redesign
2021-06-18 13:27:52 -05:00
487f1c30df Merge branch 'release/v0.2' 2021-06-18 13:27:18 -05:00
6573492bac
Change copyright/release numbers 2021-06-18 13:26:35 -05:00
0de4b7dc0c
Remove package declaration from project manifest
This was a test for an easier install of beefblup, but installing from the
manifest makes Julia run the script instead of install the dependencies.
API rewrite is slated for v0.3, so this item will be taken care of then.
2021-06-18 13:26:16 -05:00
92e03a95ef
Add package info to project manifest 2021-06-18 12:54:55 -05:00
74683f03d5
Fix report generation 2021-06-18 12:46:01 -05:00
2882f5b932
Fix equation solving step 2021-06-18 12:36:38 -05:00
82bf3cfa89
Switch to UNIX line endings 2021-06-18 12:07:58 -05:00
de0eb6f5a9
Fix spell checking so I can focus on real problems 2021-06-18 12:05:55 -05:00
4efeb5d53a
Add auto-project-loading shebang 2021-06-18 10:46:21 -05:00
d0b0e79b38
Switch to Julia's package install method 2021-06-18 10:42:12 -05:00
f450295b20
Initial pass at input file change
Known to produce wrong results
2021-06-18 10:38:49 -05:00
5d7ecceef8
Rename 'Julia' folder to 'src' folder for consistancy with packages 2021-01-04 02:25:10 -07:00
ec3356fcf3
Reformat spreadsheet 2021-01-04 02:24:39 -07:00
05ee8018c2
Remove Matlab script files 2021-01-04 02:16:40 -07:00
dbd1dc7b4d
Delete LICENSE.md 2020-10-11 21:25:18 -06:00
8fc1bc3f4a
Create Github-template LICENSE file 2020-10-11 21:24:56 -06:00
6f73731d0a
Reformat license file to match choosealicense.com 2020-10-11 21:22:35 -06:00
25 changed files with 1243 additions and 913 deletions

2
.gitattributes vendored
View file

@ -15,3 +15,5 @@
*.pdf -crlf -diff -merge *.pdf -crlf -diff -merge
*.jpg -crlf -diff -merge *.jpg -crlf -diff -merge
*.png -crlf -diff -merge *.png -crlf -diff -merge
*.jl linguist-language=Julia
beefblup linguist-language=Julia

26
.github/workflows/Documentation.yml vendored Normal file
View file

@ -0,0 +1,26 @@
name: Documentation
on:
push:
branches:
- main
- develop
pull_request:
branches:
- master
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DISPLAY: ':99.0'
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@latest
with:
version: '1.5'
- run: sudo apt-get install xvfb -y
- run: xvfb-run --auto-servernum julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
- run: xvfb-run --auto-servernum julia --project=docs/ docs/make.jl

195
.gitignore vendored
View file

@ -1,60 +1,3 @@
## MATLAB gitignores
# Windows default autosave extension
*.asv
# OSX / *nix default autosave extension
*.m~
# Compiled MEX binaries (all platforms)
*.mex*
# Packaged app and toolbox files
*.mlappinstall
*.mltbx
# Generated helpsearch folders
helpsearch*/
# Simulink code generation folders
slprj/
sccprj/
# Matlab code generation folders
codegen/
# Simulink autosave extension
*.autosave
# Octave session info
octave-workspace
## Windows gitignores
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
## Visual Studio Code gitignores ## Visual Studio Code gitignores
.vscode/* .vscode/*
@ -62,145 +5,9 @@ $RECYCLE.BIN/
!.vscode/tasks.json !.vscode/tasks.json
!.vscode/launch.json !.vscode/launch.json
!.vscode/extensions.json !.vscode/extensions.json
.vscode/settings.json !.vscode/settings.json
## Microsoft Office gitignores
*.tmp
# Word temporary
~$*.doc*
# Word Auto Backup File
Backup of *.doc*
# Excel temporary
~$*.xls*
# Excel Backup File
*.xlk
# PowerPoint temporary
~$*.ppt*
# Visio autosave temporary files
*.~vsd*
## Python gitignores
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Ignore results files # Ignore results files
Results.txt
results.txt results.txt
### Julia.gitignore ### Julia.gitignore

15
.travis.yml Normal file
View file

@ -0,0 +1,15 @@
language: julia
os:
- linux
services:
- xvfb
julia:
- 1.3
- nightly
matrix:
allow_failures:
- julia: nightly
fast_finish: true
notifications:
email: false
codecov: true

18
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,18 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "julia",
"request": "launch",
"name": "beefblup CLI debug",
"program": "${file}",
"stopOnEntry": false,
"cwd": "${workspaceFolder}",
"juliaEnv": "${command:activeJuliaEnvironment}",
"args": ["data/sample.csv", "-t 0.5"]
}
]
}

10
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,10 @@
{
"cSpell.words": [
"BLUP",
"EBVs",
"EPDs",
"autob",
"beefblup",
"dairyblup"
]
}

Binary file not shown.

Binary file not shown.

View file

@ -1,287 +0,0 @@
# beefblup
# Main script for performing single-variate BLUP to find beef cattle
# breeding values
# Usage: julia beefblup.jl
# (C) 2020 Thomas A. Christensen II
# Licensed under BSD-3-Clause License
# Import the required packages
using XLSX
using LinearAlgebra
using Dates
using Gtk
# Display stuff
println("beefblup v 0.1")
println("(C) 2020 Thomas A. Christensen II")
println("https://github.com/millironx/beefblup")
print("\n")
### Prompt User
# Ask for an input spreadsheet
path = open_dialog_native(
"Select a beefblup worksheet",
GtkNullContainer(),
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
)
# Ask for an output text filename
savepath = save_dialog_native(
"Save your beefblup results",
GtkNullContainer(),
(GtkFileFilter("*.txt", name="Results file"),
"*.txt")
)
# Ask for heritability
print("What is the heritability for this trait?> ")
h2 = parse(Float64, readline(stdin))
### Import input filename
print("[🐮]: Importing Excel file...")
# Import data from a suitable spreadsheet
data = XLSX.readxlsx(path)[1][:]
print("Done!\n")
### Process input file
print("[🐮]: Processing and formatting data...")
# Extract the headers into a separate array
headers = data[1,:]
data = data[2:end,:]
# Sort the array by date
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
# Define fields to hold id values for animals and their parents
ids = string.(data[:,1])
damids = string.(data[:,3])
sireids = string.(data[:,4])
numanimals = length(ids)
# Find the index values for animals and their parents
dam = indexin(damids, ids)
sire = indexin(sireids, ids)
# Store column numbers that need to be deleted
# Column 6 contains an intermediate Excel calculation and always need to
# be deleted
colstokeep = [1, 2, 3, 4, 5]
# Find any columns that need to be deleted
for i in 7:length(headers)
if length(unique(data[:,i])) <= 1
colname = headers[i]
print("Column '")
print(colname)
print("' does not have any unique animals and will be removed from this analysis\n")
else
push!(colstokeep, i)
end
end
# Delete the appropriate columns from the datasheet and the headers
data = data[:, colstokeep]
headers = headers[colstokeep]
# Determine how many contemporary groups there are
numgroups = ones(1, length(headers)-5)
for i in 6:length(headers)
numgroups[i-5] = length(unique(data[:,i]))
end
# If there are more groups than animals, then the analysis cannot continue
if sum(numgroups) >= numanimals
println("There are more contemporary groups than animals. The analysis will
now abort.")
exit()
end
# Define a "normal" animal as one of the last in the groups, provided that
# all traits do not have null values
normal = Array{String}(undef,1,length(headers)-5)
for i in 6:length(headers)
for j in numanimals:-1:1
if !ismissing(data[j,i])
normal[i-5] = string(data[j,i])
break
end
end
end
print("Done!\n")
### Create the fixed-effect matrix
print("[🐮]: Creating the fixed-effect matrix...")
# Form the fixed-effect matrix
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
X[:,1] = ones(Int8, 1, numanimals)
# Create an external counter that will increment through both loops
counter = 2
# Store the traits in a string array
adjustedtraits =
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
# Iterate through each group
for i in 1:length(normal)
# Find the traits that are present in this trait
localdata = string.(data[:,i+5])
traits = unique(localdata)
# Remove the normal version from the analysis
effecttraits = traits[findall(x -> x != normal[i], traits)]
# Iterate inside of the group
for j in 1:length(effecttraits)
matchedindex = findall(x -> x != effecttraits[j], localdata)
X[matchedindex, counter] .= 1
# Add this trait to the string
adjustedtraits[counter - 1] = traits[j]
# Increment the big counter
global counter = counter + 1
end
end
print("Done!\n")
### Additive relationship matrix
print("[🐮]: Creating additive relationship matrix...")
# Create an empty matrix for the additive relationship matrix
A = zeros(numanimals, numanimals)
# Create the additive relationship matrix by the FORTRAN method presented by
# Henderson
for i in 1:numanimals
if !isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i-1)
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
A[i,j] = A[j,i]
end
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
elseif !isnothing(dam[i]) && isnothing(sire[i])
for j in 1:(i-1)
A[j,i] = 0.5*A[j,dam[i]]
A[i,j] = A[j,i]
end
A[i,i] = 1
elseif isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i-1)
A[j,i] = 0.5*A[j,sire[i]]
A[i,j] = A[j,i]
end
A[i,i] = 1
else
for j in 1:(i-1)
A[j,i] = 0
A[i,j] = 0
end
A[i,i] = 1
end
end
print("Done!\n")
### Perform BLUP
print("[🐮]: Solving the mixed-model equations...")
# Extract the observed data
Y = convert(Array{Float64}, data[:,5])
# The random effects matrix
Z = Matrix{Int}(I, numanimals, numanimals)
# Remove items where there is no data
nullobs = findall(isnothing, Y)
Z[nullobs, nullobs] .= 0
# Calculate heritability
λ = (1-h2)/h2
# Use the mixed-model equations
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
MMY = [X'*Y; Z'*Y]
solutions = MME\MMY
# Find the accuracies
diaginv = diag(inv(MME))
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
print("Done!\n")
### Output the results
print("[🐮]: Saving results...")
# Find how many traits we found BLUE for
numgroups = numgroups .- 1
# Start printing results to output
fileID = open(savepath, "w")
write(fileID, "beefblup Results Report\n")
write(fileID, "Produced using beefblup for Julia (")
write(fileID, "https://github.com/millironx/beefblup")
write(fileID, ")\n\n")
write(fileID, "Input:\t")
write(fileID, path)
write(fileID, "\nAnalysis performed:\t")
write(fileID, string(Dates.today()))
write(fileID, "\nTrait examined:\t")
write(fileID, headers[5])
write(fileID, "\n\n")
# Print base population stats
write(fileID, "Base Population:\n")
for i in 1:length(numgroups)
write(fileID, "\t")
write(fileID, headers[i+5])
write(fileID, ":\t")
write(fileID, normal[i])
write(fileID, "\n")
end
write(fileID, "\tMean ")
write(fileID, headers[5])
write(fileID, ":\t")
write(fileID, string(solutions[1]))
write(fileID, "\n\n")
# Contemporary group adjustments
counter = 2
write(fileID, "Contemporary Group Effects:\n")
for i in 1:length(numgroups)
write(fileID, "\t")
write(fileID, headers[i+5])
write(fileID, "\tEffect\tReliability\n")
for j in 1:numgroups[i]
write(fileID, "\t")
write(fileID, adjustedtraits[counter - 1])
write(fileID, "\t")
write(fileID, string(solutions[counter]))
write(fileID, "\t")
write(fileID, string(reliability[counter]))
write(fileID, "\n")
global counter = counter + 1
end
write(fileID, "\n")
end
write(fileID, "\n")
# Expected breeding values
write(fileID, "Expected Breeding Values:\n")
write(fileID, "\tID\tEBV\tReliability\n")
for i in 1:numanimals
write(fileID, "\t")
write(fileID, ids[i])
write(fileID, "\t")
write(fileID, string(solutions[i+counter-1]))
write(fileID, "\t")
write(fileID, string(reliability[i+counter-1]))
write(fileID, "\n")
end
write(fileID, "\n - END REPORT -")
close(fileID)
print("Done!\n")

View file

@ -1,13 +0,0 @@
# beefblup install
# Prepares the Julia environment for using beefblup by installing the requisite
# packages
# Usage: julia install.jl
# (C) 2020 Thomas A. Christensen II
# Licensed under BSD-3-Clause License
# Import the package manager
using Pkg
# Install requisite packages
Pkg.add("XLSX")
Pkg.add("Gtk")

View file

@ -1,19 +1,19 @@
BSD 3-Clause License BSD 3-Clause License
Copyright (c) 2020, Thomas A. Christensen II Copyright (c) 2021, Thomas A. Christensen II
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met: modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this 1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, 2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution. and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its 3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from contributors may be used to endorse or promote products derived from
this software without specific prior written permission. this software without specific prior written permission.

View file

@ -1,341 +0,0 @@
% beefblup
% Main script for performing single-variate BLUP to find beef cattle
% breeding values
% Usage: beefblup
% (C) 2020 Thomas A. Christensen II
% Licensed under BSD-3-Clause License
% Prepare the workspace for computation
clear
clc
close all
%% Display stuff
disp('beefblup v. 0.1')
disp('(C) 2020 Thomas A. Christensen II')
disp('https://github.com/millironx/beefblup')
disp(' ')
%% Prompt User
% Ask for an input spreadsheet
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
% Ask for an ouput text file
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
% Ask for heritability
h2 = input('What is the heritability for this trait? >> ');
%% Import input file
tic
disp(' ')
disp('Importing Excel file...')
% Import data from a suitable spreadsheet
fullname = [path name];
clear name path
[~, ~, data] = xlsread(fullname);
disp('Importing Excel file... Done!')
toc
disp(' ')
%% Process input file
tic
disp(' ')
disp('Processing and formatting data...')
disp(' ')
% Extract the headers into a separate array
headers = data(1,:);
data(1,:) = [];
% Convert the string dates to numbers
data(:,2) = num2cell(datenum(data(:,2)));
% Sort the array by date
data = sortrows(data,2);
% Coerce all id fields to string format
strings = [1 3 4];
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
% Define fields to hold id values for animals and their parents
ids = char(data{:,1});
damids = char(data{:,3});
sireids = char(data{:,4});
numanimals = length(data(:,1));
% Define fields to hold the index values for animals and their parents
dam = zeros(numanimals,1);
sire = zeros(numanimals,1);
% Find all row numbers where an animal was a parent
for i=1:numanimals
% Find all animals that this animal birthed
dammatch = ismember(damids, ids(i,:), 'rows');
damindexes = find(dammatch == 1);
dam(damindexes) = i;
% Find all animals that this animal sired
sirematch = ismember(sireids, ids(i,:), 'rows');
sireindexes = find(sirematch == 1);
sire(sireindexes) = i;
end
% Store column numbers that need to be deleted
% Column 6 contains an intermediate Excel calculation and always needs to
% be deleted
colstodelete = 6;
% Coerce each group to string format
for i = 7:length(headers)
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
end
% Find any columns that need to be deleted
for i = 7:length(headers)
if length(uniquecell(data(:,i))) <= 1
colname = headers{i};
disp(['Column "' colname '" does not have any unique animals and will be removed'])
disp('from this analysis');
colstodelete = [colstodelete i];
end
end
% Delete the appropriate columns from the datasheet and the headers
data(:,colstodelete) = [];
headers(colstodelete) = [];
% Determine how many contemporary groups there are
numgroups = ones(1, length(headers)-5);
for i = 6:length(headers)
numgroups(i-5) = length(uniquecell(data(:,i)));
end
% If there are more groups than animals, then the analysis cannot continue
if sum(numgroups) >= numanimals
disp('There are more contemporary groups than animals. The analysis will now abort.');
return
end
% Define a "normal" animal as one of the last in the groups, provided that
% all traits do not have null values
normal = cell([1 length(headers)-5]);
for i = 6:length(headers)
for j = numanimals:-1:1
if not(cellfun(@isempty, data(j,i)))
normal(i - 5) = data(j,i);
break
end
end
end
% Print the results of the "normal" definition
disp(' ')
disp('For the purposes of this analysis, a "normal" animal will be defined')
disp('by the following traits:')
for i = 6:length(headers)
disp([headers{i} ': ' normal{i-5}])
end
disp(' ')
disp('If no animal matching this description exists, the results may appear')
disp('outlandish, but are still as correct as the accuracy suggests')
disp(' ')
disp('Processing and formatting data... Done!')
toc
disp(' ')
%% Create the fixed-effect matrix
tic
disp(' ')
disp('Creating the fixed-effect matrix...')
% Form the fixed effect matrix
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
X(:,1) = ones(1, numanimals);
% Create an external counter that will increment through both loops
I = 2;
% Store the traits in a string cell array
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
% Iterate through each group
for i = 1:length(normal)
% Find the traits that are present in this trait
traits = uniquecell(data(:,i+5));
% Remove the "normal" version from the analysis
normalindex = find(strcmp(traits, normal{i}));
traits(normalindex) = [];
% Iterate inside of the group
for j = 1:length(traits)
matchedindex = find(strcmp(data(:,i+5), traits{j}));
X(matchedindex, I) = 1;
% Add this trait to the string
adjustedtraits(I - 1) = traits(j);
% Increment the big counter
I = I + 1;
end
end
disp('Creating the fixed-effect matrix... Done!')
toc
disp(' ')
%% Additive relationship matrix
tic
disp(' ')
disp('Creating the additive relationship matrix...')
% Create an empty matrix for the additive relationship matrix
A = zeros(numanimals, numanimals);
% Create the additive relationship matrix by the FORTRAN method presented
% by Henderson
for i = 1:numanimals
if dam(i) ~= 0 && sire(i) ~= 0
for j = 1:(i-1)
A(j,i) = 0.5*(A(j,sire(i))+A(j,dam(i)));
A(i,j) = A(j,i);
end
A(i,i) = 1 + 0.5*A(sire(i),dam(i));
elseif dam(i) ~= 0 && sire(i) == 0
for j = 1:(i-1)
A(j,i) = 0.5*A(j,dam(i));
A(i,j) = A(j,i);
end
A(i,i) = 1;
elseif dam(i) == 0 && sire(i) ~=0
for j = 1:(i-1)
A(j,i) = 0.5*A(j,sire(i));
A(i,j) = A(j,i);
end
A(i,i) = 1;
else
for j = 1:(i-1)
A(j,i) = 0;
A(i,j) = 0;
end
A(i,i) = 1;
end
end
disp('Creating the additive relationship matrix... Done!')
toc
disp(' ')
%% Perform BLUP
tic
disp(' ')
disp('Solving the mixed-model equations')
% Extract the observed data
Y = cell2mat(data(:, 5));
% The identity matrix for random effects
Z = eye(numanimals, numanimals);
% Remove items where there is no data
nullobs = find(isnan(Y));
Z(nullobs, nullobs) = 0;
% Calculate heritability
lambda = (1-h2)/h2;
% Use the mixed-model equations
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
% Find the accuracies
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
reliability = 1 - diaginv.*lambda;
disp('Solving the mixed-model equations... Done!')
toc
disp(' ')
%% Output the results
tic
disp(' ')
disp('Saving results...')
% Find how many traits we found BLUE for
numgroups = numgroups - 1;
% Start printing results to output
fileID = fopen([savepath savename], 'w');
fprintf(fileID, 'beefblup Results Report\n');
fprintf(fileID, 'Produced using beefblup for MATLAB (');
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
fprintf(fileID, ')\n\n');
fprintf(fileID, 'Input:\t');
fprintf(fileID, '%s', fullname);
fprintf(fileID, '\nAnalysis performed:\t');
fprintf(fileID, date);
fprintf(fileID, '\nTrait examined:\t');
fprintf(fileID, [headers{5}]);
fprintf(fileID, '\n\n');
% Print base population stats
fprintf(fileID, 'Base Population:\n');
for i = 1:length(numgroups)
fprintf(fileID, '\t');
fprintf(fileID, [headers{i+5}]);
fprintf(fileID, ':\t');
fprintf(fileID, [normal{i}]);
fprintf(fileID, '\n');
end
fprintf(fileID, '\tMean ');
fprintf(fileID, [headers{5}]);
fprintf(fileID, ':\t');
fprintf(fileID, num2str(solutions(1)));
fprintf(fileID, '\n\n');
I = 2;
% Contemporary group adjustments
fprintf(fileID, 'Contemporary Group Effects:\n');
for i = 1:length(numgroups)
fprintf(fileID, '\t');
fprintf(fileID, [headers{i+5}]);
fprintf(fileID, '\tEffect\tReliability\n');
for j = 1:numgroups(i)
fprintf(fileID, '\t');
fprintf(fileID, [adjustedtraits{I-1}]);
fprintf(fileID, '\t');
fprintf(fileID, num2str(solutions(I)));
fprintf(fileID, '\t');
fprintf(fileID, num2str(reliability(I)));
fprintf(fileID, '\n');
I = I + 1;
end
fprintf(fileID, '\n');
end
fprintf(fileID, '\n');
% Expected breeding values
fprintf(fileID, 'Expected Breeding Values:\n');
fprintf(fileID, '\tID\tEBV\tReliability\n');
for i = 1:numanimals
fprintf(fileID, '\t');
fprintf(fileID, [data{i,1}]);
fprintf(fileID, '\t');
fprintf(fileID, num2str(solutions(i+I-1)));
fprintf(fileID, '\t');
fprintf(fileID, num2str(reliability(i+I-1)));
fprintf(fileID, '\n');
end
fprintf(fileID, '\n - END REPORT -');
fclose(fileID);
disp('Saving results... Done!')
toc
disp(' ')

View file

@ -1,9 +0,0 @@
% uniquenan
% Serves the same purpose as UNIQUE, but ensures any empty cells are not
% counted
function y = uniquecell(x)
y = unique(x);
if any(cellfun(@isempty, y))
y(cellfun(@isempty, y)) = [];
end
end

21
Project.toml Normal file
View file

@ -0,0 +1,21 @@
name = "BeefBLUP"
uuid = "03993faf-e476-444a-86c9-f31e8122fa24"
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
version = "0.3.0"
[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[compat]
julia = "1.3"
[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
test = ["Test"]

View file

@ -1,5 +1,9 @@
# [:cow:]: beefblup # [:cow:]: beefblup
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://millironx.github.io/beefblup/stable)
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://MillironX.github.io/beefblup/dev)
[![Build Status](https://travis-ci.com/millironx/beefblup.svg?branch=develop)](https://travis-ci.com/millironx/beefblup)
[![Coverage](https://codecov.io/gh/millironx/beefblup/branch/develop/graph/badge.svg)](https://codecov.io/gh/millironx/beefblup)
[![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md) [![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md)
[![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases) [![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases)
@ -13,35 +17,34 @@ Why? It's part of my effort to
## Installation ## Installation
1. [Download and install Julia](https://julialang.org/downloads/platform/) 1. [Download and install Julia](https://julialang.org/downloads/platform/)
2. Open a new Julia window and type the `]` key 2. Download the [beefblup ZIP
3. Type `add XLSX Gtk` and press **Enter** file](https://github.com/MillironX/beefblup/archive/refs/tags/v0.2.2.zip) and unzip it someplace memorable
3. In your file explorer, copy the address of the "beefblup" folder
Alternatively, you can run the [install 4. Launch Julia
script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from 5. Type `cd("<the address copied in step 5>")` and press **Enter** (For example,
Julia. `cd("C:\Users\MillironX\Documents\beefblup")`)
6. Type the `]` key
7. Type `activate .` and press **Enter**
8. Type `instantiate` and press **Enter**
9. Installation is done: you can close the Julia window
## How to Use ## How to Use
> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118). 1. Make a copy of the "sample.csv" spreadsheet and replace the data with your own
> Although it's tempting to just open up beefblup in Juno and press the big play 1. The trait you wish to calculate EBVs for always goes in the rightmost column
> button, it won't work. Follow these instructions until it's fixed. If you 2. If you wish to add more contemporary group traits to your analysis, include them before the rightmost column
> don't know what Juno is: ignore this message. 2. Save and close
3. In your file explorer, copy the address of the "beefblup" folder
1. Download the [beefblup ZIP 4. Launch Julia
file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it 5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
someplace memorable `cd("C:\Users\MillironX\Documents\beefblup")`)
2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own 6. Type the `]` key
3. If you wish to add more contemporary group traits to your analysis, replace 7. Type `activate .` and press **Enter**
or add them to the right of the Purple section 8. Press **Backspace**
4. Save and close 9. Type `include("src/beefblup.jl")` and press **Enter**
5. In your file explorer, copy the address of the "Julia" folder 10. Select the spreadsheet you created in steps 1-4
6. Launch Julia 11. Follow the on-screen prompts
7. Type `cd("<the address copied in step 5")` and press **Enter** (For example, 12. **#KeepEPDsReal!**
`cd("C:\Users\MillironX\Documents\beefblup\Julia")`)
8. Type `include("beefblup.jl")` and press **Enter**
9. Select the spreadsheet you created in steps 1-4
10. Follow the on-screen prompts
11. **#KeepEPDsReal!**
## For Programmers ## For Programmers
@ -51,19 +54,19 @@ Julia.
### Development Roadmap ### Development Roadmap
| Version | Feature | | Version | Feature | Status |
| ------- | ------------------------------------------------------------------- | | ------- | ----------------------------------------------------------------------------------- | ------------------ |
| v0.1 | Julia port of original MATLAB script | | v0.1 | Julia port of original MATLAB script | :heavy_check_mark: |
| v0.2 | Spreadsheet format redesign | | v0.2 | Spreadsheet format redesign | :heavy_check_mark: |
| v0.3 | API rewrite (change to function calls and package format instead of script running) | | v0.3 | API rewrite (change to function calls and package format instead of script running) | |
| v0.4 | Add GUI for all options | | v0.4 | Add GUI for all options | |
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | | v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | |
| v0.6 | Repeated measurement BLUP (aka dairyblup) | | v0.6 | Repeated measurement BLUP (aka dairyblup) | |
| v0.7 | Multiple trait BLUP | | v0.7 | Multiple trait BLUP | |
| v0.8 | Maternal effects BLUP | | v0.8 | Maternal effects BLUP | |
| v0.9 | Genomic BLUP | | v0.9 | Genomic BLUP | |
| v0.10 | beefblup binaries | | v0.10 | beefblup binaries | |
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | | v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | |
### Bug Reports ### Bug Reports

47
beefblup Executable file
View file

@ -0,0 +1,47 @@
#!/bin/bash
# -*- mode: julia -*-
#=
exec julia --project="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" "${BASH_SOURCE[0]}" "$@"
=#
# beefblup cli
# Provides a convenient CLI for using beefblup
# cSpell:includeRegExp #.*
# cSpell:includeRegExp ("""|''')[^\1]*\1
# Import packages
using BeefBLUP
using ArgParse
# If this is run without arguments, catch that before parsing
if isempty(ARGS)
BeefBLUP.beefblup()
exit()
end
# Setup the argument table
argsettings = ArgParseSettings()
@add_arg_table argsettings begin
"datafile"
help = "datasheet containing the pedigree, fixed effects, and response trait formatted in CSV"
required = true
arg_type = String
"resultsfile"
help = "filename for storing the results of the analysis"
required = false
arg_type = String
"--heritability", "-t"
help = "heritability of the response trait being analyzed"
required = true
arg_type = Float64
end
arguments = parse_args(argsettings)
h2 = arguments["heritability"]
if isnothing(arguments["resultsfile"])
BeefBLUP.beefblup(arguments["datafile"], h2)
exit()
end
BeefBLUP.beefblup(arguments["datafile"], arguments["resultsfile"], h2)

8
data/sample.csv Normal file
View file

@ -0,0 +1,8 @@
id,birthdate,dam,sire,year,sex,species,weaning_weight
1,1/1/1990,,,1990,male,cow,354
2,1/1/1990,,,1990,female,cow,251
3,1/1/1991,,1,1991,male,cow,327
4,1/1/1991,,1,1991,female,cow,328
5,1/1/1991,2,1,1991,male,cow,301
6,1/1/1991,2,,1991,female,cow,270
7,1/1/1992,,,1992,male,cow,330
1 id birthdate dam sire year sex species weaning_weight
2 1 1/1/1990 1990 male cow 354
3 2 1/1/1990 1990 female cow 251
4 3 1/1/1991 1 1991 male cow 327
5 4 1/1/1991 1 1991 female cow 328
6 5 1/1/1991 2 1 1991 male cow 301
7 6 1/1/1991 2 1991 female cow 270
8 7 1/1/1992 1992 male cow 330

3
docs/Project.toml Normal file
View file

@ -0,0 +1,3 @@
[deps]
BeefBLUP = "03993faf-e476-444a-86c9-f31e8122fa24"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

26
docs/make.jl Normal file
View file

@ -0,0 +1,26 @@
using BeefBLUP
using Documenter
DocMeta.setdocmeta!(BeefBLUP, :DocTestSetup, :(using BeefBLUP); recursive=true)
makedocs(;
modules=[BeefBLUP],
authors="Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>",
repo="https://github.com/MillironX/beefblup/blob/{commit}{path}#{line}",
sitename="beefblup",
format=Documenter.HTML(;
prettyurls=get(ENV, "CI", "false") == "true",
canonical="https://millironx.com/beefblup",
assets=String[],
),
pages=[
"Home" => "index.md",
"How to Calculate EPDs" => "how-to-calculate-epds.md",
"CLI Reference (WIP)" => "beefblup-cli.md"
],
)
deploydocs(;
repo="github.com/MillironX/beefblup",
devbranch="develop",
)

View file

@ -0,0 +1,3 @@
{
"MD041": false
}

106
docs/src/beefblup-cli.md Normal file
View file

@ -0,0 +1,106 @@
```@meta
CurrentModule = BeefBLUP
```
# beefblup Command Line Interface (CLI) documentation
> _A work in progress_
**Notice:** This document is a draft for what the command-line interface for
beefblup would look like as of version 1.0, if beefblup was even a command-line
application to begin with (it's not). It is modeled (loosely) after the man page
format. It is not intended to be taken seriously, but instead to serve as a
useful thought experiment and brainstorming ground on the future of beefblup.
Please use it if it clarifies things for you. If it doesn't, ignore it.
## Input file
beefblup requires a very specific format of input file. The format may be in
comma-separated values (CSV) or Excel 2007+ (XLSX) format. CSV files should not
be quoted (and therefore cannot have commas within cell values). Other formats
may be forthcoming.
A beefblup data file must have at least six columns appearing in this order:
- ID
- Sire ID
- Dam ID
- Birthdate
- Fixed effect(s)
- Response variable(s)
The first row always contains column names. The values of column names are
unimportant for the first four columns, as they will always be treated the same
regardless of the name. The generated report will use the column names
of fixed effects and response variables as given.
Each fixed effect should have its own column, to as many as are needed. There is
no limit to the number of fixed effects as defined by beefblup, however its
dependencies might have some. The same rules apply to response variables.
Unknown values should be left blank (`,,`). Do not substitute null placeholders
(e.g. `NULL`, `NA`, `0`, `nothing`, `undefined`, etc.) for unknown values.
An example spreadsheet might have the following format
| ID | Sire ID | Dam ID | Birthdate | Sex | Weaning Weight |
| --- | ------- | ------ | --------- | ------ | -------------- |
| 1 | | | 1/1/1990 | Male | 354 |
| 2 | 1 | | 1/1/1990 | Female | 251 |
| 3 | 1 | | 1/1/1991 | Male | 327 |
| 4 | 1 | 2 | 1/1/1991 | Female | 328 |
| 5 | | 2 | 1/1/1991 | Male | 301 |
| 6 | | | 1/1/1991 | Female | 270 |
| 7 | | | 1/1/1992 | Male | 330 |
## Synopsis
```bash
beefblup [-G SNPs_file] [-M num_response_vars] [-o report_spreadsheet]
[--no-aod] [--no-year] [--no-season] [--no-autob] [--maternal] input_file
[report_file]
```
## Command line basic syntax
The most basic input is to simply pass the input file name to the program.
```bash
beefblup filename.csv
```
In this case beefblup will insert fixed-effects for age-of-dam, year, and
season, and will calculate the EBVs for the response variable in the final
column. The report will then be saved as `filename_report.txt`.
## Suppressing automatically-calculated fixed effects
If you don't wish to include one of the automatically calculated fixed-effects
from your model, you can pass arguments to suppress them.
### Suppress Age-of-dam
```bash
beefblup --no-aod filename.csv
```
### Suppress year
```bash
beefblup --no-year filename.csv
```
### Suppress season
```bash
beefblup --no-season filename.csv
```
### Suppress all calculated fixed effects
```bash
beefblup --no-autob filename.csv
```
The argument `--no-autob` comes from the nomenclature of assigning fixed-effects
to the matrix _b_ in Henderson's mixed-model equations.

View file

@ -0,0 +1,542 @@
```@meta
CurrentModule = BeefBLUP
```
# How to Calculate EPDs
Not to exclude our Australian comrades or our dairy friends, this guide could
alternately be called
- How to Calculate Expected Breeding Values (EBVs)
- How to Calculate Predicted Transmitting Abilities (PTAs)
- How to Calculate Expected Progeny Differences (EPDs)
Since I'm mostly talking to American beef producers, though, we'll stick with
EPDs for most of this discussion.
Expected Breeding Values (EBVs) (which are more often halved and published as
Expected Progeny Differences (EPDs) or Predicted Transmitting Abilities (PTAs)
in the United States) are generally found using Charles Henderson's linear
mixed-model equations. Great, you say, what is that? I'm glad you asked...
## The mathematical model
Every genetics textbook starts with the following equation
```math
P = G + E
```
Where:
- ``P`` = phenotype
- ``G`` = genotype (think: breeding value)
- ``E`` = environmental factors
Now, we can't identify _every_ environmental factor that affects phenotype, but
we can identify some of them, so let's substitute ``E`` with some absolutes. A
good place to start is the "contemporary group" listings for the trait of
interest in the
[BIF Guidelines](https://beefimprovement.org/wp-content/uploads/2018/03/BIFGuidelinesFinal_updated0318.pdf),
though for the purposes of this example, I'm only going to consider sex, and
birth year.
```math
P = G + E_{year} + E_{sex}
```
Where:
- ``E_n`` is the effect of ``n`` on the phenotype
Now let's say I want to find the weaning weight breeding value (``G``) of my
favorite herd bull. I compile his stats, and then plug them into the equation
and solve for ``G``, right? Let's try that.
### Calf Records
| ID | Birth Year | Sex | YW (kg) |
|:-- | :--------- | :----- |:------- |
| 1 | 1990 | Male | 354 |
```math
354 \ \textup{kg} = G_1 + E_{1990} + E_{male}
```
Hmm. I just realized I don't know any of those ``E`` values. Come to think of it,
I remember from math class that I will need as many equations as I have
unknowns, so I will add equations for other animals that I have records for.
### Calf Records
| ID | Birth Year | Sex | YW (kg) |
|:--- |:---------- |:------ |:------- |
| 1 | 1990 | Male | 354 |
| 2 | 1990 | Female | 251 |
| 3 | 1991 | Male | 327 |
| 4 | 1991 | Female | 328 |
| 5 | 1991 | Male | 301 |
| 6 | 1991 | Female | 270 |
| 7 | 1992 | Male | 330 |
```math
\begin{aligned}
251 \ \textup{kg} &= G_2 + E_{1990} + E_{female} \\
327 \ \textup{kg} &= G_3 + E_{1991} + E_{male} \\
328 \ \textup{kg} &= G_4 + E_{1991} + E_{female} \\
301 \ \textup{kg} &= G_5 + E_{1991} + E_{male} \\
270 \ \textup{kg} &= G_6 + E_{1991} + E_{female} \\
330 \ \textup{kg} &= G_7 + E_{1992} + E_{male}
\end{aligned}
```
Drat! Every animal I added brings more variables into the system than it
eliminates! In fact, since each cow brings in _at least_ one term
(``G_n``), I will never be able to write enough equations to solve for
``G`` numerically. I will have to use a different approach.
## The statistical model: the setup
Since I can never solve for ``G`` directly, I will have to find some way to
estimate it. I can switch to a statistical model and solve for ``G`` that way. The
caveat with a statistical model is that there will be some level of error, but
so long as we know and can control the level of error, that will be better than
not knowing ``G`` at all.
Since we're switching into a statistical space, we should also switch the
variables we're using. I'll rewrite the first equation as
```math
y = b + u + e
```
Where:
- ``y`` = Phenotype
- ``b`` = Environment
- ``u`` = Genotype
- ``e`` = Error
It's not as easy as simply substituting ``b`` for every ``E`` that we had above,
however. The reason for that is that we must make the assumption that
environment is a **fixed effect** and that genotype is a **random effect**. I'll
go over why that is later, but for now, understand that we need to transform the
environment terms and genotype terms separately.
We'll start with the environment terms.
## The statistical model: environment as fixed effects
To properly transform the equations, I will have to introduce
``b_{mean}`` terms in each animal's equation. This is part of the fixed
effect statistical assumption, and it will let us obtain a solution.
Here are the transformed equations:
```math
\begin{aligned}
354 \ \textup{kg} &= u_1 + b_{mean} + b_{1990} + b_{male} + e_1 \\
251 \ \textup{kg} &= u_2 + b_{mean} + b_{1990} + b_{female} + e_2 \\
327 \ \textup{kg} &= u_3 + b_{mean} + b_{1991} + b_{male} + e_3 \\
328 \ \textup{kg} &= u_4 + b_{mean} + b_{1991} + b_{female} +e_4 \\
301 \ \textup{kg} &= u_5 + b_{mean} + b_{1991} + b_{male} + e_5 \\
270 \ \textup{kg} &= u_6 + b_{mean} + b_{1991} + b_{female} + e_6 \\
330 \ \textup{kg} &= u_7 + b_{mean} + b_{1992} + b_{male} + e_7
\end{aligned}
```
Statistical methods work best in matrix form, so I'm going to convert the set of
equations above to a single matrix equation that means the exact same thing.
```math
\begin{bmatrix}
354 \ \textup{kg} \\
251 \ \textup{kg} \\
327 \ \textup{kg} \\
328 \ \textup{kg} \\
301 \ \textup{kg} \\
270 \ \textup{kg} \\
330 \ \textup{kg}
\end{bmatrix}
=
\begin{bmatrix}
u_1 \\
u_2 \\
u_3 \\
u_4 \\
u_5 \\
u_6 \\
u_7
\end{bmatrix}
+
b_{mean}
+
\begin{bmatrix}
b_{1990} \\
b_{1990} \\
b_{1991} \\
b_{1991} \\
b_{1991} \\
b_{1991} \\
b_{1992}
\end{bmatrix}
+
\begin{bmatrix}
b_{male} \\
b_{female} \\
b_{male} \\
b_{female} \\
b_{male} \\
b_{female} \\
b_{male}
\end{bmatrix}
+
\begin{bmatrix}
e_1 \\
e_2 \\
e_3 \\
e_4 \\
e_5 \\
e_6 \\
e_7
\end{bmatrix}
```
That's a nice equation, but now my hand is getting tired writing all those ``b``
terms over and over again, so I'm going to use
[the dot product](https://www.khanacademy.org/math/precalculus/x9e81a4f98389efdf:matrices/x9e81a4f98389efdf:multiplying-matrices-by-matrices/v/matrix-multiplication-intro)
to condense this down.
```math
\begin{bmatrix}
354 \textup{kg} \\
251 \textup{kg} \\
327 \textup{kg} \\
328 \textup{kg} \\
301 \textup{kg} \\
270 \textup{kg} \\
330 \textup{kg}
\end{bmatrix}
=
\begin{bmatrix}
u_1 \\
u_2 \\
u_3 \\
u_4 \\
u_5 \\
u_6 \\
u_7
\end{bmatrix}
+
\begin{bmatrix}
1 & 1 & 0 & 0 & 1 & 0 \\
1 & 1 & 0 & 0 & 0 & 1 \\
1 & 0 & 1 & 0 & 1 & 0 \\
1 & 0 & 1 & 0 & 0 & 1 \\
1 & 0 & 1 & 0 & 1 & 0 \\
1 & 0 & 0 & 1 & 1 & 0
\end{bmatrix}
\begin{bmatrix}
b_{mean} \\
b_{1990} \\
b_{1991} \\
b_{1992} \\
b_{male} \\
b_{female}
\end{bmatrix}
+
\begin{bmatrix}
e_1 \\
e_2 \\
e_3 \\
e_4 \\
e_5 \\
e_6 \\
e_7
\end{bmatrix}
```
That matrix in the middle with all the zeros and ones is called the **incidence
matrix**, and essentially reads like a table with each row corresponding to an
animal, and each column corresponding to a fixed effect. For brevity, we'll just
call it ``X``, though. One indicates that the animal and effect go together,
and zero means they don't. For our record, we could write a table to go with
``X``, and it would look like this:
| Animal | mean | 1990 | 1991 | 1992 | male | female |
|:------ |:---- |:---- |:---- |:---- |:---- |:------ |
| 1 | yes | yes | no | no | yes | no |
| 2 | yes | yes | no | no | no | yes |
| 3 | yes | no | yes | no | yes | no |
| 4 | yes | no | yes | no | no | yes |
| 5 | yes | no | yes | no | yes | no |
| 6 | yes | no | yes | no | no | yes |
| 7 | yes | no | no | yes | yes | no |
Now that we have ``X``, we have the ability to start making changes to allow
us to solve for ``u``. Immediately, we see that ``X`` is **singular**, meaning
it can't be solved directly. We kind of already knew that, but now we can
quantify it. We calculate the
[rank of ``X``](https://math.stackexchange.com/a/2080577),
and find that there is only enough information contained in it to solve for 4
variables, which means we need to eliminate two columns.
There are several ways to effectively eliminate fixed effects in this type of
system, but one of the simplest and the most common methods is to declare a
**base population**, and lump the fixed effects of animals within the base
population into the mean fixed effect. Note that it is possible to declare a
base population that has no animals in it, but that gives weird results. For
this example, we'll follow the convention built into `beefblup` and pick the
last occuring form of each variable.
### Base population
- **Year**: 1992
- **Sex**: Female
Now in order to use the base population, we simply drop the columns representing
conformity with the traits in the base population from ``X````. Our new
equation looks like
```math
\begin{bmatrix}
354 \ \textup{kg} \\
251 \ \textup{kg} \\
327 \ \textup{kg} \\
328 \ \textup{kg} \\
301 \ \textup{kg} \\
270 \ \textup{kg} \\
330 \ \textup{kg}
\end{bmatrix}
=
\begin{bmatrix}
u_1 \\
u_2 \\
u_3 \\
u_4 \\
u_5 \\
u_6 \\
u_7
\end{bmatrix}
+
\begin{bmatrix}
1 & 1 & 0 1 \\
1 & 1 & 0 0 \\
1 & 0 & 1 1 \\
1 & 0 & 1 0 \\
1 & 0 & 1 1 \\
1 & 0 & 0 1
\end{bmatrix}
+
\begin{bmatrix}
b_{mean} \\
b_{1990} \\
b_{1991} \\
b_{male} \\
\end{bmatrix}
+
\begin{bmatrix}
e_1 \\
e_2 \\
e_3 \\
e_4 \\
e_5 \\
e_6 \\
e_7
\end{bmatrix}
```
And the table for humans to understand:
| Animal | mean | 1990 | 1991 | female |
|:------ |:---- |:---- |:---- |:------ |
| 1 | yes | yes | no | no |
| 2 | yes | yes | no | yes |
| 3 | yes | no | yes | no |
| 4 | yes | no | yes | yes |
| 5 | yes | no | yes | no |
| 6 | yes | no | yes | yes |
| 7 | yes | no | no | no |
Even though each animal is said to participate in the mean, the result for the
mean will now actually be the average of the base population. Math is weird
sometimes.
Double-checking, the rank of ``X`` is still 4, so we can solve for the average
of the base population, and the effect of being born in 1990, the effect of
being born in 1991, and the effect of being male.
Whew! That was some transformation. We still haven't constrained this model
enough to solve it, though. Now on to the genotype.
## The statistical model: genotype as random effect
Remember I said above that genotype was a **random effect**? Statisticians say
"_a random effect is an effect that influences the variance and not the mean of
the observation in question._" I'm not sure exactly what that means or how that
is applicable to genotype, but it does let us add an additional constraint to
our model.
The basic gist of genetics is that organisms that are related to one another are
similar to one another. Based on a pedigree, we can even say how related to one
another animals are, and quantify that as the amount that the genotype terms
should be allowed to vary between related animals.
We'll need a pedigree for our animals:
### Calf Records
| ID | Sire | Dam | Birth Year | Sex | YW (kg) |
|:-- |:---- |:--- |:---------- |:------ |:------- |
| 1 | NA | NA | 1990 | Male | 354 |
| 2 | NA | NA | 1990 | Female | 251 |
| 3 | 1 | NA | 1991 | Male | 327 |
| 4 | 1 | NA | 1991 | Female | 328 |
| 5 | 1 | 2 | 1991 | Male | 301 |
| 6 | NA | 2 | 1991 | Female | 270 |
| 7 | NA | NA | 1992 | Male | 330 |
Now, because cows sexually reproduce, the genotype of one animal is halfway the
same as that of either parent (exception: inbreeding, see below). It should go
without saying that each animal's genotype is identical to that of itself. From
this we can then find the numerical multiplier for any relative (grandparent =
1/4, full sibling = 1, half sibling = 1/2, etc.). Let's write those values down
in a table.
| ID | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|:-- |:--- |:--- |:--- |:--- |:--- |:--- |:-- |
| 1 | 1 | 0 | 1/2 | 1/2 | 1/2 | 0 | 0 |
| 2 | 0 | 1 | 0 | 0 | 1/2 | 1/2 | 0 |
| 3 | 1/2 | 0 | 1 | 1/4 | 1/4 | 0 | 0 |
| 4 | 1/2 | 0 | 1/4 | 1 | 1/4 | 0 | 0 |
| 5 | 1/2 | 1/2 | 1/4 | 1/4 | 1 | 1/4 | 0 |
| 6 | 0 | 1/2 | 0 | 0 | 1/4 | 1 | 0 |
| 7 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Hmm. All those numbers look suspiciously like a matrix. Why don't I put them
into a matrix called ``A``?
```math
\begin{bmatrix}
1 & 0 & \frac{1}{2} & \frac{1}{2} & \frac{1}{2} & 0 & 0 \\
0 & 1 & 0 & 0 & \frac{1}{2} & \frac{1}{2} & 0 \\
\frac{1}{2} & 0 & 1 & \frac{1}{4} & \frac{1}{4} & 0 & 0 \\
\frac{1}{2} & 0 & \frac{1}{4} & 1 & \frac{1}{4} & 0 & 0 \\
\frac{1}{2} & \frac{1}{2} & \frac{1}{4} & \frac{1}{4} & 1 & \frac{1}{4} & 0 \\
0 & \frac{1}{2} & 0 & 0 & \frac{1}{4} & 1 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 1
\end{bmatrix}
```
Now I'm going to take the matrix with all of the ``u`` values, and call it
``μ``. To quantify the idea of genetic relationship, I will then say that
```math
\textup{var}(μ) = A σ_μ^2
```
Where:
- ``A`` = the relationship matrix defined above
- ``σ_μ^2`` = the standard deviation of all the genotypes
To fully constrain the system, I have to make two more assumptions: 1) that the
error term in each animal's equation is independent from all other error terms,
and 2) that the error term for each animal is independent from the value of the
genotype. I will call the matrix holding the ``e`` values ``ε`` and then say
```math
\textup{var}(ϵ) = I σ_ϵ^2
```
```math
\textup{cov}(μ, ϵ) = \textup{cov}(ϵ, μ) = 0
```
Substituting in the matrix names, our equation now looks like
```math
\begin{bmatrix}
354 \textup{kg} \\
251 \textup{kg} \\
327 \textup{kg} \\
328 \textup{kg} \\
301 \textup{kg} \\
270 \textup{kg} \\
330 \textup{kg}
\end{bmatrix}
= μ + X
\begin{bmatrix}
b_{mean} \\
b_{1990} \\
b_{1991} \\
b_{male} \\
\end{bmatrix}
+ ϵ
```
We are going to make three changes to this equation before we are ready to solve
it, but they are cosmetic details for this example.
1. Call the matrix on the left side of the equation ``Y`` (sometimes it's
called the **matrix of observations**)
2. Multiply ``μ`` by an identity matrix called ``Z``. Multiplying by the
identity matrix is the matrix form of multiplying by one, so nothing changes,
but if we later want to find one animal's genetic effect on another animal's
performance (e.g. a **maternal effects model**), we can alter ``Z`` to
allow that
3. Call the matrix with all the ``b`` values ``β``.
With all these changes, we now have
```math
Y = Z μ + X β + ϵ
```
This is the canonical form of the mixed-model equation, and the form that
Charles Henderson used to first predict breeding values of livestock.
## Solving the equations
Henderson proved that the mixed-model equation can be solved by the following:
```math
\begin{bmatrix}
\hat{β} \\
\hat{μ}
\end{bmatrix}
=
\begin{bmatrix}
X'X & X'Z \\
Z'X & Z'Z+A^{-1}λ
\end{bmatrix}^{-1}
\begin{bmatrix}
X'Y \\
Z'Y
\end{bmatrix}
```
Where
- The variables with hats are the statistical estimates of their mixed-model
counterparts
- The predicted value of ``μ`` is called the _Best Linear Unbiased
Predictor_ or _BLUP_
- The estimated value of ``β`` is called the _Best Linear Unbiased Estimate_
or _BLUE_
- ' is the transpose operator
- ``λ`` is a single real number that is a function of the heritability for the trait
being predicted. It can be left out in many cases (``λ = 1``).
- ``λ = \frac{1-h^2}{h^2}``
What happened to
## Footnotes
### Exception
An animal **can** share its genome with itself by a factor of more than one:
that's called inbreeding! We can account for this, and `beefblup` does as it
calculates ``A``. This is an area that actually merits a good deal of study:
see chapter 2 of _Linear Models for the Prediction of Animal Breeding Values_ by
Raphael A. Mrode (ISBN 978 1 78064 391 5).

14
docs/src/index.md Normal file
View file

@ -0,0 +1,14 @@
```@meta
CurrentModule = BeefBLUP
```
# beefblup
Documentation for [BeefBLUP](https://github.com/MillironX/beefblup).
```@index
```
```@docs
beefblup
```

308
src/BeefBLUP.jl Executable file
View file

@ -0,0 +1,308 @@
# beefblup
# Julia package for performing single-variate BLUP to find beef cattle
# breeding values
# (C) 2021 Thomas A. Christensen II
# Licensed under BSD-3-Clause License
# cSpell:includeRegExp #.*
# cSpell:includeRegExp ("""|''')[^\1]*\1
module BeefBLUP
# Import the required packages
using CSV
using DataFrames
using LinearAlgebra
using Dates
using Gtk
# Main entry-level function - acts just like the script
function beefblup()
# Ask for an input spreadsheet
path = open_dialog_native(
"Select a beefblup worksheet",
GtkNullContainer(),
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
)
# Ask for an output text filename
savepath = save_dialog_native(
"Save your beefblup results",
GtkNullContainer(),
(GtkFileFilter("*.txt", name="Results file"),
"*.txt")
)
# Ask for heritability
print("What is the heritability for this trait?> ")
h2 = parse(Float64, readline(stdin))
beefblup(path, savepath, h2)
end
function beefblup(datafile::String, h2::Float64)
# Assume the data is named the same as the file without the trailing extension
dataname = join(split(datafile, ".")[1:end - 1])
# Create a new results name
resultsfile = string(dataname, "_results.txt")
# Pass this info on to the worker
beefblup(datafile, resultsfile, h2)
end
# Main worker function, can perform all the work if given all the user input
function beefblup(path::String, savepath::String, h2::Float64)
# Import data from a suitable spreadsheet
data = DataFrame(CSV.File(path))
# Make sure the data is in the proper format
renamecolstospec!(data)
# Sort the array by date
sort!(data, :birthdate)
# Define fields to hold id values for animals and their parents
numanimals = length(data.id)
# Calculate the relationship matrix
A = additiverelationshipmatrix(data.id, data.dam, data.sire)
# Extract all of the fixed effects
fixedeffectdata = data[:,5:end-1]
(X, fixedeffects) = fixedeffectmatrix(fixedeffectdata)
# Extract the observed data
Y = convert(Array{Float64}, data[:,end])
# The random effects matrix
Z = Matrix{Int}(I, numanimals, numanimals)
# Remove items where there is no data
nullobs = findall(isnothing, Y)
Z[nullobs, nullobs] .= 0
# Calculate heritability
λ = (1 - h2) / h2
# Use the mixed-model equations
MME = [X' * X X' * Z; Z' * X (Z' * Z) + (inv(A) .* λ)]
MMY = [X' * Y; Z' * Y]
solutions = MME \ MMY
# Find the accuracies
diaginv = diag(inv(MME))
reliability = ones(Float64, length(diaginv)) .- diaginv .* λ
# Find how many traits we found BLUE for
numgroups = length(reliability) - numanimals
# Split the BLUP and BLUE results
β = solutions[1:numgroups]
μ = solutions[numgroups+1:end]
μ_reliability = reliability[numgroups+1:end]
# Extract the names of the traits
traitname = names(data)[end]
# Start printing results to output
fileID = open(savepath, "w")
write(fileID, "beefblup Results Report\n")
write(fileID, "Produced using beefblup (")
write(fileID, "https://github.com/millironx/beefblup")
write(fileID, ")\n\n")
write(fileID, "Input:\t")
write(fileID, path)
write(fileID, "\nAnalysis performed:\t")
write(fileID, string(Dates.today()))
write(fileID, "\nTrait examined:\t")
write(fileID, traitname)
write(fileID, "\n\n")
# Print base population stats
write(fileID, "Base Population:\n")
for i in 1:length(fixedeffects)
effect = fixedeffects[i]
write(fileID, "\t")
write(fileID, string(effect.name))
write(fileID, ":\t")
write(fileID, string(effect.basetrait))
write(fileID, "\n")
end
write(fileID, "\tMean ")
write(fileID, traitname)
write(fileID, ":\t")
write(fileID, string(solutions[1]))
write(fileID, "\n\n")
# Contemporary group adjustments
counter = 2
write(fileID, "Contemporary Group Effects:\n")
for i in 1:length(fixedeffects)
effect = fixedeffects[i]
write(fileID, "\t")
write(fileID, effect.name)
write(fileID, "\tEffect\n")
for j in 1:length(effect.alltraits)
types = effect.alltraits[j]
write(fileID, "\t")
write(fileID, string(types))
write(fileID, "\t")
write(fileID, string(β[counter]))
write(fileID, "\n")
counter = counter + 1
end
write(fileID, "\n")
end
write(fileID, "\n")
# Expected breeding values
write(fileID, "Expected Breeding Values:\n")
write(fileID, "\tID\tEBV\tReliability\n")
for i in 1:numanimals
write(fileID, "\t")
write(fileID, string(data.id[i]))
write(fileID, "\t")
write(fileID, string(μ[i]))
write(fileID, "\t")
write(fileID, string(μ_reliability[i]))
write(fileID, "\n")
end
write(fileID, "\n - END REPORT -")
close(fileID)
end
"""
fixedeffectmatrix(fixedeffectdata::DataFrame)
Creates contemporary groupings and the fixed-effect incidence matrix based on the fixed
effects listed in `fixedeffectdata`.
Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the
actual matrix, and `fixedeffects` is the contemporary groupings.
"""
function fixedeffectmatrix(fixedeffectdata::DataFrame)
# Declare an empty return matrix
fixedeffects = FixedEffect[]
# Add each trait to the array
for i in 1:size(fixedeffectdata)[2]
name = names(fixedeffectdata)[i]
traits = eachcol(fixedeffectdata)[i]
if length(unique(traits)) > 1
push!(fixedeffects, FixedEffect(name, traits))
else
@warn string("column '", name, "' does not have any unique animals and will be dropped from analysis")
pname = propertynames(fixedeffectdata)[i]
DataFrames.select!(fixedeffectdata, Not(pname))
end
end
X = ones(Int64, (size(fixedeffectdata)[1], 1))
for i in 1:length(fixedeffects)
trait = fixedeffects[i]
for phenotype in trait.alltraits
X = cat(X, Int64.(fixedeffectdata[:,i] .== phenotype), dims=2)
end
end
return X, fixedeffects
end
"""
additiverelationshipmatrix(id, dam, sire)
Returns the additive numerator relationship matrix based on the pedigree provided in `dam`
and `sire` for animals in `id`.
"""
function additiverelationshipmatrix(id::AbstractVector, damid::AbstractVector, sireid::AbstractVector)
# Sanity-check for valid pedigree
if !(length(id) == length(damid) && length(damid) == length(sireid))
throw(ArgumentError("id, dam, and sire must be of the same length"))
end
# Convert to positions
dam = indexin(damid, id)
sire = indexin(sireid, id)
# Calculate loop iterations
numanimals = length(dam)
# Create an empty matrix for the additive relationship matrix
A = zeros(numanimals, numanimals)
# Create the additive relationship matrix by the FORTRAN method presented by
# Henderson
for i in 1:numanimals
if !isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i - 1)
A[j,i] = 0.5 * (A[j,sire[i]] + A[j,dam[i]])
A[i,j] = A[j,i]
end
A[i,i] = 1 + 0.5 * A[sire[i], dam[i]]
elseif !isnothing(dam[i]) && isnothing(sire[i])
for j in 1:(i - 1)
A[j,i] = 0.5 * A[j,dam[i]]
A[i,j] = A[j,i]
end
A[i,i] = 1
elseif isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i - 1)
A[j,i] = 0.5 * A[j,sire[i]]
A[i,j] = A[j,i]
end
A[i,i] = 1
else
for j in 1:(i - 1)
A[j,i] = 0
A[i,j] = 0
end
A[i,i] = 1
end
end
return A
end
"""
renamecolstospec(::DataFrame)
Renames the first four columns of the beefblup data sheet so that they can be referred to by
name instead of by column index, regardless of user input.
"""
function renamecolstospec!(df::DataFrame)
# Pull out the fixed-effect and observation name
othernames = propertynames(df)[5:end]
# Put specification column names and user-defined names together
allnames = cat([:id, :birthdate, :dam, :sire], othernames, dims=1)
# Rename in the DataFrame
rename!(df, allnames, makeunique=true)
return df
end
struct FixedEffect
name::String
basetrait::Any
alltraits::AbstractArray{Any}
end
function FixedEffect(name::String, incidences)
basetrait = last(unique(incidences))
types = unique(incidences)[1:end-1]
return FixedEffect(name, basetrait, types)
end
end

21
test/runtests.jl Normal file
View file

@ -0,0 +1,21 @@
using BeefBLUP
using DataFrames
using Test
@testset "BeefBLUP.jl" begin
# Write your tests here.
correctX = [1 1 0 1; 1 1 0 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 0 1]
fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"])
@test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX
correctA = [1 0 1/2 1/2 1/2 0 0;
0 1 0 0 1/2 1/2 0;
1/2 0 1 1/4 1/4 0 0;
1/2 0 1/4 1 1/4 0 0;
1/2 1/2 1/4 1/4 1 1/4 0;
0 1/2 0 0 1/4 1 0;
0 0 0 0 0 0 1]
id = collect(1:7)
dam_id = [missing, missing, missing, missing, 2, 2, missing]
sire_id = [missing, missing, 1, 1, 1, missing, missing]
@test BeefBLUP.additiverelationshipmatrix(id, dam_id, sire_id) == correctA
end