mirror of
https://github.com/MillironX/beefblup.git
synced 2024-11-10 18:23:08 +00:00
Compare commits
75 commits
Author | SHA1 | Date | |
---|---|---|---|
62f5c4ea65 | |||
683c48aa45 | |||
d4bb72c458 | |||
4b66ad8b1f | |||
91fc553c4c | |||
16aae0469e | |||
97bd43323d | |||
b4e0591ed2 | |||
a9ab1e8641 | |||
3782de85ac | |||
9036ace524 | |||
c318ded0a6 | |||
74f15be400 | |||
091b757b00 | |||
a1d4e6b063 | |||
79e158d83e | |||
94a2e2e124 | |||
f7f09cec90 | |||
12e95d4a7a | |||
7b7ab22123 | |||
cc97c99a68 | |||
a6bd3d7a29 | |||
489274d557 | |||
66fc4b902f | |||
5e1c5f7707 | |||
27199ea748 | |||
e18afb68df | |||
3f55072ecd | |||
15bdebb17d | |||
d87ee85795 | |||
b3162345ff | |||
c3d905ff5e | |||
615766901c | |||
d9fde2af8f | |||
141cf56b07 | |||
181819db28 | |||
f5f1dfad13 | |||
7ce74be4d4 | |||
0a537f9928 | |||
95fd34ce3a | |||
93564b247e | |||
289984be2f | |||
6e311141ac | |||
fa42f3635f | |||
5c5ac1654c | |||
1885aa15ef | |||
530258a2c8 | |||
9b181d251c | |||
610c80e7c9 | |||
f985f4a700 | |||
7cf650992d | |||
5048d3a1b8 | |||
2dd48631ea | |||
6b5b775a83 | |||
b61eae3324 | |||
1928eb3c4f | |||
97ed1662a8 | |||
80a21e42e0 | |||
487f1c30df | |||
6573492bac | |||
0de4b7dc0c | |||
92e03a95ef | |||
74683f03d5 | |||
2882f5b932 | |||
82bf3cfa89 | |||
de0eb6f5a9 | |||
4efeb5d53a | |||
d0b0e79b38 | |||
f450295b20 | |||
5d7ecceef8 | |||
ec3356fcf3 | |||
05ee8018c2 | |||
dbd1dc7b4d | |||
8fc1bc3f4a | |||
6f73731d0a |
25 changed files with 1243 additions and 913 deletions
2
.gitattributes
vendored
2
.gitattributes
vendored
|
@ -15,3 +15,5 @@
|
||||||
*.pdf -crlf -diff -merge
|
*.pdf -crlf -diff -merge
|
||||||
*.jpg -crlf -diff -merge
|
*.jpg -crlf -diff -merge
|
||||||
*.png -crlf -diff -merge
|
*.png -crlf -diff -merge
|
||||||
|
*.jl linguist-language=Julia
|
||||||
|
beefblup linguist-language=Julia
|
||||||
|
|
26
.github/workflows/Documentation.yml
vendored
Normal file
26
.github/workflows/Documentation.yml
vendored
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
name: Documentation
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
- develop
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
DISPLAY: ':99.0'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: julia-actions/setup-julia@latest
|
||||||
|
with:
|
||||||
|
version: '1.5'
|
||||||
|
- run: sudo apt-get install xvfb -y
|
||||||
|
- run: xvfb-run --auto-servernum julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
|
||||||
|
- run: xvfb-run --auto-servernum julia --project=docs/ docs/make.jl
|
195
.gitignore
vendored
195
.gitignore
vendored
|
@ -1,60 +1,3 @@
|
||||||
## MATLAB gitignores
|
|
||||||
|
|
||||||
# Windows default autosave extension
|
|
||||||
*.asv
|
|
||||||
|
|
||||||
# OSX / *nix default autosave extension
|
|
||||||
*.m~
|
|
||||||
|
|
||||||
# Compiled MEX binaries (all platforms)
|
|
||||||
*.mex*
|
|
||||||
|
|
||||||
# Packaged app and toolbox files
|
|
||||||
*.mlappinstall
|
|
||||||
*.mltbx
|
|
||||||
|
|
||||||
# Generated helpsearch folders
|
|
||||||
helpsearch*/
|
|
||||||
|
|
||||||
# Simulink code generation folders
|
|
||||||
slprj/
|
|
||||||
sccprj/
|
|
||||||
|
|
||||||
# Matlab code generation folders
|
|
||||||
codegen/
|
|
||||||
|
|
||||||
# Simulink autosave extension
|
|
||||||
*.autosave
|
|
||||||
|
|
||||||
# Octave session info
|
|
||||||
octave-workspace
|
|
||||||
|
|
||||||
## Windows gitignores
|
|
||||||
|
|
||||||
# Windows thumbnail cache files
|
|
||||||
Thumbs.db
|
|
||||||
ehthumbs.db
|
|
||||||
ehthumbs_vista.db
|
|
||||||
|
|
||||||
# Dump file
|
|
||||||
*.stackdump
|
|
||||||
|
|
||||||
# Folder config file
|
|
||||||
[Dd]esktop.ini
|
|
||||||
|
|
||||||
# Recycle Bin used on file shares
|
|
||||||
$RECYCLE.BIN/
|
|
||||||
|
|
||||||
# Windows Installer files
|
|
||||||
*.cab
|
|
||||||
*.msi
|
|
||||||
*.msix
|
|
||||||
*.msm
|
|
||||||
*.msp
|
|
||||||
|
|
||||||
# Windows shortcuts
|
|
||||||
*.lnk
|
|
||||||
|
|
||||||
## Visual Studio Code gitignores
|
## Visual Studio Code gitignores
|
||||||
|
|
||||||
.vscode/*
|
.vscode/*
|
||||||
|
@ -62,145 +5,9 @@ $RECYCLE.BIN/
|
||||||
!.vscode/tasks.json
|
!.vscode/tasks.json
|
||||||
!.vscode/launch.json
|
!.vscode/launch.json
|
||||||
!.vscode/extensions.json
|
!.vscode/extensions.json
|
||||||
.vscode/settings.json
|
!.vscode/settings.json
|
||||||
|
|
||||||
## Microsoft Office gitignores
|
|
||||||
|
|
||||||
*.tmp
|
|
||||||
|
|
||||||
# Word temporary
|
|
||||||
~$*.doc*
|
|
||||||
|
|
||||||
# Word Auto Backup File
|
|
||||||
Backup of *.doc*
|
|
||||||
|
|
||||||
# Excel temporary
|
|
||||||
~$*.xls*
|
|
||||||
|
|
||||||
# Excel Backup File
|
|
||||||
*.xlk
|
|
||||||
|
|
||||||
# PowerPoint temporary
|
|
||||||
~$*.ppt*
|
|
||||||
|
|
||||||
# Visio autosave temporary files
|
|
||||||
*.~vsd*
|
|
||||||
|
|
||||||
## Python gitignores
|
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
|
|
||||||
# C extensions
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Distribution / packaging
|
|
||||||
.Python
|
|
||||||
build/
|
|
||||||
develop-eggs/
|
|
||||||
dist/
|
|
||||||
downloads/
|
|
||||||
eggs/
|
|
||||||
.eggs/
|
|
||||||
lib/
|
|
||||||
lib64/
|
|
||||||
parts/
|
|
||||||
sdist/
|
|
||||||
var/
|
|
||||||
wheels/
|
|
||||||
*.egg-info/
|
|
||||||
.installed.cfg
|
|
||||||
*.egg
|
|
||||||
MANIFEST
|
|
||||||
|
|
||||||
# PyInstaller
|
|
||||||
# Usually these files are written by a python script from a template
|
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
||||||
*.manifest
|
|
||||||
*.spec
|
|
||||||
|
|
||||||
# Installer logs
|
|
||||||
pip-log.txt
|
|
||||||
pip-delete-this-directory.txt
|
|
||||||
|
|
||||||
# Unit test / coverage reports
|
|
||||||
htmlcov/
|
|
||||||
.tox/
|
|
||||||
.coverage
|
|
||||||
.coverage.*
|
|
||||||
.cache
|
|
||||||
nosetests.xml
|
|
||||||
coverage.xml
|
|
||||||
*.cover
|
|
||||||
.hypothesis/
|
|
||||||
.pytest_cache/
|
|
||||||
|
|
||||||
# Translations
|
|
||||||
*.mo
|
|
||||||
*.pot
|
|
||||||
|
|
||||||
# Django stuff:
|
|
||||||
*.log
|
|
||||||
local_settings.py
|
|
||||||
db.sqlite3
|
|
||||||
|
|
||||||
# Flask stuff:
|
|
||||||
instance/
|
|
||||||
.webassets-cache
|
|
||||||
|
|
||||||
# Scrapy stuff:
|
|
||||||
.scrapy
|
|
||||||
|
|
||||||
# Sphinx documentation
|
|
||||||
docs/_build/
|
|
||||||
|
|
||||||
# PyBuilder
|
|
||||||
target/
|
|
||||||
|
|
||||||
# Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# IPython
|
|
||||||
profile_default/
|
|
||||||
ipython_config.py
|
|
||||||
|
|
||||||
# pyenv
|
|
||||||
.python-version
|
|
||||||
|
|
||||||
# celery beat schedule file
|
|
||||||
celerybeat-schedule
|
|
||||||
|
|
||||||
# SageMath parsed files
|
|
||||||
*.sage.py
|
|
||||||
|
|
||||||
# Environments
|
|
||||||
.env
|
|
||||||
.venv
|
|
||||||
env/
|
|
||||||
venv/
|
|
||||||
ENV/
|
|
||||||
env.bak/
|
|
||||||
venv.bak/
|
|
||||||
|
|
||||||
# Spyder project settings
|
|
||||||
.spyderproject
|
|
||||||
.spyproject
|
|
||||||
|
|
||||||
# Rope project settings
|
|
||||||
.ropeproject
|
|
||||||
|
|
||||||
# mkdocs documentation
|
|
||||||
/site
|
|
||||||
|
|
||||||
# mypy
|
|
||||||
.mypy_cache/
|
|
||||||
.dmypy.json
|
|
||||||
dmypy.json
|
|
||||||
|
|
||||||
# Ignore results files
|
# Ignore results files
|
||||||
Results.txt
|
|
||||||
results.txt
|
results.txt
|
||||||
|
|
||||||
### Julia.gitignore
|
### Julia.gitignore
|
||||||
|
|
15
.travis.yml
Normal file
15
.travis.yml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
language: julia
|
||||||
|
os:
|
||||||
|
- linux
|
||||||
|
services:
|
||||||
|
- xvfb
|
||||||
|
julia:
|
||||||
|
- 1.3
|
||||||
|
- nightly
|
||||||
|
matrix:
|
||||||
|
allow_failures:
|
||||||
|
- julia: nightly
|
||||||
|
fast_finish: true
|
||||||
|
notifications:
|
||||||
|
email: false
|
||||||
|
codecov: true
|
18
.vscode/launch.json
vendored
Normal file
18
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "julia",
|
||||||
|
"request": "launch",
|
||||||
|
"name": "beefblup CLI debug",
|
||||||
|
"program": "${file}",
|
||||||
|
"stopOnEntry": false,
|
||||||
|
"cwd": "${workspaceFolder}",
|
||||||
|
"juliaEnv": "${command:activeJuliaEnvironment}",
|
||||||
|
"args": ["data/sample.csv", "-t 0.5"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
10
.vscode/settings.json
vendored
Normal file
10
.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
{
|
||||||
|
"cSpell.words": [
|
||||||
|
"BLUP",
|
||||||
|
"EBVs",
|
||||||
|
"EPDs",
|
||||||
|
"autob",
|
||||||
|
"beefblup",
|
||||||
|
"dairyblup"
|
||||||
|
]
|
||||||
|
}
|
Binary file not shown.
Binary file not shown.
|
@ -1,287 +0,0 @@
|
||||||
# beefblup
|
|
||||||
# Main script for performing single-variate BLUP to find beef cattle
|
|
||||||
# breeding values
|
|
||||||
# Usage: julia beefblup.jl
|
|
||||||
# (C) 2020 Thomas A. Christensen II
|
|
||||||
# Licensed under BSD-3-Clause License
|
|
||||||
|
|
||||||
# Import the required packages
|
|
||||||
using XLSX
|
|
||||||
using LinearAlgebra
|
|
||||||
using Dates
|
|
||||||
using Gtk
|
|
||||||
|
|
||||||
# Display stuff
|
|
||||||
println("beefblup v 0.1")
|
|
||||||
println("(C) 2020 Thomas A. Christensen II")
|
|
||||||
println("https://github.com/millironx/beefblup")
|
|
||||||
print("\n")
|
|
||||||
|
|
||||||
### Prompt User
|
|
||||||
# Ask for an input spreadsheet
|
|
||||||
path = open_dialog_native(
|
|
||||||
"Select a beefblup worksheet",
|
|
||||||
GtkNullContainer(),
|
|
||||||
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
|
|
||||||
)
|
|
||||||
|
|
||||||
# Ask for an output text filename
|
|
||||||
savepath = save_dialog_native(
|
|
||||||
"Save your beefblup results",
|
|
||||||
GtkNullContainer(),
|
|
||||||
(GtkFileFilter("*.txt", name="Results file"),
|
|
||||||
"*.txt")
|
|
||||||
)
|
|
||||||
|
|
||||||
# Ask for heritability
|
|
||||||
print("What is the heritability for this trait?> ")
|
|
||||||
h2 = parse(Float64, readline(stdin))
|
|
||||||
|
|
||||||
### Import input filename
|
|
||||||
print("[🐮]: Importing Excel file...")
|
|
||||||
|
|
||||||
# Import data from a suitable spreadsheet
|
|
||||||
data = XLSX.readxlsx(path)[1][:]
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
||||||
|
|
||||||
### Process input file
|
|
||||||
print("[🐮]: Processing and formatting data...")
|
|
||||||
|
|
||||||
# Extract the headers into a separate array
|
|
||||||
headers = data[1,:]
|
|
||||||
data = data[2:end,:]
|
|
||||||
|
|
||||||
# Sort the array by date
|
|
||||||
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
|
|
||||||
|
|
||||||
# Define fields to hold id values for animals and their parents
|
|
||||||
ids = string.(data[:,1])
|
|
||||||
damids = string.(data[:,3])
|
|
||||||
sireids = string.(data[:,4])
|
|
||||||
numanimals = length(ids)
|
|
||||||
|
|
||||||
# Find the index values for animals and their parents
|
|
||||||
dam = indexin(damids, ids)
|
|
||||||
sire = indexin(sireids, ids)
|
|
||||||
|
|
||||||
# Store column numbers that need to be deleted
|
|
||||||
# Column 6 contains an intermediate Excel calculation and always need to
|
|
||||||
# be deleted
|
|
||||||
colstokeep = [1, 2, 3, 4, 5]
|
|
||||||
|
|
||||||
# Find any columns that need to be deleted
|
|
||||||
for i in 7:length(headers)
|
|
||||||
if length(unique(data[:,i])) <= 1
|
|
||||||
colname = headers[i]
|
|
||||||
print("Column '")
|
|
||||||
print(colname)
|
|
||||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
|
||||||
else
|
|
||||||
push!(colstokeep, i)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Delete the appropriate columns from the datasheet and the headers
|
|
||||||
data = data[:, colstokeep]
|
|
||||||
headers = headers[colstokeep]
|
|
||||||
|
|
||||||
# Determine how many contemporary groups there are
|
|
||||||
numgroups = ones(1, length(headers)-5)
|
|
||||||
for i in 6:length(headers)
|
|
||||||
numgroups[i-5] = length(unique(data[:,i]))
|
|
||||||
end
|
|
||||||
|
|
||||||
# If there are more groups than animals, then the analysis cannot continue
|
|
||||||
if sum(numgroups) >= numanimals
|
|
||||||
println("There are more contemporary groups than animals. The analysis will
|
|
||||||
now abort.")
|
|
||||||
exit()
|
|
||||||
end
|
|
||||||
|
|
||||||
# Define a "normal" animal as one of the last in the groups, provided that
|
|
||||||
# all traits do not have null values
|
|
||||||
normal = Array{String}(undef,1,length(headers)-5)
|
|
||||||
for i in 6:length(headers)
|
|
||||||
for j in numanimals:-1:1
|
|
||||||
if !ismissing(data[j,i])
|
|
||||||
normal[i-5] = string(data[j,i])
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
||||||
|
|
||||||
### Create the fixed-effect matrix
|
|
||||||
print("[🐮]: Creating the fixed-effect matrix...")
|
|
||||||
|
|
||||||
# Form the fixed-effect matrix
|
|
||||||
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
|
||||||
X[:,1] = ones(Int8, 1, numanimals)
|
|
||||||
|
|
||||||
# Create an external counter that will increment through both loops
|
|
||||||
counter = 2
|
|
||||||
|
|
||||||
# Store the traits in a string array
|
|
||||||
adjustedtraits =
|
|
||||||
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
|
||||||
# Iterate through each group
|
|
||||||
for i in 1:length(normal)
|
|
||||||
# Find the traits that are present in this trait
|
|
||||||
localdata = string.(data[:,i+5])
|
|
||||||
traits = unique(localdata)
|
|
||||||
# Remove the normal version from the analysis
|
|
||||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
|
||||||
# Iterate inside of the group
|
|
||||||
for j in 1:length(effecttraits)
|
|
||||||
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
|
||||||
X[matchedindex, counter] .= 1
|
|
||||||
# Add this trait to the string
|
|
||||||
adjustedtraits[counter - 1] = traits[j]
|
|
||||||
# Increment the big counter
|
|
||||||
global counter = counter + 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
||||||
|
|
||||||
### Additive relationship matrix
|
|
||||||
print("[🐮]: Creating additive relationship matrix...")
|
|
||||||
|
|
||||||
# Create an empty matrix for the additive relationship matrix
|
|
||||||
A = zeros(numanimals, numanimals)
|
|
||||||
|
|
||||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
|
||||||
# Henderson
|
|
||||||
for i in 1:numanimals
|
|
||||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
|
||||||
for j in 1:(i-1)
|
|
||||||
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
|
||||||
A[i,j] = A[j,i]
|
|
||||||
end
|
|
||||||
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
|
||||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
|
||||||
for j in 1:(i-1)
|
|
||||||
A[j,i] = 0.5*A[j,dam[i]]
|
|
||||||
A[i,j] = A[j,i]
|
|
||||||
end
|
|
||||||
A[i,i] = 1
|
|
||||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
|
||||||
for j in 1:(i-1)
|
|
||||||
A[j,i] = 0.5*A[j,sire[i]]
|
|
||||||
A[i,j] = A[j,i]
|
|
||||||
end
|
|
||||||
A[i,i] = 1
|
|
||||||
else
|
|
||||||
for j in 1:(i-1)
|
|
||||||
A[j,i] = 0
|
|
||||||
A[i,j] = 0
|
|
||||||
end
|
|
||||||
A[i,i] = 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
||||||
|
|
||||||
### Perform BLUP
|
|
||||||
print("[🐮]: Solving the mixed-model equations...")
|
|
||||||
|
|
||||||
# Extract the observed data
|
|
||||||
Y = convert(Array{Float64}, data[:,5])
|
|
||||||
|
|
||||||
# The random effects matrix
|
|
||||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
|
||||||
|
|
||||||
# Remove items where there is no data
|
|
||||||
nullobs = findall(isnothing, Y)
|
|
||||||
Z[nullobs, nullobs] .= 0
|
|
||||||
|
|
||||||
# Calculate heritability
|
|
||||||
λ = (1-h2)/h2
|
|
||||||
|
|
||||||
# Use the mixed-model equations
|
|
||||||
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
|
||||||
MMY = [X'*Y; Z'*Y]
|
|
||||||
solutions = MME\MMY
|
|
||||||
|
|
||||||
# Find the accuracies
|
|
||||||
diaginv = diag(inv(MME))
|
|
||||||
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
||||||
|
|
||||||
### Output the results
|
|
||||||
print("[🐮]: Saving results...")
|
|
||||||
|
|
||||||
# Find how many traits we found BLUE for
|
|
||||||
numgroups = numgroups .- 1
|
|
||||||
|
|
||||||
# Start printing results to output
|
|
||||||
fileID = open(savepath, "w")
|
|
||||||
write(fileID, "beefblup Results Report\n")
|
|
||||||
write(fileID, "Produced using beefblup for Julia (")
|
|
||||||
write(fileID, "https://github.com/millironx/beefblup")
|
|
||||||
write(fileID, ")\n\n")
|
|
||||||
write(fileID, "Input:\t")
|
|
||||||
write(fileID, path)
|
|
||||||
write(fileID, "\nAnalysis performed:\t")
|
|
||||||
write(fileID, string(Dates.today()))
|
|
||||||
write(fileID, "\nTrait examined:\t")
|
|
||||||
write(fileID, headers[5])
|
|
||||||
write(fileID, "\n\n")
|
|
||||||
|
|
||||||
# Print base population stats
|
|
||||||
write(fileID, "Base Population:\n")
|
|
||||||
for i in 1:length(numgroups)
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, headers[i+5])
|
|
||||||
write(fileID, ":\t")
|
|
||||||
write(fileID, normal[i])
|
|
||||||
write(fileID, "\n")
|
|
||||||
end
|
|
||||||
write(fileID, "\tMean ")
|
|
||||||
write(fileID, headers[5])
|
|
||||||
write(fileID, ":\t")
|
|
||||||
write(fileID, string(solutions[1]))
|
|
||||||
write(fileID, "\n\n")
|
|
||||||
|
|
||||||
# Contemporary group adjustments
|
|
||||||
counter = 2
|
|
||||||
write(fileID, "Contemporary Group Effects:\n")
|
|
||||||
for i in 1:length(numgroups)
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, headers[i+5])
|
|
||||||
write(fileID, "\tEffect\tReliability\n")
|
|
||||||
for j in 1:numgroups[i]
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, adjustedtraits[counter - 1])
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, string(solutions[counter]))
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, string(reliability[counter]))
|
|
||||||
write(fileID, "\n")
|
|
||||||
|
|
||||||
global counter = counter + 1
|
|
||||||
end
|
|
||||||
write(fileID, "\n")
|
|
||||||
end
|
|
||||||
write(fileID, "\n")
|
|
||||||
|
|
||||||
# Expected breeding values
|
|
||||||
write(fileID, "Expected Breeding Values:\n")
|
|
||||||
write(fileID, "\tID\tEBV\tReliability\n")
|
|
||||||
for i in 1:numanimals
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, ids[i])
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, string(solutions[i+counter-1]))
|
|
||||||
write(fileID, "\t")
|
|
||||||
write(fileID, string(reliability[i+counter-1]))
|
|
||||||
write(fileID, "\n")
|
|
||||||
end
|
|
||||||
|
|
||||||
write(fileID, "\n - END REPORT -")
|
|
||||||
close(fileID)
|
|
||||||
|
|
||||||
print("Done!\n")
|
|
|
@ -1,13 +0,0 @@
|
||||||
# beefblup install
|
|
||||||
# Prepares the Julia environment for using beefblup by installing the requisite
|
|
||||||
# packages
|
|
||||||
# Usage: julia install.jl
|
|
||||||
# (C) 2020 Thomas A. Christensen II
|
|
||||||
# Licensed under BSD-3-Clause License
|
|
||||||
|
|
||||||
# Import the package manager
|
|
||||||
using Pkg
|
|
||||||
|
|
||||||
# Install requisite packages
|
|
||||||
Pkg.add("XLSX")
|
|
||||||
Pkg.add("Gtk")
|
|
|
@ -1,19 +1,19 @@
|
||||||
BSD 3-Clause License
|
BSD 3-Clause License
|
||||||
|
|
||||||
Copyright (c) 2020, Thomas A. Christensen II
|
Copyright (c) 2021, Thomas A. Christensen II
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
modification, are permitted provided that the following conditions are met:
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice, this
|
1. Redistributions of source code must retain the above copyright notice, this
|
||||||
list of conditions and the following disclaimer.
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
this list of conditions and the following disclaimer in the documentation
|
this list of conditions and the following disclaimer in the documentation
|
||||||
and/or other materials provided with the distribution.
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
* Neither the name of the copyright holder nor the names of its
|
3. Neither the name of the copyright holder nor the names of its
|
||||||
contributors may be used to endorse or promote products derived from
|
contributors may be used to endorse or promote products derived from
|
||||||
this software without specific prior written permission.
|
this software without specific prior written permission.
|
||||||
|
|
|
@ -1,341 +0,0 @@
|
||||||
% beefblup
|
|
||||||
% Main script for performing single-variate BLUP to find beef cattle
|
|
||||||
% breeding values
|
|
||||||
% Usage: beefblup
|
|
||||||
% (C) 2020 Thomas A. Christensen II
|
|
||||||
% Licensed under BSD-3-Clause License
|
|
||||||
|
|
||||||
% Prepare the workspace for computation
|
|
||||||
clear
|
|
||||||
clc
|
|
||||||
close all
|
|
||||||
|
|
||||||
%% Display stuff
|
|
||||||
disp('beefblup v. 0.1')
|
|
||||||
disp('(C) 2020 Thomas A. Christensen II')
|
|
||||||
disp('https://github.com/millironx/beefblup')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Prompt User
|
|
||||||
% Ask for an input spreadsheet
|
|
||||||
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
|
|
||||||
|
|
||||||
% Ask for an ouput text file
|
|
||||||
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
|
|
||||||
|
|
||||||
% Ask for heritability
|
|
||||||
h2 = input('What is the heritability for this trait? >> ');
|
|
||||||
|
|
||||||
|
|
||||||
%% Import input file
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Importing Excel file...')
|
|
||||||
|
|
||||||
% Import data from a suitable spreadsheet
|
|
||||||
fullname = [path name];
|
|
||||||
clear name path
|
|
||||||
[~, ~, data] = xlsread(fullname);
|
|
||||||
|
|
||||||
disp('Importing Excel file... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Process input file
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Processing and formatting data...')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
% Extract the headers into a separate array
|
|
||||||
headers = data(1,:);
|
|
||||||
data(1,:) = [];
|
|
||||||
|
|
||||||
% Convert the string dates to numbers
|
|
||||||
data(:,2) = num2cell(datenum(data(:,2)));
|
|
||||||
|
|
||||||
% Sort the array by date
|
|
||||||
data = sortrows(data,2);
|
|
||||||
|
|
||||||
% Coerce all id fields to string format
|
|
||||||
strings = [1 3 4];
|
|
||||||
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
|
|
||||||
|
|
||||||
% Define fields to hold id values for animals and their parents
|
|
||||||
ids = char(data{:,1});
|
|
||||||
damids = char(data{:,3});
|
|
||||||
sireids = char(data{:,4});
|
|
||||||
numanimals = length(data(:,1));
|
|
||||||
|
|
||||||
% Define fields to hold the index values for animals and their parents
|
|
||||||
dam = zeros(numanimals,1);
|
|
||||||
sire = zeros(numanimals,1);
|
|
||||||
|
|
||||||
% Find all row numbers where an animal was a parent
|
|
||||||
for i=1:numanimals
|
|
||||||
% Find all animals that this animal birthed
|
|
||||||
dammatch = ismember(damids, ids(i,:), 'rows');
|
|
||||||
damindexes = find(dammatch == 1);
|
|
||||||
dam(damindexes) = i;
|
|
||||||
|
|
||||||
% Find all animals that this animal sired
|
|
||||||
sirematch = ismember(sireids, ids(i,:), 'rows');
|
|
||||||
sireindexes = find(sirematch == 1);
|
|
||||||
sire(sireindexes) = i;
|
|
||||||
end
|
|
||||||
|
|
||||||
% Store column numbers that need to be deleted
|
|
||||||
% Column 6 contains an intermediate Excel calculation and always needs to
|
|
||||||
% be deleted
|
|
||||||
colstodelete = 6;
|
|
||||||
|
|
||||||
% Coerce each group to string format
|
|
||||||
for i = 7:length(headers)
|
|
||||||
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
|
|
||||||
end
|
|
||||||
|
|
||||||
% Find any columns that need to be deleted
|
|
||||||
for i = 7:length(headers)
|
|
||||||
if length(uniquecell(data(:,i))) <= 1
|
|
||||||
colname = headers{i};
|
|
||||||
disp(['Column "' colname '" does not have any unique animals and will be removed'])
|
|
||||||
disp('from this analysis');
|
|
||||||
colstodelete = [colstodelete i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
% Delete the appropriate columns from the datasheet and the headers
|
|
||||||
data(:,colstodelete) = [];
|
|
||||||
headers(colstodelete) = [];
|
|
||||||
|
|
||||||
% Determine how many contemporary groups there are
|
|
||||||
numgroups = ones(1, length(headers)-5);
|
|
||||||
for i = 6:length(headers)
|
|
||||||
numgroups(i-5) = length(uniquecell(data(:,i)));
|
|
||||||
end
|
|
||||||
|
|
||||||
% If there are more groups than animals, then the analysis cannot continue
|
|
||||||
if sum(numgroups) >= numanimals
|
|
||||||
disp('There are more contemporary groups than animals. The analysis will now abort.');
|
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
% Define a "normal" animal as one of the last in the groups, provided that
|
|
||||||
% all traits do not have null values
|
|
||||||
normal = cell([1 length(headers)-5]);
|
|
||||||
for i = 6:length(headers)
|
|
||||||
for j = numanimals:-1:1
|
|
||||||
if not(cellfun(@isempty, data(j,i)))
|
|
||||||
normal(i - 5) = data(j,i);
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
% Print the results of the "normal" definition
|
|
||||||
disp(' ')
|
|
||||||
disp('For the purposes of this analysis, a "normal" animal will be defined')
|
|
||||||
disp('by the following traits:')
|
|
||||||
for i = 6:length(headers)
|
|
||||||
disp([headers{i} ': ' normal{i-5}])
|
|
||||||
end
|
|
||||||
disp(' ')
|
|
||||||
disp('If no animal matching this description exists, the results may appear')
|
|
||||||
disp('outlandish, but are still as correct as the accuracy suggests')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
disp('Processing and formatting data... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Create the fixed-effect matrix
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Creating the fixed-effect matrix...')
|
|
||||||
|
|
||||||
% Form the fixed effect matrix
|
|
||||||
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
|
|
||||||
X(:,1) = ones(1, numanimals);
|
|
||||||
|
|
||||||
% Create an external counter that will increment through both loops
|
|
||||||
I = 2;
|
|
||||||
|
|
||||||
% Store the traits in a string cell array
|
|
||||||
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
|
|
||||||
|
|
||||||
% Iterate through each group
|
|
||||||
for i = 1:length(normal)
|
|
||||||
% Find the traits that are present in this trait
|
|
||||||
traits = uniquecell(data(:,i+5));
|
|
||||||
|
|
||||||
% Remove the "normal" version from the analysis
|
|
||||||
normalindex = find(strcmp(traits, normal{i}));
|
|
||||||
traits(normalindex) = [];
|
|
||||||
|
|
||||||
% Iterate inside of the group
|
|
||||||
for j = 1:length(traits)
|
|
||||||
matchedindex = find(strcmp(data(:,i+5), traits{j}));
|
|
||||||
X(matchedindex, I) = 1;
|
|
||||||
|
|
||||||
% Add this trait to the string
|
|
||||||
adjustedtraits(I - 1) = traits(j);
|
|
||||||
|
|
||||||
% Increment the big counter
|
|
||||||
I = I + 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
disp('Creating the fixed-effect matrix... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Additive relationship matrix
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Creating the additive relationship matrix...')
|
|
||||||
|
|
||||||
% Create an empty matrix for the additive relationship matrix
|
|
||||||
A = zeros(numanimals, numanimals);
|
|
||||||
|
|
||||||
% Create the additive relationship matrix by the FORTRAN method presented
|
|
||||||
% by Henderson
|
|
||||||
for i = 1:numanimals
|
|
||||||
if dam(i) ~= 0 && sire(i) ~= 0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*(A(j,sire(i))+A(j,dam(i)));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1 + 0.5*A(sire(i),dam(i));
|
|
||||||
elseif dam(i) ~= 0 && sire(i) == 0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*A(j,dam(i));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
elseif dam(i) == 0 && sire(i) ~=0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*A(j,sire(i));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
else
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0;
|
|
||||||
A(i,j) = 0;
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
disp('Creating the additive relationship matrix... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Perform BLUP
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Solving the mixed-model equations')
|
|
||||||
|
|
||||||
% Extract the observed data
|
|
||||||
Y = cell2mat(data(:, 5));
|
|
||||||
|
|
||||||
% The identity matrix for random effects
|
|
||||||
Z = eye(numanimals, numanimals);
|
|
||||||
|
|
||||||
% Remove items where there is no data
|
|
||||||
nullobs = find(isnan(Y));
|
|
||||||
Z(nullobs, nullobs) = 0;
|
|
||||||
|
|
||||||
% Calculate heritability
|
|
||||||
lambda = (1-h2)/h2;
|
|
||||||
|
|
||||||
% Use the mixed-model equations
|
|
||||||
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
|
|
||||||
|
|
||||||
% Find the accuracies
|
|
||||||
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
|
|
||||||
reliability = 1 - diaginv.*lambda;
|
|
||||||
|
|
||||||
disp('Solving the mixed-model equations... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Output the results
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Saving results...')
|
|
||||||
|
|
||||||
% Find how many traits we found BLUE for
|
|
||||||
numgroups = numgroups - 1;
|
|
||||||
|
|
||||||
% Start printing results to output
|
|
||||||
fileID = fopen([savepath savename], 'w');
|
|
||||||
fprintf(fileID, 'beefblup Results Report\n');
|
|
||||||
fprintf(fileID, 'Produced using beefblup for MATLAB (');
|
|
||||||
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
|
|
||||||
fprintf(fileID, ')\n\n');
|
|
||||||
fprintf(fileID, 'Input:\t');
|
|
||||||
fprintf(fileID, '%s', fullname);
|
|
||||||
fprintf(fileID, '\nAnalysis performed:\t');
|
|
||||||
fprintf(fileID, date);
|
|
||||||
fprintf(fileID, '\nTrait examined:\t');
|
|
||||||
fprintf(fileID, [headers{5}]);
|
|
||||||
fprintf(fileID, '\n\n');
|
|
||||||
|
|
||||||
% Print base population stats
|
|
||||||
fprintf(fileID, 'Base Population:\n');
|
|
||||||
for i = 1:length(numgroups)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [headers{i+5}]);
|
|
||||||
fprintf(fileID, ':\t');
|
|
||||||
fprintf(fileID, [normal{i}]);
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\tMean ');
|
|
||||||
fprintf(fileID, [headers{5}]);
|
|
||||||
fprintf(fileID, ':\t');
|
|
||||||
fprintf(fileID, num2str(solutions(1)));
|
|
||||||
fprintf(fileID, '\n\n');
|
|
||||||
|
|
||||||
I = 2;
|
|
||||||
|
|
||||||
% Contemporary group adjustments
|
|
||||||
fprintf(fileID, 'Contemporary Group Effects:\n');
|
|
||||||
for i = 1:length(numgroups)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [headers{i+5}]);
|
|
||||||
fprintf(fileID, '\tEffect\tReliability\n');
|
|
||||||
for j = 1:numgroups(i)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [adjustedtraits{I-1}]);
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(solutions(I)));
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(reliability(I)));
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
|
|
||||||
I = I + 1;
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
|
|
||||||
% Expected breeding values
|
|
||||||
fprintf(fileID, 'Expected Breeding Values:\n');
|
|
||||||
fprintf(fileID, '\tID\tEBV\tReliability\n');
|
|
||||||
for i = 1:numanimals
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [data{i,1}]);
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(solutions(i+I-1)));
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(reliability(i+I-1)));
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
|
|
||||||
fprintf(fileID, '\n - END REPORT -');
|
|
||||||
fclose(fileID);
|
|
||||||
|
|
||||||
disp('Saving results... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
|
@ -1,9 +0,0 @@
|
||||||
% uniquenan
|
|
||||||
% Serves the same purpose as UNIQUE, but ensures any empty cells are not
|
|
||||||
% counted
|
|
||||||
function y = uniquecell(x)
|
|
||||||
y = unique(x);
|
|
||||||
if any(cellfun(@isempty, y))
|
|
||||||
y(cellfun(@isempty, y)) = [];
|
|
||||||
end
|
|
||||||
end
|
|
21
Project.toml
Normal file
21
Project.toml
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
name = "BeefBLUP"
|
||||||
|
uuid = "03993faf-e476-444a-86c9-f31e8122fa24"
|
||||||
|
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||||
|
version = "0.3.0"
|
||||||
|
|
||||||
|
[deps]
|
||||||
|
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||||
|
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||||
|
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||||
|
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||||
|
Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44"
|
||||||
|
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||||
|
|
||||||
|
[compat]
|
||||||
|
julia = "1.3"
|
||||||
|
|
||||||
|
[extras]
|
||||||
|
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||||
|
|
||||||
|
[targets]
|
||||||
|
test = ["Test"]
|
81
README.md
81
README.md
|
@ -1,5 +1,9 @@
|
||||||
# [:cow:]: beefblup
|
# [:cow:]: beefblup
|
||||||
|
|
||||||
|
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://millironx.github.io/beefblup/stable)
|
||||||
|
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://MillironX.github.io/beefblup/dev)
|
||||||
|
[![Build Status](https://travis-ci.com/millironx/beefblup.svg?branch=develop)](https://travis-ci.com/millironx/beefblup)
|
||||||
|
[![Coverage](https://codecov.io/gh/millironx/beefblup/branch/develop/graph/badge.svg)](https://codecov.io/gh/millironx/beefblup)
|
||||||
[![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md)
|
[![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md)
|
||||||
[![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
[![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||||
[![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases)
|
[![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases)
|
||||||
|
@ -13,35 +17,34 @@ Why? It's part of my effort to
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
||||||
2. Open a new Julia window and type the `]` key
|
2. Download the [beefblup ZIP
|
||||||
3. Type `add XLSX Gtk` and press **Enter**
|
file](https://github.com/MillironX/beefblup/archive/refs/tags/v0.2.2.zip) and unzip it someplace memorable
|
||||||
|
3. In your file explorer, copy the address of the "beefblup" folder
|
||||||
Alternatively, you can run the [install
|
4. Launch Julia
|
||||||
script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from
|
5. Type `cd("<the address copied in step 5>")` and press **Enter** (For example,
|
||||||
Julia.
|
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||||
|
6. Type the `]` key
|
||||||
|
7. Type `activate .` and press **Enter**
|
||||||
|
8. Type `instantiate` and press **Enter**
|
||||||
|
9. Installation is done: you can close the Julia window
|
||||||
|
|
||||||
## How to Use
|
## How to Use
|
||||||
|
|
||||||
> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118).
|
1. Make a copy of the "sample.csv" spreadsheet and replace the data with your own
|
||||||
> Although it's tempting to just open up beefblup in Juno and press the big play
|
1. The trait you wish to calculate EBVs for always goes in the rightmost column
|
||||||
> button, it won't work. Follow these instructions until it's fixed. If you
|
2. If you wish to add more contemporary group traits to your analysis, include them before the rightmost column
|
||||||
> don't know what Juno is: ignore this message.
|
2. Save and close
|
||||||
|
3. In your file explorer, copy the address of the "beefblup" folder
|
||||||
1. Download the [beefblup ZIP
|
4. Launch Julia
|
||||||
file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it
|
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||||
someplace memorable
|
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||||
2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own
|
6. Type the `]` key
|
||||||
3. If you wish to add more contemporary group traits to your analysis, replace
|
7. Type `activate .` and press **Enter**
|
||||||
or add them to the right of the Purple section
|
8. Press **Backspace**
|
||||||
4. Save and close
|
9. Type `include("src/beefblup.jl")` and press **Enter**
|
||||||
5. In your file explorer, copy the address of the "Julia" folder
|
10. Select the spreadsheet you created in steps 1-4
|
||||||
6. Launch Julia
|
11. Follow the on-screen prompts
|
||||||
7. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
12. **#KeepEPDsReal!**
|
||||||
`cd("C:\Users\MillironX\Documents\beefblup\Julia")`)
|
|
||||||
8. Type `include("beefblup.jl")` and press **Enter**
|
|
||||||
9. Select the spreadsheet you created in steps 1-4
|
|
||||||
10. Follow the on-screen prompts
|
|
||||||
11. **#KeepEPDsReal!**
|
|
||||||
|
|
||||||
## For Programmers
|
## For Programmers
|
||||||
|
|
||||||
|
@ -51,19 +54,19 @@ Julia.
|
||||||
|
|
||||||
### Development Roadmap
|
### Development Roadmap
|
||||||
|
|
||||||
| Version | Feature |
|
| Version | Feature | Status |
|
||||||
| ------- | ------------------------------------------------------------------- |
|
| ------- | ----------------------------------------------------------------------------------- | ------------------ |
|
||||||
| v0.1 | Julia port of original MATLAB script |
|
| v0.1 | Julia port of original MATLAB script | :heavy_check_mark: |
|
||||||
| v0.2 | Spreadsheet format redesign |
|
| v0.2 | Spreadsheet format redesign | :heavy_check_mark: |
|
||||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) |
|
| v0.3 | API rewrite (change to function calls and package format instead of script running) | |
|
||||||
| v0.4 | Add GUI for all options |
|
| v0.4 | Add GUI for all options | |
|
||||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects |
|
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | |
|
||||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) |
|
| v0.6 | Repeated measurement BLUP (aka dairyblup) | |
|
||||||
| v0.7 | Multiple trait BLUP |
|
| v0.7 | Multiple trait BLUP | |
|
||||||
| v0.8 | Maternal effects BLUP |
|
| v0.8 | Maternal effects BLUP | |
|
||||||
| v0.9 | Genomic BLUP |
|
| v0.9 | Genomic BLUP | |
|
||||||
| v0.10 | beefblup binaries |
|
| v0.10 | beefblup binaries | |
|
||||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) |
|
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | |
|
||||||
|
|
||||||
### Bug Reports
|
### Bug Reports
|
||||||
|
|
||||||
|
|
47
beefblup
Executable file
47
beefblup
Executable file
|
@ -0,0 +1,47 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# -*- mode: julia -*-
|
||||||
|
#=
|
||||||
|
exec julia --project="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" "${BASH_SOURCE[0]}" "$@"
|
||||||
|
=#
|
||||||
|
# beefblup cli
|
||||||
|
# Provides a convenient CLI for using beefblup
|
||||||
|
# cSpell:includeRegExp #.*
|
||||||
|
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||||
|
|
||||||
|
# Import packages
|
||||||
|
using BeefBLUP
|
||||||
|
using ArgParse
|
||||||
|
|
||||||
|
# If this is run without arguments, catch that before parsing
|
||||||
|
if isempty(ARGS)
|
||||||
|
BeefBLUP.beefblup()
|
||||||
|
exit()
|
||||||
|
end
|
||||||
|
|
||||||
|
# Setup the argument table
|
||||||
|
argsettings = ArgParseSettings()
|
||||||
|
@add_arg_table argsettings begin
|
||||||
|
"datafile"
|
||||||
|
help = "datasheet containing the pedigree, fixed effects, and response trait formatted in CSV"
|
||||||
|
required = true
|
||||||
|
arg_type = String
|
||||||
|
"resultsfile"
|
||||||
|
help = "filename for storing the results of the analysis"
|
||||||
|
required = false
|
||||||
|
arg_type = String
|
||||||
|
"--heritability", "-t"
|
||||||
|
help = "heritability of the response trait being analyzed"
|
||||||
|
required = true
|
||||||
|
arg_type = Float64
|
||||||
|
end
|
||||||
|
|
||||||
|
arguments = parse_args(argsettings)
|
||||||
|
|
||||||
|
h2 = arguments["heritability"]
|
||||||
|
|
||||||
|
if isnothing(arguments["resultsfile"])
|
||||||
|
BeefBLUP.beefblup(arguments["datafile"], h2)
|
||||||
|
exit()
|
||||||
|
end
|
||||||
|
|
||||||
|
BeefBLUP.beefblup(arguments["datafile"], arguments["resultsfile"], h2)
|
8
data/sample.csv
Normal file
8
data/sample.csv
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
id,birthdate,dam,sire,year,sex,species,weaning_weight
|
||||||
|
1,1/1/1990,,,1990,male,cow,354
|
||||||
|
2,1/1/1990,,,1990,female,cow,251
|
||||||
|
3,1/1/1991,,1,1991,male,cow,327
|
||||||
|
4,1/1/1991,,1,1991,female,cow,328
|
||||||
|
5,1/1/1991,2,1,1991,male,cow,301
|
||||||
|
6,1/1/1991,2,,1991,female,cow,270
|
||||||
|
7,1/1/1992,,,1992,male,cow,330
|
|
3
docs/Project.toml
Normal file
3
docs/Project.toml
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
[deps]
|
||||||
|
BeefBLUP = "03993faf-e476-444a-86c9-f31e8122fa24"
|
||||||
|
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
26
docs/make.jl
Normal file
26
docs/make.jl
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
using BeefBLUP
|
||||||
|
using Documenter
|
||||||
|
|
||||||
|
DocMeta.setdocmeta!(BeefBLUP, :DocTestSetup, :(using BeefBLUP); recursive=true)
|
||||||
|
|
||||||
|
makedocs(;
|
||||||
|
modules=[BeefBLUP],
|
||||||
|
authors="Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>",
|
||||||
|
repo="https://github.com/MillironX/beefblup/blob/{commit}{path}#{line}",
|
||||||
|
sitename="beefblup",
|
||||||
|
format=Documenter.HTML(;
|
||||||
|
prettyurls=get(ENV, "CI", "false") == "true",
|
||||||
|
canonical="https://millironx.com/beefblup",
|
||||||
|
assets=String[],
|
||||||
|
),
|
||||||
|
pages=[
|
||||||
|
"Home" => "index.md",
|
||||||
|
"How to Calculate EPDs" => "how-to-calculate-epds.md",
|
||||||
|
"CLI Reference (WIP)" => "beefblup-cli.md"
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
deploydocs(;
|
||||||
|
repo="github.com/MillironX/beefblup",
|
||||||
|
devbranch="develop",
|
||||||
|
)
|
3
docs/src/.markdownlint.json
Normal file
3
docs/src/.markdownlint.json
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
{
|
||||||
|
"MD041": false
|
||||||
|
}
|
106
docs/src/beefblup-cli.md
Normal file
106
docs/src/beefblup-cli.md
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
```@meta
|
||||||
|
CurrentModule = BeefBLUP
|
||||||
|
```
|
||||||
|
|
||||||
|
# beefblup Command Line Interface (CLI) documentation
|
||||||
|
|
||||||
|
> _A work in progress_
|
||||||
|
|
||||||
|
**Notice:** This document is a draft for what the command-line interface for
|
||||||
|
beefblup would look like as of version 1.0, if beefblup was even a command-line
|
||||||
|
application to begin with (it's not). It is modeled (loosely) after the man page
|
||||||
|
format. It is not intended to be taken seriously, but instead to serve as a
|
||||||
|
useful thought experiment and brainstorming ground on the future of beefblup.
|
||||||
|
Please use it if it clarifies things for you. If it doesn't, ignore it.
|
||||||
|
|
||||||
|
## Input file
|
||||||
|
|
||||||
|
beefblup requires a very specific format of input file. The format may be in
|
||||||
|
comma-separated values (CSV) or Excel 2007+ (XLSX) format. CSV files should not
|
||||||
|
be quoted (and therefore cannot have commas within cell values). Other formats
|
||||||
|
may be forthcoming.
|
||||||
|
|
||||||
|
A beefblup data file must have at least six columns appearing in this order:
|
||||||
|
|
||||||
|
- ID
|
||||||
|
- Sire ID
|
||||||
|
- Dam ID
|
||||||
|
- Birthdate
|
||||||
|
- Fixed effect(s)
|
||||||
|
- Response variable(s)
|
||||||
|
|
||||||
|
The first row always contains column names. The values of column names are
|
||||||
|
unimportant for the first four columns, as they will always be treated the same
|
||||||
|
regardless of the name. The generated report will use the column names
|
||||||
|
of fixed effects and response variables as given.
|
||||||
|
|
||||||
|
Each fixed effect should have its own column, to as many as are needed. There is
|
||||||
|
no limit to the number of fixed effects as defined by beefblup, however its
|
||||||
|
dependencies might have some. The same rules apply to response variables.
|
||||||
|
|
||||||
|
Unknown values should be left blank (`,,`). Do not substitute null placeholders
|
||||||
|
(e.g. `NULL`, `NA`, `0`, `nothing`, `undefined`, etc.) for unknown values.
|
||||||
|
|
||||||
|
An example spreadsheet might have the following format
|
||||||
|
|
||||||
|
| ID | Sire ID | Dam ID | Birthdate | Sex | Weaning Weight |
|
||||||
|
| --- | ------- | ------ | --------- | ------ | -------------- |
|
||||||
|
| 1 | | | 1/1/1990 | Male | 354 |
|
||||||
|
| 2 | 1 | | 1/1/1990 | Female | 251 |
|
||||||
|
| 3 | 1 | | 1/1/1991 | Male | 327 |
|
||||||
|
| 4 | 1 | 2 | 1/1/1991 | Female | 328 |
|
||||||
|
| 5 | | 2 | 1/1/1991 | Male | 301 |
|
||||||
|
| 6 | | | 1/1/1991 | Female | 270 |
|
||||||
|
| 7 | | | 1/1/1992 | Male | 330 |
|
||||||
|
|
||||||
|
## Synopsis
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup [-G SNPs_file] [-M num_response_vars] [-o report_spreadsheet]
|
||||||
|
[--no-aod] [--no-year] [--no-season] [--no-autob] [--maternal] input_file
|
||||||
|
[report_file]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Command line basic syntax
|
||||||
|
|
||||||
|
The most basic input is to simply pass the input file name to the program.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup filename.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
In this case beefblup will insert fixed-effects for age-of-dam, year, and
|
||||||
|
season, and will calculate the EBVs for the response variable in the final
|
||||||
|
column. The report will then be saved as `filename_report.txt`.
|
||||||
|
|
||||||
|
## Suppressing automatically-calculated fixed effects
|
||||||
|
|
||||||
|
If you don't wish to include one of the automatically calculated fixed-effects
|
||||||
|
from your model, you can pass arguments to suppress them.
|
||||||
|
|
||||||
|
### Suppress Age-of-dam
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup --no-aod filename.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Suppress year
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup --no-year filename.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Suppress season
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup --no-season filename.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
### Suppress all calculated fixed effects
|
||||||
|
|
||||||
|
```bash
|
||||||
|
beefblup --no-autob filename.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
The argument `--no-autob` comes from the nomenclature of assigning fixed-effects
|
||||||
|
to the matrix _b_ in Henderson's mixed-model equations.
|
542
docs/src/how-to-calculate-epds.md
Normal file
542
docs/src/how-to-calculate-epds.md
Normal file
|
@ -0,0 +1,542 @@
|
||||||
|
```@meta
|
||||||
|
CurrentModule = BeefBLUP
|
||||||
|
```
|
||||||
|
|
||||||
|
# How to Calculate EPDs
|
||||||
|
|
||||||
|
Not to exclude our Australian comrades or our dairy friends, this guide could
|
||||||
|
alternately be called
|
||||||
|
|
||||||
|
- How to Calculate Expected Breeding Values (EBVs)
|
||||||
|
- How to Calculate Predicted Transmitting Abilities (PTAs)
|
||||||
|
- How to Calculate Expected Progeny Differences (EPDs)
|
||||||
|
|
||||||
|
Since I'm mostly talking to American beef producers, though, we'll stick with
|
||||||
|
EPDs for most of this discussion.
|
||||||
|
|
||||||
|
Expected Breeding Values (EBVs) (which are more often halved and published as
|
||||||
|
Expected Progeny Differences (EPDs) or Predicted Transmitting Abilities (PTAs)
|
||||||
|
in the United States) are generally found using Charles Henderson's linear
|
||||||
|
mixed-model equations. Great, you say, what is that? I'm glad you asked...
|
||||||
|
|
||||||
|
## The mathematical model
|
||||||
|
|
||||||
|
Every genetics textbook starts with the following equation
|
||||||
|
|
||||||
|
```math
|
||||||
|
P = G + E
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
|
||||||
|
- ``P`` = phenotype
|
||||||
|
- ``G`` = genotype (think: breeding value)
|
||||||
|
- ``E`` = environmental factors
|
||||||
|
|
||||||
|
Now, we can't identify _every_ environmental factor that affects phenotype, but
|
||||||
|
we can identify some of them, so let's substitute ``E`` with some absolutes. A
|
||||||
|
good place to start is the "contemporary group" listings for the trait of
|
||||||
|
interest in the
|
||||||
|
[BIF Guidelines](https://beefimprovement.org/wp-content/uploads/2018/03/BIFGuidelinesFinal_updated0318.pdf),
|
||||||
|
though for the purposes of this example, I'm only going to consider sex, and
|
||||||
|
birth year.
|
||||||
|
|
||||||
|
```math
|
||||||
|
P = G + E_{year} + E_{sex}
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
|
||||||
|
- ``E_n`` is the effect of ``n`` on the phenotype
|
||||||
|
|
||||||
|
Now let's say I want to find the weaning weight breeding value (``G``) of my
|
||||||
|
favorite herd bull. I compile his stats, and then plug them into the equation
|
||||||
|
and solve for ``G``, right? Let's try that.
|
||||||
|
|
||||||
|
### Calf Records
|
||||||
|
|
||||||
|
| ID | Birth Year | Sex | YW (kg) |
|
||||||
|
|:-- | :--------- | :----- |:------- |
|
||||||
|
| 1 | 1990 | Male | 354 |
|
||||||
|
|
||||||
|
```math
|
||||||
|
354 \ \textup{kg} = G_1 + E_{1990} + E_{male}
|
||||||
|
```
|
||||||
|
|
||||||
|
Hmm. I just realized I don't know any of those ``E`` values. Come to think of it,
|
||||||
|
I remember from math class that I will need as many equations as I have
|
||||||
|
unknowns, so I will add equations for other animals that I have records for.
|
||||||
|
|
||||||
|
### Calf Records
|
||||||
|
|
||||||
|
| ID | Birth Year | Sex | YW (kg) |
|
||||||
|
|:--- |:---------- |:------ |:------- |
|
||||||
|
| 1 | 1990 | Male | 354 |
|
||||||
|
| 2 | 1990 | Female | 251 |
|
||||||
|
| 3 | 1991 | Male | 327 |
|
||||||
|
| 4 | 1991 | Female | 328 |
|
||||||
|
| 5 | 1991 | Male | 301 |
|
||||||
|
| 6 | 1991 | Female | 270 |
|
||||||
|
| 7 | 1992 | Male | 330 |
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{aligned}
|
||||||
|
251 \ \textup{kg} &= G_2 + E_{1990} + E_{female} \\
|
||||||
|
327 \ \textup{kg} &= G_3 + E_{1991} + E_{male} \\
|
||||||
|
328 \ \textup{kg} &= G_4 + E_{1991} + E_{female} \\
|
||||||
|
301 \ \textup{kg} &= G_5 + E_{1991} + E_{male} \\
|
||||||
|
270 \ \textup{kg} &= G_6 + E_{1991} + E_{female} \\
|
||||||
|
330 \ \textup{kg} &= G_7 + E_{1992} + E_{male}
|
||||||
|
\end{aligned}
|
||||||
|
```
|
||||||
|
|
||||||
|
Drat! Every animal I added brings more variables into the system than it
|
||||||
|
eliminates! In fact, since each cow brings in _at least_ one term
|
||||||
|
(``G_n``), I will never be able to write enough equations to solve for
|
||||||
|
``G`` numerically. I will have to use a different approach.
|
||||||
|
|
||||||
|
## The statistical model: the setup
|
||||||
|
|
||||||
|
Since I can never solve for ``G`` directly, I will have to find some way to
|
||||||
|
estimate it. I can switch to a statistical model and solve for ``G`` that way. The
|
||||||
|
caveat with a statistical model is that there will be some level of error, but
|
||||||
|
so long as we know and can control the level of error, that will be better than
|
||||||
|
not knowing ``G`` at all.
|
||||||
|
|
||||||
|
Since we're switching into a statistical space, we should also switch the
|
||||||
|
variables we're using. I'll rewrite the first equation as
|
||||||
|
|
||||||
|
```math
|
||||||
|
y = b + u + e
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
|
||||||
|
- ``y`` = Phenotype
|
||||||
|
- ``b`` = Environment
|
||||||
|
- ``u`` = Genotype
|
||||||
|
- ``e`` = Error
|
||||||
|
|
||||||
|
It's not as easy as simply substituting ``b`` for every ``E`` that we had above,
|
||||||
|
however. The reason for that is that we must make the assumption that
|
||||||
|
environment is a **fixed effect** and that genotype is a **random effect**. I'll
|
||||||
|
go over why that is later, but for now, understand that we need to transform the
|
||||||
|
environment terms and genotype terms separately.
|
||||||
|
|
||||||
|
We'll start with the environment terms.
|
||||||
|
|
||||||
|
## The statistical model: environment as fixed effects
|
||||||
|
|
||||||
|
To properly transform the equations, I will have to introduce
|
||||||
|
``b_{mean}`` terms in each animal's equation. This is part of the fixed
|
||||||
|
effect statistical assumption, and it will let us obtain a solution.
|
||||||
|
|
||||||
|
Here are the transformed equations:
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{aligned}
|
||||||
|
354 \ \textup{kg} &= u_1 + b_{mean} + b_{1990} + b_{male} + e_1 \\
|
||||||
|
251 \ \textup{kg} &= u_2 + b_{mean} + b_{1990} + b_{female} + e_2 \\
|
||||||
|
327 \ \textup{kg} &= u_3 + b_{mean} + b_{1991} + b_{male} + e_3 \\
|
||||||
|
328 \ \textup{kg} &= u_4 + b_{mean} + b_{1991} + b_{female} +e_4 \\
|
||||||
|
301 \ \textup{kg} &= u_5 + b_{mean} + b_{1991} + b_{male} + e_5 \\
|
||||||
|
270 \ \textup{kg} &= u_6 + b_{mean} + b_{1991} + b_{female} + e_6 \\
|
||||||
|
330 \ \textup{kg} &= u_7 + b_{mean} + b_{1992} + b_{male} + e_7
|
||||||
|
\end{aligned}
|
||||||
|
```
|
||||||
|
|
||||||
|
Statistical methods work best in matrix form, so I'm going to convert the set of
|
||||||
|
equations above to a single matrix equation that means the exact same thing.
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
354 \ \textup{kg} \\
|
||||||
|
251 \ \textup{kg} \\
|
||||||
|
327 \ \textup{kg} \\
|
||||||
|
328 \ \textup{kg} \\
|
||||||
|
301 \ \textup{kg} \\
|
||||||
|
270 \ \textup{kg} \\
|
||||||
|
330 \ \textup{kg}
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
u_1 \\
|
||||||
|
u_2 \\
|
||||||
|
u_3 \\
|
||||||
|
u_4 \\
|
||||||
|
u_5 \\
|
||||||
|
u_6 \\
|
||||||
|
u_7
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
b_{mean}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
b_{1990} \\
|
||||||
|
b_{1990} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{1992}
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
b_{male} \\
|
||||||
|
b_{female} \\
|
||||||
|
b_{male} \\
|
||||||
|
b_{female} \\
|
||||||
|
b_{male} \\
|
||||||
|
b_{female} \\
|
||||||
|
b_{male}
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
e_1 \\
|
||||||
|
e_2 \\
|
||||||
|
e_3 \\
|
||||||
|
e_4 \\
|
||||||
|
e_5 \\
|
||||||
|
e_6 \\
|
||||||
|
e_7
|
||||||
|
\end{bmatrix}
|
||||||
|
```
|
||||||
|
|
||||||
|
That's a nice equation, but now my hand is getting tired writing all those ``b``
|
||||||
|
terms over and over again, so I'm going to use
|
||||||
|
[the dot product](https://www.khanacademy.org/math/precalculus/x9e81a4f98389efdf:matrices/x9e81a4f98389efdf:multiplying-matrices-by-matrices/v/matrix-multiplication-intro)
|
||||||
|
to condense this down.
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
354 \textup{kg} \\
|
||||||
|
251 \textup{kg} \\
|
||||||
|
327 \textup{kg} \\
|
||||||
|
328 \textup{kg} \\
|
||||||
|
301 \textup{kg} \\
|
||||||
|
270 \textup{kg} \\
|
||||||
|
330 \textup{kg}
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
u_1 \\
|
||||||
|
u_2 \\
|
||||||
|
u_3 \\
|
||||||
|
u_4 \\
|
||||||
|
u_5 \\
|
||||||
|
u_6 \\
|
||||||
|
u_7
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 1 & 0 & 0 & 1 & 0 \\
|
||||||
|
1 & 1 & 0 & 0 & 0 & 1 \\
|
||||||
|
1 & 0 & 1 & 0 & 1 & 0 \\
|
||||||
|
1 & 0 & 1 & 0 & 0 & 1 \\
|
||||||
|
1 & 0 & 1 & 0 & 1 & 0 \\
|
||||||
|
1 & 0 & 0 & 1 & 1 & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\begin{bmatrix}
|
||||||
|
b_{mean} \\
|
||||||
|
b_{1990} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{1992} \\
|
||||||
|
b_{male} \\
|
||||||
|
b_{female}
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
e_1 \\
|
||||||
|
e_2 \\
|
||||||
|
e_3 \\
|
||||||
|
e_4 \\
|
||||||
|
e_5 \\
|
||||||
|
e_6 \\
|
||||||
|
e_7
|
||||||
|
\end{bmatrix}
|
||||||
|
```
|
||||||
|
|
||||||
|
That matrix in the middle with all the zeros and ones is called the **incidence
|
||||||
|
matrix**, and essentially reads like a table with each row corresponding to an
|
||||||
|
animal, and each column corresponding to a fixed effect. For brevity, we'll just
|
||||||
|
call it ``X``, though. One indicates that the animal and effect go together,
|
||||||
|
and zero means they don't. For our record, we could write a table to go with
|
||||||
|
``X``, and it would look like this:
|
||||||
|
|
||||||
|
| Animal | mean | 1990 | 1991 | 1992 | male | female |
|
||||||
|
|:------ |:---- |:---- |:---- |:---- |:---- |:------ |
|
||||||
|
| 1 | yes | yes | no | no | yes | no |
|
||||||
|
| 2 | yes | yes | no | no | no | yes |
|
||||||
|
| 3 | yes | no | yes | no | yes | no |
|
||||||
|
| 4 | yes | no | yes | no | no | yes |
|
||||||
|
| 5 | yes | no | yes | no | yes | no |
|
||||||
|
| 6 | yes | no | yes | no | no | yes |
|
||||||
|
| 7 | yes | no | no | yes | yes | no |
|
||||||
|
|
||||||
|
Now that we have ``X``, we have the ability to start making changes to allow
|
||||||
|
us to solve for ``u``. Immediately, we see that ``X`` is **singular**, meaning
|
||||||
|
it can't be solved directly. We kind of already knew that, but now we can
|
||||||
|
quantify it. We calculate the
|
||||||
|
[rank of ``X``](https://math.stackexchange.com/a/2080577),
|
||||||
|
and find that there is only enough information contained in it to solve for 4
|
||||||
|
variables, which means we need to eliminate two columns.
|
||||||
|
|
||||||
|
There are several ways to effectively eliminate fixed effects in this type of
|
||||||
|
system, but one of the simplest and the most common methods is to declare a
|
||||||
|
**base population**, and lump the fixed effects of animals within the base
|
||||||
|
population into the mean fixed effect. Note that it is possible to declare a
|
||||||
|
base population that has no animals in it, but that gives weird results. For
|
||||||
|
this example, we'll follow the convention built into `beefblup` and pick the
|
||||||
|
last occuring form of each variable.
|
||||||
|
|
||||||
|
### Base population
|
||||||
|
|
||||||
|
- **Year**: 1992
|
||||||
|
- **Sex**: Female
|
||||||
|
|
||||||
|
Now in order to use the base population, we simply drop the columns representing
|
||||||
|
conformity with the traits in the base population from ``X````. Our new
|
||||||
|
equation looks like
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
354 \ \textup{kg} \\
|
||||||
|
251 \ \textup{kg} \\
|
||||||
|
327 \ \textup{kg} \\
|
||||||
|
328 \ \textup{kg} \\
|
||||||
|
301 \ \textup{kg} \\
|
||||||
|
270 \ \textup{kg} \\
|
||||||
|
330 \ \textup{kg}
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
u_1 \\
|
||||||
|
u_2 \\
|
||||||
|
u_3 \\
|
||||||
|
u_4 \\
|
||||||
|
u_5 \\
|
||||||
|
u_6 \\
|
||||||
|
u_7
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 1 & 0 1 \\
|
||||||
|
1 & 1 & 0 0 \\
|
||||||
|
1 & 0 & 1 1 \\
|
||||||
|
1 & 0 & 1 0 \\
|
||||||
|
1 & 0 & 1 1 \\
|
||||||
|
1 & 0 & 0 1
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
b_{mean} \\
|
||||||
|
b_{1990} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{male} \\
|
||||||
|
\end{bmatrix}
|
||||||
|
+
|
||||||
|
\begin{bmatrix}
|
||||||
|
e_1 \\
|
||||||
|
e_2 \\
|
||||||
|
e_3 \\
|
||||||
|
e_4 \\
|
||||||
|
e_5 \\
|
||||||
|
e_6 \\
|
||||||
|
e_7
|
||||||
|
\end{bmatrix}
|
||||||
|
```
|
||||||
|
|
||||||
|
And the table for humans to understand:
|
||||||
|
|
||||||
|
| Animal | mean | 1990 | 1991 | female |
|
||||||
|
|:------ |:---- |:---- |:---- |:------ |
|
||||||
|
| 1 | yes | yes | no | no |
|
||||||
|
| 2 | yes | yes | no | yes |
|
||||||
|
| 3 | yes | no | yes | no |
|
||||||
|
| 4 | yes | no | yes | yes |
|
||||||
|
| 5 | yes | no | yes | no |
|
||||||
|
| 6 | yes | no | yes | yes |
|
||||||
|
| 7 | yes | no | no | no |
|
||||||
|
|
||||||
|
Even though each animal is said to participate in the mean, the result for the
|
||||||
|
mean will now actually be the average of the base population. Math is weird
|
||||||
|
sometimes.
|
||||||
|
|
||||||
|
Double-checking, the rank of ``X`` is still 4, so we can solve for the average
|
||||||
|
of the base population, and the effect of being born in 1990, the effect of
|
||||||
|
being born in 1991, and the effect of being male.
|
||||||
|
|
||||||
|
Whew! That was some transformation. We still haven't constrained this model
|
||||||
|
enough to solve it, though. Now on to the genotype.
|
||||||
|
|
||||||
|
## The statistical model: genotype as random effect
|
||||||
|
|
||||||
|
Remember I said above that genotype was a **random effect**? Statisticians say
|
||||||
|
"_a random effect is an effect that influences the variance and not the mean of
|
||||||
|
the observation in question._" I'm not sure exactly what that means or how that
|
||||||
|
is applicable to genotype, but it does let us add an additional constraint to
|
||||||
|
our model.
|
||||||
|
|
||||||
|
The basic gist of genetics is that organisms that are related to one another are
|
||||||
|
similar to one another. Based on a pedigree, we can even say how related to one
|
||||||
|
another animals are, and quantify that as the amount that the genotype terms
|
||||||
|
should be allowed to vary between related animals.
|
||||||
|
|
||||||
|
We'll need a pedigree for our animals:
|
||||||
|
|
||||||
|
### Calf Records
|
||||||
|
|
||||||
|
| ID | Sire | Dam | Birth Year | Sex | YW (kg) |
|
||||||
|
|:-- |:---- |:--- |:---------- |:------ |:------- |
|
||||||
|
| 1 | NA | NA | 1990 | Male | 354 |
|
||||||
|
| 2 | NA | NA | 1990 | Female | 251 |
|
||||||
|
| 3 | 1 | NA | 1991 | Male | 327 |
|
||||||
|
| 4 | 1 | NA | 1991 | Female | 328 |
|
||||||
|
| 5 | 1 | 2 | 1991 | Male | 301 |
|
||||||
|
| 6 | NA | 2 | 1991 | Female | 270 |
|
||||||
|
| 7 | NA | NA | 1992 | Male | 330 |
|
||||||
|
|
||||||
|
Now, because cows sexually reproduce, the genotype of one animal is halfway the
|
||||||
|
same as that of either parent (exception: inbreeding, see below). It should go
|
||||||
|
without saying that each animal's genotype is identical to that of itself. From
|
||||||
|
this we can then find the numerical multiplier for any relative (grandparent =
|
||||||
|
1/4, full sibling = 1, half sibling = 1/2, etc.). Let's write those values down
|
||||||
|
in a table.
|
||||||
|
|
||||||
|
| ID | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||||
|
|:-- |:--- |:--- |:--- |:--- |:--- |:--- |:-- |
|
||||||
|
| 1 | 1 | 0 | 1/2 | 1/2 | 1/2 | 0 | 0 |
|
||||||
|
| 2 | 0 | 1 | 0 | 0 | 1/2 | 1/2 | 0 |
|
||||||
|
| 3 | 1/2 | 0 | 1 | 1/4 | 1/4 | 0 | 0 |
|
||||||
|
| 4 | 1/2 | 0 | 1/4 | 1 | 1/4 | 0 | 0 |
|
||||||
|
| 5 | 1/2 | 1/2 | 1/4 | 1/4 | 1 | 1/4 | 0 |
|
||||||
|
| 6 | 0 | 1/2 | 0 | 0 | 1/4 | 1 | 0 |
|
||||||
|
| 7 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
|
||||||
|
|
||||||
|
Hmm. All those numbers look suspiciously like a matrix. Why don't I put them
|
||||||
|
into a matrix called ``A``?
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
1 & 0 & \frac{1}{2} & \frac{1}{2} & \frac{1}{2} & 0 & 0 \\
|
||||||
|
0 & 1 & 0 & 0 & \frac{1}{2} & \frac{1}{2} & 0 \\
|
||||||
|
\frac{1}{2} & 0 & 1 & \frac{1}{4} & \frac{1}{4} & 0 & 0 \\
|
||||||
|
\frac{1}{2} & 0 & \frac{1}{4} & 1 & \frac{1}{4} & 0 & 0 \\
|
||||||
|
\frac{1}{2} & \frac{1}{2} & \frac{1}{4} & \frac{1}{4} & 1 & \frac{1}{4} & 0 \\
|
||||||
|
0 & \frac{1}{2} & 0 & 0 & \frac{1}{4} & 1 & 0 \\
|
||||||
|
0 & 0 & 0 & 0 & 0 & 0 & 1
|
||||||
|
\end{bmatrix}
|
||||||
|
```
|
||||||
|
|
||||||
|
Now I'm going to take the matrix with all of the ``u`` values, and call it
|
||||||
|
``μ``. To quantify the idea of genetic relationship, I will then say that
|
||||||
|
|
||||||
|
```math
|
||||||
|
\textup{var}(μ) = A σ_μ^2
|
||||||
|
```
|
||||||
|
|
||||||
|
Where:
|
||||||
|
|
||||||
|
- ``A`` = the relationship matrix defined above
|
||||||
|
- ``σ_μ^2`` = the standard deviation of all the genotypes
|
||||||
|
|
||||||
|
To fully constrain the system, I have to make two more assumptions: 1) that the
|
||||||
|
error term in each animal's equation is independent from all other error terms,
|
||||||
|
and 2) that the error term for each animal is independent from the value of the
|
||||||
|
genotype. I will call the matrix holding the ``e`` values ``ε`` and then say
|
||||||
|
|
||||||
|
```math
|
||||||
|
\textup{var}(ϵ) = I σ_ϵ^2
|
||||||
|
```
|
||||||
|
|
||||||
|
```math
|
||||||
|
\textup{cov}(μ, ϵ) = \textup{cov}(ϵ, μ) = 0
|
||||||
|
```
|
||||||
|
|
||||||
|
Substituting in the matrix names, our equation now looks like
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
354 \textup{kg} \\
|
||||||
|
251 \textup{kg} \\
|
||||||
|
327 \textup{kg} \\
|
||||||
|
328 \textup{kg} \\
|
||||||
|
301 \textup{kg} \\
|
||||||
|
270 \textup{kg} \\
|
||||||
|
330 \textup{kg}
|
||||||
|
\end{bmatrix}
|
||||||
|
= μ + X
|
||||||
|
\begin{bmatrix}
|
||||||
|
b_{mean} \\
|
||||||
|
b_{1990} \\
|
||||||
|
b_{1991} \\
|
||||||
|
b_{male} \\
|
||||||
|
\end{bmatrix}
|
||||||
|
+ ϵ
|
||||||
|
```
|
||||||
|
|
||||||
|
We are going to make three changes to this equation before we are ready to solve
|
||||||
|
it, but they are cosmetic details for this example.
|
||||||
|
|
||||||
|
1. Call the matrix on the left side of the equation ``Y`` (sometimes it's
|
||||||
|
called the **matrix of observations**)
|
||||||
|
2. Multiply ``μ`` by an identity matrix called ``Z``. Multiplying by the
|
||||||
|
identity matrix is the matrix form of multiplying by one, so nothing changes,
|
||||||
|
but if we later want to find one animal's genetic effect on another animal's
|
||||||
|
performance (e.g. a **maternal effects model**), we can alter ``Z`` to
|
||||||
|
allow that
|
||||||
|
3. Call the matrix with all the ``b`` values ``β``.
|
||||||
|
|
||||||
|
With all these changes, we now have
|
||||||
|
|
||||||
|
```math
|
||||||
|
Y = Z μ + X β + ϵ
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the canonical form of the mixed-model equation, and the form that
|
||||||
|
Charles Henderson used to first predict breeding values of livestock.
|
||||||
|
|
||||||
|
## Solving the equations
|
||||||
|
|
||||||
|
Henderson proved that the mixed-model equation can be solved by the following:
|
||||||
|
|
||||||
|
```math
|
||||||
|
\begin{bmatrix}
|
||||||
|
\hat{β} \\
|
||||||
|
\hat{μ}
|
||||||
|
\end{bmatrix}
|
||||||
|
=
|
||||||
|
\begin{bmatrix}
|
||||||
|
X'X & X'Z \\
|
||||||
|
Z'X & Z'Z+A^{-1}λ
|
||||||
|
\end{bmatrix}^{-1}
|
||||||
|
\begin{bmatrix}
|
||||||
|
X'Y \\
|
||||||
|
Z'Y
|
||||||
|
\end{bmatrix}
|
||||||
|
```
|
||||||
|
|
||||||
|
Where
|
||||||
|
|
||||||
|
- The variables with hats are the statistical estimates of their mixed-model
|
||||||
|
counterparts
|
||||||
|
- The predicted value of ``μ`` is called the _Best Linear Unbiased
|
||||||
|
Predictor_ or _BLUP_
|
||||||
|
- The estimated value of ``β`` is called the _Best Linear Unbiased Estimate_
|
||||||
|
or _BLUE_
|
||||||
|
- ' is the transpose operator
|
||||||
|
- ``λ`` is a single real number that is a function of the heritability for the trait
|
||||||
|
being predicted. It can be left out in many cases (``λ = 1``).
|
||||||
|
- ``λ = \frac{1-h^2}{h^2}``
|
||||||
|
|
||||||
|
What happened to
|
||||||
|
|
||||||
|
## Footnotes
|
||||||
|
|
||||||
|
### Exception
|
||||||
|
|
||||||
|
An animal **can** share its genome with itself by a factor of more than one:
|
||||||
|
that's called inbreeding! We can account for this, and `beefblup` does as it
|
||||||
|
calculates ``A``. This is an area that actually merits a good deal of study:
|
||||||
|
see chapter 2 of _Linear Models for the Prediction of Animal Breeding Values_ by
|
||||||
|
Raphael A. Mrode (ISBN 978 1 78064 391 5).
|
14
docs/src/index.md
Normal file
14
docs/src/index.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
```@meta
|
||||||
|
CurrentModule = BeefBLUP
|
||||||
|
```
|
||||||
|
|
||||||
|
# beefblup
|
||||||
|
|
||||||
|
Documentation for [BeefBLUP](https://github.com/MillironX/beefblup).
|
||||||
|
|
||||||
|
```@index
|
||||||
|
```
|
||||||
|
|
||||||
|
```@docs
|
||||||
|
beefblup
|
||||||
|
```
|
308
src/BeefBLUP.jl
Executable file
308
src/BeefBLUP.jl
Executable file
|
@ -0,0 +1,308 @@
|
||||||
|
# beefblup
|
||||||
|
# Julia package for performing single-variate BLUP to find beef cattle
|
||||||
|
# breeding values
|
||||||
|
# (C) 2021 Thomas A. Christensen II
|
||||||
|
# Licensed under BSD-3-Clause License
|
||||||
|
# cSpell:includeRegExp #.*
|
||||||
|
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||||
|
|
||||||
|
module BeefBLUP
|
||||||
|
|
||||||
|
# Import the required packages
|
||||||
|
using CSV
|
||||||
|
using DataFrames
|
||||||
|
using LinearAlgebra
|
||||||
|
using Dates
|
||||||
|
using Gtk
|
||||||
|
|
||||||
|
# Main entry-level function - acts just like the script
|
||||||
|
function beefblup()
|
||||||
|
|
||||||
|
# Ask for an input spreadsheet
|
||||||
|
path = open_dialog_native(
|
||||||
|
"Select a beefblup worksheet",
|
||||||
|
GtkNullContainer(),
|
||||||
|
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ask for an output text filename
|
||||||
|
savepath = save_dialog_native(
|
||||||
|
"Save your beefblup results",
|
||||||
|
GtkNullContainer(),
|
||||||
|
(GtkFileFilter("*.txt", name="Results file"),
|
||||||
|
"*.txt")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ask for heritability
|
||||||
|
print("What is the heritability for this trait?> ")
|
||||||
|
h2 = parse(Float64, readline(stdin))
|
||||||
|
|
||||||
|
beefblup(path, savepath, h2)
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
function beefblup(datafile::String, h2::Float64)
|
||||||
|
# Assume the data is named the same as the file without the trailing extension
|
||||||
|
dataname = join(split(datafile, ".")[1:end - 1])
|
||||||
|
|
||||||
|
# Create a new results name
|
||||||
|
resultsfile = string(dataname, "_results.txt")
|
||||||
|
|
||||||
|
# Pass this info on to the worker
|
||||||
|
beefblup(datafile, resultsfile, h2)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Main worker function, can perform all the work if given all the user input
|
||||||
|
function beefblup(path::String, savepath::String, h2::Float64)
|
||||||
|
|
||||||
|
# Import data from a suitable spreadsheet
|
||||||
|
data = DataFrame(CSV.File(path))
|
||||||
|
|
||||||
|
# Make sure the data is in the proper format
|
||||||
|
renamecolstospec!(data)
|
||||||
|
|
||||||
|
# Sort the array by date
|
||||||
|
sort!(data, :birthdate)
|
||||||
|
|
||||||
|
# Define fields to hold id values for animals and their parents
|
||||||
|
numanimals = length(data.id)
|
||||||
|
|
||||||
|
# Calculate the relationship matrix
|
||||||
|
A = additiverelationshipmatrix(data.id, data.dam, data.sire)
|
||||||
|
|
||||||
|
# Extract all of the fixed effects
|
||||||
|
fixedeffectdata = data[:,5:end-1]
|
||||||
|
|
||||||
|
(X, fixedeffects) = fixedeffectmatrix(fixedeffectdata)
|
||||||
|
|
||||||
|
# Extract the observed data
|
||||||
|
Y = convert(Array{Float64}, data[:,end])
|
||||||
|
|
||||||
|
# The random effects matrix
|
||||||
|
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||||
|
|
||||||
|
# Remove items where there is no data
|
||||||
|
nullobs = findall(isnothing, Y)
|
||||||
|
Z[nullobs, nullobs] .= 0
|
||||||
|
|
||||||
|
# Calculate heritability
|
||||||
|
λ = (1 - h2) / h2
|
||||||
|
|
||||||
|
# Use the mixed-model equations
|
||||||
|
MME = [X' * X X' * Z; Z' * X (Z' * Z) + (inv(A) .* λ)]
|
||||||
|
MMY = [X' * Y; Z' * Y]
|
||||||
|
solutions = MME \ MMY
|
||||||
|
|
||||||
|
# Find the accuracies
|
||||||
|
diaginv = diag(inv(MME))
|
||||||
|
reliability = ones(Float64, length(diaginv)) .- diaginv .* λ
|
||||||
|
|
||||||
|
# Find how many traits we found BLUE for
|
||||||
|
numgroups = length(reliability) - numanimals
|
||||||
|
|
||||||
|
# Split the BLUP and BLUE results
|
||||||
|
β = solutions[1:numgroups]
|
||||||
|
μ = solutions[numgroups+1:end]
|
||||||
|
μ_reliability = reliability[numgroups+1:end]
|
||||||
|
|
||||||
|
# Extract the names of the traits
|
||||||
|
traitname = names(data)[end]
|
||||||
|
|
||||||
|
# Start printing results to output
|
||||||
|
fileID = open(savepath, "w")
|
||||||
|
write(fileID, "beefblup Results Report\n")
|
||||||
|
write(fileID, "Produced using beefblup (")
|
||||||
|
write(fileID, "https://github.com/millironx/beefblup")
|
||||||
|
write(fileID, ")\n\n")
|
||||||
|
write(fileID, "Input:\t")
|
||||||
|
write(fileID, path)
|
||||||
|
write(fileID, "\nAnalysis performed:\t")
|
||||||
|
write(fileID, string(Dates.today()))
|
||||||
|
write(fileID, "\nTrait examined:\t")
|
||||||
|
write(fileID, traitname)
|
||||||
|
write(fileID, "\n\n")
|
||||||
|
|
||||||
|
# Print base population stats
|
||||||
|
write(fileID, "Base Population:\n")
|
||||||
|
for i in 1:length(fixedeffects)
|
||||||
|
effect = fixedeffects[i]
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(effect.name))
|
||||||
|
write(fileID, ":\t")
|
||||||
|
write(fileID, string(effect.basetrait))
|
||||||
|
write(fileID, "\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
write(fileID, "\tMean ")
|
||||||
|
write(fileID, traitname)
|
||||||
|
write(fileID, ":\t")
|
||||||
|
write(fileID, string(solutions[1]))
|
||||||
|
write(fileID, "\n\n")
|
||||||
|
|
||||||
|
# Contemporary group adjustments
|
||||||
|
counter = 2
|
||||||
|
write(fileID, "Contemporary Group Effects:\n")
|
||||||
|
for i in 1:length(fixedeffects)
|
||||||
|
effect = fixedeffects[i]
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, effect.name)
|
||||||
|
write(fileID, "\tEffect\n")
|
||||||
|
for j in 1:length(effect.alltraits)
|
||||||
|
types = effect.alltraits[j]
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(types))
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(β[counter]))
|
||||||
|
write(fileID, "\n")
|
||||||
|
|
||||||
|
counter = counter + 1
|
||||||
|
end
|
||||||
|
write(fileID, "\n")
|
||||||
|
end
|
||||||
|
write(fileID, "\n")
|
||||||
|
|
||||||
|
# Expected breeding values
|
||||||
|
write(fileID, "Expected Breeding Values:\n")
|
||||||
|
write(fileID, "\tID\tEBV\tReliability\n")
|
||||||
|
for i in 1:numanimals
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(data.id[i]))
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(μ[i]))
|
||||||
|
write(fileID, "\t")
|
||||||
|
write(fileID, string(μ_reliability[i]))
|
||||||
|
write(fileID, "\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
write(fileID, "\n - END REPORT -")
|
||||||
|
close(fileID)
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
fixedeffectmatrix(fixedeffectdata::DataFrame)
|
||||||
|
|
||||||
|
Creates contemporary groupings and the fixed-effect incidence matrix based on the fixed
|
||||||
|
effects listed in `fixedeffectdata`.
|
||||||
|
|
||||||
|
Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the
|
||||||
|
actual matrix, and `fixedeffects` is the contemporary groupings.
|
||||||
|
"""
|
||||||
|
function fixedeffectmatrix(fixedeffectdata::DataFrame)
|
||||||
|
# Declare an empty return matrix
|
||||||
|
fixedeffects = FixedEffect[]
|
||||||
|
|
||||||
|
# Add each trait to the array
|
||||||
|
for i in 1:size(fixedeffectdata)[2]
|
||||||
|
name = names(fixedeffectdata)[i]
|
||||||
|
traits = eachcol(fixedeffectdata)[i]
|
||||||
|
|
||||||
|
if length(unique(traits)) > 1
|
||||||
|
push!(fixedeffects, FixedEffect(name, traits))
|
||||||
|
else
|
||||||
|
@warn string("column '", name, "' does not have any unique animals and will be dropped from analysis")
|
||||||
|
pname = propertynames(fixedeffectdata)[i]
|
||||||
|
DataFrames.select!(fixedeffectdata, Not(pname))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
X = ones(Int64, (size(fixedeffectdata)[1], 1))
|
||||||
|
|
||||||
|
for i in 1:length(fixedeffects)
|
||||||
|
trait = fixedeffects[i]
|
||||||
|
for phenotype in trait.alltraits
|
||||||
|
X = cat(X, Int64.(fixedeffectdata[:,i] .== phenotype), dims=2)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return X, fixedeffects
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
additiverelationshipmatrix(id, dam, sire)
|
||||||
|
|
||||||
|
Returns the additive numerator relationship matrix based on the pedigree provided in `dam`
|
||||||
|
and `sire` for animals in `id`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
function additiverelationshipmatrix(id::AbstractVector, damid::AbstractVector, sireid::AbstractVector)
|
||||||
|
# Sanity-check for valid pedigree
|
||||||
|
if !(length(id) == length(damid) && length(damid) == length(sireid))
|
||||||
|
throw(ArgumentError("id, dam, and sire must be of the same length"))
|
||||||
|
end
|
||||||
|
|
||||||
|
# Convert to positions
|
||||||
|
dam = indexin(damid, id)
|
||||||
|
sire = indexin(sireid, id)
|
||||||
|
|
||||||
|
# Calculate loop iterations
|
||||||
|
numanimals = length(dam)
|
||||||
|
|
||||||
|
# Create an empty matrix for the additive relationship matrix
|
||||||
|
A = zeros(numanimals, numanimals)
|
||||||
|
|
||||||
|
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||||
|
# Henderson
|
||||||
|
for i in 1:numanimals
|
||||||
|
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||||
|
for j in 1:(i - 1)
|
||||||
|
A[j,i] = 0.5 * (A[j,sire[i]] + A[j,dam[i]])
|
||||||
|
A[i,j] = A[j,i]
|
||||||
|
end
|
||||||
|
A[i,i] = 1 + 0.5 * A[sire[i], dam[i]]
|
||||||
|
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||||
|
for j in 1:(i - 1)
|
||||||
|
A[j,i] = 0.5 * A[j,dam[i]]
|
||||||
|
A[i,j] = A[j,i]
|
||||||
|
end
|
||||||
|
A[i,i] = 1
|
||||||
|
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||||
|
for j in 1:(i - 1)
|
||||||
|
A[j,i] = 0.5 * A[j,sire[i]]
|
||||||
|
A[i,j] = A[j,i]
|
||||||
|
end
|
||||||
|
A[i,i] = 1
|
||||||
|
else
|
||||||
|
for j in 1:(i - 1)
|
||||||
|
A[j,i] = 0
|
||||||
|
A[i,j] = 0
|
||||||
|
end
|
||||||
|
A[i,i] = 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return A
|
||||||
|
end
|
||||||
|
|
||||||
|
"""
|
||||||
|
renamecolstospec(::DataFrame)
|
||||||
|
|
||||||
|
Renames the first four columns of the beefblup data sheet so that they can be referred to by
|
||||||
|
name instead of by column index, regardless of user input.
|
||||||
|
"""
|
||||||
|
function renamecolstospec!(df::DataFrame)
|
||||||
|
# Pull out the fixed-effect and observation name
|
||||||
|
othernames = propertynames(df)[5:end]
|
||||||
|
|
||||||
|
# Put specification column names and user-defined names together
|
||||||
|
allnames = cat([:id, :birthdate, :dam, :sire], othernames, dims=1)
|
||||||
|
|
||||||
|
# Rename in the DataFrame
|
||||||
|
rename!(df, allnames, makeunique=true)
|
||||||
|
return df
|
||||||
|
end
|
||||||
|
|
||||||
|
struct FixedEffect
|
||||||
|
name::String
|
||||||
|
basetrait::Any
|
||||||
|
alltraits::AbstractArray{Any}
|
||||||
|
end
|
||||||
|
|
||||||
|
function FixedEffect(name::String, incidences)
|
||||||
|
basetrait = last(unique(incidences))
|
||||||
|
types = unique(incidences)[1:end-1]
|
||||||
|
return FixedEffect(name, basetrait, types)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
end
|
21
test/runtests.jl
Normal file
21
test/runtests.jl
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
using BeefBLUP
|
||||||
|
using DataFrames
|
||||||
|
using Test
|
||||||
|
|
||||||
|
@testset "BeefBLUP.jl" begin
|
||||||
|
# Write your tests here.
|
||||||
|
correctX = [1 1 0 1; 1 1 0 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 0 1]
|
||||||
|
fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"])
|
||||||
|
@test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX
|
||||||
|
correctA = [1 0 1/2 1/2 1/2 0 0;
|
||||||
|
0 1 0 0 1/2 1/2 0;
|
||||||
|
1/2 0 1 1/4 1/4 0 0;
|
||||||
|
1/2 0 1/4 1 1/4 0 0;
|
||||||
|
1/2 1/2 1/4 1/4 1 1/4 0;
|
||||||
|
0 1/2 0 0 1/4 1 0;
|
||||||
|
0 0 0 0 0 0 1]
|
||||||
|
id = collect(1:7)
|
||||||
|
dam_id = [missing, missing, missing, missing, 2, 2, missing]
|
||||||
|
sire_id = [missing, missing, 1, 1, 1, missing, missing]
|
||||||
|
@test BeefBLUP.additiverelationshipmatrix(id, dam_id, sire_id) == correctA
|
||||||
|
end
|
Loading…
Reference in a new issue