mirror of
https://github.com/MillironX/beefblup.git
synced 2024-11-10 18:23:08 +00:00
Compare commits
No commits in common. "develop" and "v0.1" have entirely different histories.
25 changed files with 913 additions and 1243 deletions
4
.gitattributes
vendored
4
.gitattributes
vendored
|
@ -14,6 +14,4 @@
|
|||
*.docx -crlf -diff -merge
|
||||
*.pdf -crlf -diff -merge
|
||||
*.jpg -crlf -diff -merge
|
||||
*.png -crlf -diff -merge
|
||||
*.jl linguist-language=Julia
|
||||
beefblup linguist-language=Julia
|
||||
*.png -crlf -diff -merge
|
26
.github/workflows/Documentation.yml
vendored
26
.github/workflows/Documentation.yml
vendored
|
@ -1,26 +0,0 @@
|
|||
name: Documentation
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
DISPLAY: ':99.0'
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: julia-actions/setup-julia@latest
|
||||
with:
|
||||
version: '1.5'
|
||||
- run: sudo apt-get install xvfb -y
|
||||
- run: xvfb-run --auto-servernum julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
|
||||
- run: xvfb-run --auto-servernum julia --project=docs/ docs/make.jl
|
195
.gitignore
vendored
195
.gitignore
vendored
|
@ -1,3 +1,60 @@
|
|||
## MATLAB gitignores
|
||||
|
||||
# Windows default autosave extension
|
||||
*.asv
|
||||
|
||||
# OSX / *nix default autosave extension
|
||||
*.m~
|
||||
|
||||
# Compiled MEX binaries (all platforms)
|
||||
*.mex*
|
||||
|
||||
# Packaged app and toolbox files
|
||||
*.mlappinstall
|
||||
*.mltbx
|
||||
|
||||
# Generated helpsearch folders
|
||||
helpsearch*/
|
||||
|
||||
# Simulink code generation folders
|
||||
slprj/
|
||||
sccprj/
|
||||
|
||||
# Matlab code generation folders
|
||||
codegen/
|
||||
|
||||
# Simulink autosave extension
|
||||
*.autosave
|
||||
|
||||
# Octave session info
|
||||
octave-workspace
|
||||
|
||||
## Windows gitignores
|
||||
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
## Visual Studio Code gitignores
|
||||
|
||||
.vscode/*
|
||||
|
@ -5,9 +62,145 @@
|
|||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/settings.json
|
||||
.vscode/settings.json
|
||||
|
||||
## Microsoft Office gitignores
|
||||
|
||||
*.tmp
|
||||
|
||||
# Word temporary
|
||||
~$*.doc*
|
||||
|
||||
# Word Auto Backup File
|
||||
Backup of *.doc*
|
||||
|
||||
# Excel temporary
|
||||
~$*.xls*
|
||||
|
||||
# Excel Backup File
|
||||
*.xlk
|
||||
|
||||
# PowerPoint temporary
|
||||
~$*.ppt*
|
||||
|
||||
# Visio autosave temporary files
|
||||
*.~vsd*
|
||||
|
||||
## Python gitignores
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Ignore results files
|
||||
Results.txt
|
||||
results.txt
|
||||
|
||||
### Julia.gitignore
|
||||
|
|
15
.travis.yml
15
.travis.yml
|
@ -1,15 +0,0 @@
|
|||
language: julia
|
||||
os:
|
||||
- linux
|
||||
services:
|
||||
- xvfb
|
||||
julia:
|
||||
- 1.3
|
||||
- nightly
|
||||
matrix:
|
||||
allow_failures:
|
||||
- julia: nightly
|
||||
fast_finish: true
|
||||
notifications:
|
||||
email: false
|
||||
codecov: true
|
18
.vscode/launch.json
vendored
18
.vscode/launch.json
vendored
|
@ -1,18 +0,0 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"type": "julia",
|
||||
"request": "launch",
|
||||
"name": "beefblup CLI debug",
|
||||
"program": "${file}",
|
||||
"stopOnEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"juliaEnv": "${command:activeJuliaEnvironment}",
|
||||
"args": ["data/sample.csv", "-t 0.5"]
|
||||
}
|
||||
]
|
||||
}
|
10
.vscode/settings.json
vendored
10
.vscode/settings.json
vendored
|
@ -1,10 +0,0 @@
|
|||
{
|
||||
"cSpell.words": [
|
||||
"BLUP",
|
||||
"EBVs",
|
||||
"EPDs",
|
||||
"autob",
|
||||
"beefblup",
|
||||
"dairyblup"
|
||||
]
|
||||
}
|
BIN
Excel/Master BLUP Worksheet.xlsx
Normal file
BIN
Excel/Master BLUP Worksheet.xlsx
Normal file
Binary file not shown.
BIN
Excel/van der Werf.xlsx
Normal file
BIN
Excel/van der Werf.xlsx
Normal file
Binary file not shown.
287
Julia/beefblup.jl
Normal file
287
Julia/beefblup.jl
Normal file
|
@ -0,0 +1,287 @@
|
|||
# beefblup
|
||||
# Main script for performing single-variate BLUP to find beef cattle
|
||||
# breeding values
|
||||
# Usage: julia beefblup.jl
|
||||
# (C) 2020 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
|
||||
# Import the required packages
|
||||
using XLSX
|
||||
using LinearAlgebra
|
||||
using Dates
|
||||
using Gtk
|
||||
|
||||
# Display stuff
|
||||
println("beefblup v 0.1")
|
||||
println("(C) 2020 Thomas A. Christensen II")
|
||||
println("https://github.com/millironx/beefblup")
|
||||
print("\n")
|
||||
|
||||
### Prompt User
|
||||
# Ask for an input spreadsheet
|
||||
path = open_dialog_native(
|
||||
"Select a beefblup worksheet",
|
||||
GtkNullContainer(),
|
||||
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
|
||||
)
|
||||
|
||||
# Ask for an output text filename
|
||||
savepath = save_dialog_native(
|
||||
"Save your beefblup results",
|
||||
GtkNullContainer(),
|
||||
(GtkFileFilter("*.txt", name="Results file"),
|
||||
"*.txt")
|
||||
)
|
||||
|
||||
# Ask for heritability
|
||||
print("What is the heritability for this trait?> ")
|
||||
h2 = parse(Float64, readline(stdin))
|
||||
|
||||
### Import input filename
|
||||
print("[🐮]: Importing Excel file...")
|
||||
|
||||
# Import data from a suitable spreadsheet
|
||||
data = XLSX.readxlsx(path)[1][:]
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Process input file
|
||||
print("[🐮]: Processing and formatting data...")
|
||||
|
||||
# Extract the headers into a separate array
|
||||
headers = data[1,:]
|
||||
data = data[2:end,:]
|
||||
|
||||
# Sort the array by date
|
||||
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
|
||||
|
||||
# Define fields to hold id values for animals and their parents
|
||||
ids = string.(data[:,1])
|
||||
damids = string.(data[:,3])
|
||||
sireids = string.(data[:,4])
|
||||
numanimals = length(ids)
|
||||
|
||||
# Find the index values for animals and their parents
|
||||
dam = indexin(damids, ids)
|
||||
sire = indexin(sireids, ids)
|
||||
|
||||
# Store column numbers that need to be deleted
|
||||
# Column 6 contains an intermediate Excel calculation and always need to
|
||||
# be deleted
|
||||
colstokeep = [1, 2, 3, 4, 5]
|
||||
|
||||
# Find any columns that need to be deleted
|
||||
for i in 7:length(headers)
|
||||
if length(unique(data[:,i])) <= 1
|
||||
colname = headers[i]
|
||||
print("Column '")
|
||||
print(colname)
|
||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
||||
else
|
||||
push!(colstokeep, i)
|
||||
end
|
||||
end
|
||||
|
||||
# Delete the appropriate columns from the datasheet and the headers
|
||||
data = data[:, colstokeep]
|
||||
headers = headers[colstokeep]
|
||||
|
||||
# Determine how many contemporary groups there are
|
||||
numgroups = ones(1, length(headers)-5)
|
||||
for i in 6:length(headers)
|
||||
numgroups[i-5] = length(unique(data[:,i]))
|
||||
end
|
||||
|
||||
# If there are more groups than animals, then the analysis cannot continue
|
||||
if sum(numgroups) >= numanimals
|
||||
println("There are more contemporary groups than animals. The analysis will
|
||||
now abort.")
|
||||
exit()
|
||||
end
|
||||
|
||||
# Define a "normal" animal as one of the last in the groups, provided that
|
||||
# all traits do not have null values
|
||||
normal = Array{String}(undef,1,length(headers)-5)
|
||||
for i in 6:length(headers)
|
||||
for j in numanimals:-1:1
|
||||
if !ismissing(data[j,i])
|
||||
normal[i-5] = string(data[j,i])
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Create the fixed-effect matrix
|
||||
print("[🐮]: Creating the fixed-effect matrix...")
|
||||
|
||||
# Form the fixed-effect matrix
|
||||
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
||||
X[:,1] = ones(Int8, 1, numanimals)
|
||||
|
||||
# Create an external counter that will increment through both loops
|
||||
counter = 2
|
||||
|
||||
# Store the traits in a string array
|
||||
adjustedtraits =
|
||||
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
||||
# Iterate through each group
|
||||
for i in 1:length(normal)
|
||||
# Find the traits that are present in this trait
|
||||
localdata = string.(data[:,i+5])
|
||||
traits = unique(localdata)
|
||||
# Remove the normal version from the analysis
|
||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
||||
# Iterate inside of the group
|
||||
for j in 1:length(effecttraits)
|
||||
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
||||
X[matchedindex, counter] .= 1
|
||||
# Add this trait to the string
|
||||
adjustedtraits[counter - 1] = traits[j]
|
||||
# Increment the big counter
|
||||
global counter = counter + 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Additive relationship matrix
|
||||
print("[🐮]: Creating additive relationship matrix...")
|
||||
|
||||
# Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals)
|
||||
|
||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||
# Henderson
|
||||
for i in 1:numanimals
|
||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,dam[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,sire[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
else
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0
|
||||
A[i,j] = 0
|
||||
end
|
||||
A[i,i] = 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Perform BLUP
|
||||
print("[🐮]: Solving the mixed-model equations...")
|
||||
|
||||
# Extract the observed data
|
||||
Y = convert(Array{Float64}, data[:,5])
|
||||
|
||||
# The random effects matrix
|
||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||
|
||||
# Remove items where there is no data
|
||||
nullobs = findall(isnothing, Y)
|
||||
Z[nullobs, nullobs] .= 0
|
||||
|
||||
# Calculate heritability
|
||||
λ = (1-h2)/h2
|
||||
|
||||
# Use the mixed-model equations
|
||||
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
||||
MMY = [X'*Y; Z'*Y]
|
||||
solutions = MME\MMY
|
||||
|
||||
# Find the accuracies
|
||||
diaginv = diag(inv(MME))
|
||||
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Output the results
|
||||
print("[🐮]: Saving results...")
|
||||
|
||||
# Find how many traits we found BLUE for
|
||||
numgroups = numgroups .- 1
|
||||
|
||||
# Start printing results to output
|
||||
fileID = open(savepath, "w")
|
||||
write(fileID, "beefblup Results Report\n")
|
||||
write(fileID, "Produced using beefblup for Julia (")
|
||||
write(fileID, "https://github.com/millironx/beefblup")
|
||||
write(fileID, ")\n\n")
|
||||
write(fileID, "Input:\t")
|
||||
write(fileID, path)
|
||||
write(fileID, "\nAnalysis performed:\t")
|
||||
write(fileID, string(Dates.today()))
|
||||
write(fileID, "\nTrait examined:\t")
|
||||
write(fileID, headers[5])
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Print base population stats
|
||||
write(fileID, "Base Population:\n")
|
||||
for i in 1:length(numgroups)
|
||||
write(fileID, "\t")
|
||||
write(fileID, headers[i+5])
|
||||
write(fileID, ":\t")
|
||||
write(fileID, normal[i])
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\tMean ")
|
||||
write(fileID, headers[5])
|
||||
write(fileID, ":\t")
|
||||
write(fileID, string(solutions[1]))
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Contemporary group adjustments
|
||||
counter = 2
|
||||
write(fileID, "Contemporary Group Effects:\n")
|
||||
for i in 1:length(numgroups)
|
||||
write(fileID, "\t")
|
||||
write(fileID, headers[i+5])
|
||||
write(fileID, "\tEffect\tReliability\n")
|
||||
for j in 1:numgroups[i]
|
||||
write(fileID, "\t")
|
||||
write(fileID, adjustedtraits[counter - 1])
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[counter]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[counter]))
|
||||
write(fileID, "\n")
|
||||
|
||||
global counter = counter + 1
|
||||
end
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\n")
|
||||
|
||||
# Expected breeding values
|
||||
write(fileID, "Expected Breeding Values:\n")
|
||||
write(fileID, "\tID\tEBV\tReliability\n")
|
||||
for i in 1:numanimals
|
||||
write(fileID, "\t")
|
||||
write(fileID, ids[i])
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[i+counter-1]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[i+counter-1]))
|
||||
write(fileID, "\n")
|
||||
end
|
||||
|
||||
write(fileID, "\n - END REPORT -")
|
||||
close(fileID)
|
||||
|
||||
print("Done!\n")
|
13
Julia/install.jl
Normal file
13
Julia/install.jl
Normal file
|
@ -0,0 +1,13 @@
|
|||
# beefblup install
|
||||
# Prepares the Julia environment for using beefblup by installing the requisite
|
||||
# packages
|
||||
# Usage: julia install.jl
|
||||
# (C) 2020 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
|
||||
# Import the package manager
|
||||
using Pkg
|
||||
|
||||
# Install requisite packages
|
||||
Pkg.add("XLSX")
|
||||
Pkg.add("Gtk")
|
|
@ -1,29 +1,29 @@
|
|||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2021, Thomas A. Christensen II
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020, Thomas A. Christensen II
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
341
MATLAB/beefblup.m
Normal file
341
MATLAB/beefblup.m
Normal file
|
@ -0,0 +1,341 @@
|
|||
% beefblup
|
||||
% Main script for performing single-variate BLUP to find beef cattle
|
||||
% breeding values
|
||||
% Usage: beefblup
|
||||
% (C) 2020 Thomas A. Christensen II
|
||||
% Licensed under BSD-3-Clause License
|
||||
|
||||
% Prepare the workspace for computation
|
||||
clear
|
||||
clc
|
||||
close all
|
||||
|
||||
%% Display stuff
|
||||
disp('beefblup v. 0.1')
|
||||
disp('(C) 2020 Thomas A. Christensen II')
|
||||
disp('https://github.com/millironx/beefblup')
|
||||
disp(' ')
|
||||
|
||||
%% Prompt User
|
||||
% Ask for an input spreadsheet
|
||||
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
|
||||
|
||||
% Ask for an ouput text file
|
||||
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
|
||||
|
||||
% Ask for heritability
|
||||
h2 = input('What is the heritability for this trait? >> ');
|
||||
|
||||
|
||||
%% Import input file
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Importing Excel file...')
|
||||
|
||||
% Import data from a suitable spreadsheet
|
||||
fullname = [path name];
|
||||
clear name path
|
||||
[~, ~, data] = xlsread(fullname);
|
||||
|
||||
disp('Importing Excel file... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Process input file
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Processing and formatting data...')
|
||||
disp(' ')
|
||||
|
||||
% Extract the headers into a separate array
|
||||
headers = data(1,:);
|
||||
data(1,:) = [];
|
||||
|
||||
% Convert the string dates to numbers
|
||||
data(:,2) = num2cell(datenum(data(:,2)));
|
||||
|
||||
% Sort the array by date
|
||||
data = sortrows(data,2);
|
||||
|
||||
% Coerce all id fields to string format
|
||||
strings = [1 3 4];
|
||||
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
|
||||
|
||||
% Define fields to hold id values for animals and their parents
|
||||
ids = char(data{:,1});
|
||||
damids = char(data{:,3});
|
||||
sireids = char(data{:,4});
|
||||
numanimals = length(data(:,1));
|
||||
|
||||
% Define fields to hold the index values for animals and their parents
|
||||
dam = zeros(numanimals,1);
|
||||
sire = zeros(numanimals,1);
|
||||
|
||||
% Find all row numbers where an animal was a parent
|
||||
for i=1:numanimals
|
||||
% Find all animals that this animal birthed
|
||||
dammatch = ismember(damids, ids(i,:), 'rows');
|
||||
damindexes = find(dammatch == 1);
|
||||
dam(damindexes) = i;
|
||||
|
||||
% Find all animals that this animal sired
|
||||
sirematch = ismember(sireids, ids(i,:), 'rows');
|
||||
sireindexes = find(sirematch == 1);
|
||||
sire(sireindexes) = i;
|
||||
end
|
||||
|
||||
% Store column numbers that need to be deleted
|
||||
% Column 6 contains an intermediate Excel calculation and always needs to
|
||||
% be deleted
|
||||
colstodelete = 6;
|
||||
|
||||
% Coerce each group to string format
|
||||
for i = 7:length(headers)
|
||||
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
|
||||
end
|
||||
|
||||
% Find any columns that need to be deleted
|
||||
for i = 7:length(headers)
|
||||
if length(uniquecell(data(:,i))) <= 1
|
||||
colname = headers{i};
|
||||
disp(['Column "' colname '" does not have any unique animals and will be removed'])
|
||||
disp('from this analysis');
|
||||
colstodelete = [colstodelete i];
|
||||
end
|
||||
end
|
||||
|
||||
% Delete the appropriate columns from the datasheet and the headers
|
||||
data(:,colstodelete) = [];
|
||||
headers(colstodelete) = [];
|
||||
|
||||
% Determine how many contemporary groups there are
|
||||
numgroups = ones(1, length(headers)-5);
|
||||
for i = 6:length(headers)
|
||||
numgroups(i-5) = length(uniquecell(data(:,i)));
|
||||
end
|
||||
|
||||
% If there are more groups than animals, then the analysis cannot continue
|
||||
if sum(numgroups) >= numanimals
|
||||
disp('There are more contemporary groups than animals. The analysis will now abort.');
|
||||
return
|
||||
end
|
||||
|
||||
% Define a "normal" animal as one of the last in the groups, provided that
|
||||
% all traits do not have null values
|
||||
normal = cell([1 length(headers)-5]);
|
||||
for i = 6:length(headers)
|
||||
for j = numanimals:-1:1
|
||||
if not(cellfun(@isempty, data(j,i)))
|
||||
normal(i - 5) = data(j,i);
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
% Print the results of the "normal" definition
|
||||
disp(' ')
|
||||
disp('For the purposes of this analysis, a "normal" animal will be defined')
|
||||
disp('by the following traits:')
|
||||
for i = 6:length(headers)
|
||||
disp([headers{i} ': ' normal{i-5}])
|
||||
end
|
||||
disp(' ')
|
||||
disp('If no animal matching this description exists, the results may appear')
|
||||
disp('outlandish, but are still as correct as the accuracy suggests')
|
||||
disp(' ')
|
||||
|
||||
disp('Processing and formatting data... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Create the fixed-effect matrix
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Creating the fixed-effect matrix...')
|
||||
|
||||
% Form the fixed effect matrix
|
||||
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
|
||||
X(:,1) = ones(1, numanimals);
|
||||
|
||||
% Create an external counter that will increment through both loops
|
||||
I = 2;
|
||||
|
||||
% Store the traits in a string cell array
|
||||
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
|
||||
|
||||
% Iterate through each group
|
||||
for i = 1:length(normal)
|
||||
% Find the traits that are present in this trait
|
||||
traits = uniquecell(data(:,i+5));
|
||||
|
||||
% Remove the "normal" version from the analysis
|
||||
normalindex = find(strcmp(traits, normal{i}));
|
||||
traits(normalindex) = [];
|
||||
|
||||
% Iterate inside of the group
|
||||
for j = 1:length(traits)
|
||||
matchedindex = find(strcmp(data(:,i+5), traits{j}));
|
||||
X(matchedindex, I) = 1;
|
||||
|
||||
% Add this trait to the string
|
||||
adjustedtraits(I - 1) = traits(j);
|
||||
|
||||
% Increment the big counter
|
||||
I = I + 1;
|
||||
end
|
||||
end
|
||||
|
||||
disp('Creating the fixed-effect matrix... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Additive relationship matrix
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Creating the additive relationship matrix...')
|
||||
|
||||
% Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals);
|
||||
|
||||
% Create the additive relationship matrix by the FORTRAN method presented
|
||||
% by Henderson
|
||||
for i = 1:numanimals
|
||||
if dam(i) ~= 0 && sire(i) ~= 0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*(A(j,sire(i))+A(j,dam(i)));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1 + 0.5*A(sire(i),dam(i));
|
||||
elseif dam(i) ~= 0 && sire(i) == 0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*A(j,dam(i));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1;
|
||||
elseif dam(i) == 0 && sire(i) ~=0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*A(j,sire(i));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1;
|
||||
else
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0;
|
||||
A(i,j) = 0;
|
||||
end
|
||||
A(i,i) = 1;
|
||||
end
|
||||
end
|
||||
|
||||
disp('Creating the additive relationship matrix... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Perform BLUP
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Solving the mixed-model equations')
|
||||
|
||||
% Extract the observed data
|
||||
Y = cell2mat(data(:, 5));
|
||||
|
||||
% The identity matrix for random effects
|
||||
Z = eye(numanimals, numanimals);
|
||||
|
||||
% Remove items where there is no data
|
||||
nullobs = find(isnan(Y));
|
||||
Z(nullobs, nullobs) = 0;
|
||||
|
||||
% Calculate heritability
|
||||
lambda = (1-h2)/h2;
|
||||
|
||||
% Use the mixed-model equations
|
||||
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
|
||||
|
||||
% Find the accuracies
|
||||
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
|
||||
reliability = 1 - diaginv.*lambda;
|
||||
|
||||
disp('Solving the mixed-model equations... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Output the results
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Saving results...')
|
||||
|
||||
% Find how many traits we found BLUE for
|
||||
numgroups = numgroups - 1;
|
||||
|
||||
% Start printing results to output
|
||||
fileID = fopen([savepath savename], 'w');
|
||||
fprintf(fileID, 'beefblup Results Report\n');
|
||||
fprintf(fileID, 'Produced using beefblup for MATLAB (');
|
||||
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
|
||||
fprintf(fileID, ')\n\n');
|
||||
fprintf(fileID, 'Input:\t');
|
||||
fprintf(fileID, '%s', fullname);
|
||||
fprintf(fileID, '\nAnalysis performed:\t');
|
||||
fprintf(fileID, date);
|
||||
fprintf(fileID, '\nTrait examined:\t');
|
||||
fprintf(fileID, [headers{5}]);
|
||||
fprintf(fileID, '\n\n');
|
||||
|
||||
% Print base population stats
|
||||
fprintf(fileID, 'Base Population:\n');
|
||||
for i = 1:length(numgroups)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [headers{i+5}]);
|
||||
fprintf(fileID, ':\t');
|
||||
fprintf(fileID, [normal{i}]);
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
fprintf(fileID, '\tMean ');
|
||||
fprintf(fileID, [headers{5}]);
|
||||
fprintf(fileID, ':\t');
|
||||
fprintf(fileID, num2str(solutions(1)));
|
||||
fprintf(fileID, '\n\n');
|
||||
|
||||
I = 2;
|
||||
|
||||
% Contemporary group adjustments
|
||||
fprintf(fileID, 'Contemporary Group Effects:\n');
|
||||
for i = 1:length(numgroups)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [headers{i+5}]);
|
||||
fprintf(fileID, '\tEffect\tReliability\n');
|
||||
for j = 1:numgroups(i)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [adjustedtraits{I-1}]);
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(solutions(I)));
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(reliability(I)));
|
||||
fprintf(fileID, '\n');
|
||||
|
||||
I = I + 1;
|
||||
end
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
fprintf(fileID, '\n');
|
||||
|
||||
% Expected breeding values
|
||||
fprintf(fileID, 'Expected Breeding Values:\n');
|
||||
fprintf(fileID, '\tID\tEBV\tReliability\n');
|
||||
for i = 1:numanimals
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [data{i,1}]);
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(solutions(i+I-1)));
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(reliability(i+I-1)));
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
|
||||
fprintf(fileID, '\n - END REPORT -');
|
||||
fclose(fileID);
|
||||
|
||||
disp('Saving results... Done!')
|
||||
toc
|
||||
disp(' ')
|
9
MATLAB/uniquecell.m
Normal file
9
MATLAB/uniquecell.m
Normal file
|
@ -0,0 +1,9 @@
|
|||
% uniquenan
|
||||
% Serves the same purpose as UNIQUE, but ensures any empty cells are not
|
||||
% counted
|
||||
function y = uniquecell(x)
|
||||
y = unique(x);
|
||||
if any(cellfun(@isempty, y))
|
||||
y(cellfun(@isempty, y)) = [];
|
||||
end
|
||||
end
|
21
Project.toml
21
Project.toml
|
@ -1,21 +0,0 @@
|
|||
name = "BeefBLUP"
|
||||
uuid = "03993faf-e476-444a-86c9-f31e8122fa24"
|
||||
authors = ["Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>"]
|
||||
version = "0.3.0"
|
||||
|
||||
[deps]
|
||||
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44"
|
||||
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[compat]
|
||||
julia = "1.3"
|
||||
|
||||
[extras]
|
||||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
|
||||
|
||||
[targets]
|
||||
test = ["Test"]
|
81
README.md
81
README.md
|
@ -1,9 +1,5 @@
|
|||
# [:cow:]: beefblup
|
||||
|
||||
[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://millironx.github.io/beefblup/stable)
|
||||
[![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://MillironX.github.io/beefblup/dev)
|
||||
[![Build Status](https://travis-ci.com/millironx/beefblup.svg?branch=develop)](https://travis-ci.com/millironx/beefblup)
|
||||
[![Coverage](https://codecov.io/gh/millironx/beefblup/branch/develop/graph/badge.svg)](https://codecov.io/gh/millironx/beefblup)
|
||||
[![GitHub license](https://img.shields.io/github/license/MillironX/beefblup)](https://github.com/MillironX/beefblup/blob/master/LICENSE.md)
|
||||
[![Join the chat at https://gitter.im/beefblup/community](https://badges.gitter.im/beefblup/community.svg)](https://gitter.im/beefblup/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[![Github all releases](https://img.shields.io/github/downloads/MillironX/beefblup/total.svg)](https://GitHub.com/MillironX/beefblup/releases)
|
||||
|
@ -17,34 +13,35 @@ Why? It's part of my effort to
|
|||
## Installation
|
||||
|
||||
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
||||
2. Download the [beefblup ZIP
|
||||
file](https://github.com/MillironX/beefblup/archive/refs/tags/v0.2.2.zip) and unzip it someplace memorable
|
||||
3. In your file explorer, copy the address of the "beefblup" folder
|
||||
4. Launch Julia
|
||||
5. Type `cd("<the address copied in step 5>")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||
6. Type the `]` key
|
||||
7. Type `activate .` and press **Enter**
|
||||
8. Type `instantiate` and press **Enter**
|
||||
9. Installation is done: you can close the Julia window
|
||||
2. Open a new Julia window and type the `]` key
|
||||
3. Type `add XLSX Gtk` and press **Enter**
|
||||
|
||||
Alternatively, you can run the [install
|
||||
script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from
|
||||
Julia.
|
||||
|
||||
## How to Use
|
||||
|
||||
1. Make a copy of the "sample.csv" spreadsheet and replace the data with your own
|
||||
1. The trait you wish to calculate EBVs for always goes in the rightmost column
|
||||
2. If you wish to add more contemporary group traits to your analysis, include them before the rightmost column
|
||||
2. Save and close
|
||||
3. In your file explorer, copy the address of the "beefblup" folder
|
||||
4. Launch Julia
|
||||
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||
6. Type the `]` key
|
||||
7. Type `activate .` and press **Enter**
|
||||
8. Press **Backspace**
|
||||
9. Type `include("src/beefblup.jl")` and press **Enter**
|
||||
10. Select the spreadsheet you created in steps 1-4
|
||||
11. Follow the on-screen prompts
|
||||
12. **#KeepEPDsReal!**
|
||||
> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118).
|
||||
> Although it's tempting to just open up beefblup in Juno and press the big play
|
||||
> button, it won't work. Follow these instructions until it's fixed. If you
|
||||
> don't know what Juno is: ignore this message.
|
||||
|
||||
1. Download the [beefblup ZIP
|
||||
file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it
|
||||
someplace memorable
|
||||
2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own
|
||||
3. If you wish to add more contemporary group traits to your analysis, replace
|
||||
or add them to the right of the Purple section
|
||||
4. Save and close
|
||||
5. In your file explorer, copy the address of the "Julia" folder
|
||||
6. Launch Julia
|
||||
7. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup\Julia")`)
|
||||
8. Type `include("beefblup.jl")` and press **Enter**
|
||||
9. Select the spreadsheet you created in steps 1-4
|
||||
10. Follow the on-screen prompts
|
||||
11. **#KeepEPDsReal!**
|
||||
|
||||
## For Programmers
|
||||
|
||||
|
@ -54,19 +51,19 @@ Why? It's part of my effort to
|
|||
|
||||
### Development Roadmap
|
||||
|
||||
| Version | Feature | Status |
|
||||
| ------- | ----------------------------------------------------------------------------------- | ------------------ |
|
||||
| v0.1 | Julia port of original MATLAB script | :heavy_check_mark: |
|
||||
| v0.2 | Spreadsheet format redesign | :heavy_check_mark: |
|
||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) | |
|
||||
| v0.4 | Add GUI for all options | |
|
||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | |
|
||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) | |
|
||||
| v0.7 | Multiple trait BLUP | |
|
||||
| v0.8 | Maternal effects BLUP | |
|
||||
| v0.9 | Genomic BLUP | |
|
||||
| v0.10 | beefblup binaries | |
|
||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | |
|
||||
| Version | Feature |
|
||||
| ------- | ------------------------------------------------------------------- |
|
||||
| v0.1 | Julia port of original MATLAB script |
|
||||
| v0.2 | Spreadsheet format redesign |
|
||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) |
|
||||
| v0.4 | Add GUI for all options |
|
||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects |
|
||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) |
|
||||
| v0.7 | Multiple trait BLUP |
|
||||
| v0.8 | Maternal effects BLUP |
|
||||
| v0.9 | Genomic BLUP |
|
||||
| v0.10 | beefblup binaries |
|
||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) |
|
||||
|
||||
### Bug Reports
|
||||
|
||||
|
|
47
beefblup
47
beefblup
|
@ -1,47 +0,0 @@
|
|||
#!/bin/bash
|
||||
# -*- mode: julia -*-
|
||||
#=
|
||||
exec julia --project="$(realpath "$(dirname "${BASH_SOURCE[0]}")")" "${BASH_SOURCE[0]}" "$@"
|
||||
=#
|
||||
# beefblup cli
|
||||
# Provides a convenient CLI for using beefblup
|
||||
# cSpell:includeRegExp #.*
|
||||
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||
|
||||
# Import packages
|
||||
using BeefBLUP
|
||||
using ArgParse
|
||||
|
||||
# If this is run without arguments, catch that before parsing
|
||||
if isempty(ARGS)
|
||||
BeefBLUP.beefblup()
|
||||
exit()
|
||||
end
|
||||
|
||||
# Setup the argument table
|
||||
argsettings = ArgParseSettings()
|
||||
@add_arg_table argsettings begin
|
||||
"datafile"
|
||||
help = "datasheet containing the pedigree, fixed effects, and response trait formatted in CSV"
|
||||
required = true
|
||||
arg_type = String
|
||||
"resultsfile"
|
||||
help = "filename for storing the results of the analysis"
|
||||
required = false
|
||||
arg_type = String
|
||||
"--heritability", "-t"
|
||||
help = "heritability of the response trait being analyzed"
|
||||
required = true
|
||||
arg_type = Float64
|
||||
end
|
||||
|
||||
arguments = parse_args(argsettings)
|
||||
|
||||
h2 = arguments["heritability"]
|
||||
|
||||
if isnothing(arguments["resultsfile"])
|
||||
BeefBLUP.beefblup(arguments["datafile"], h2)
|
||||
exit()
|
||||
end
|
||||
|
||||
BeefBLUP.beefblup(arguments["datafile"], arguments["resultsfile"], h2)
|
|
@ -1,8 +0,0 @@
|
|||
id,birthdate,dam,sire,year,sex,species,weaning_weight
|
||||
1,1/1/1990,,,1990,male,cow,354
|
||||
2,1/1/1990,,,1990,female,cow,251
|
||||
3,1/1/1991,,1,1991,male,cow,327
|
||||
4,1/1/1991,,1,1991,female,cow,328
|
||||
5,1/1/1991,2,1,1991,male,cow,301
|
||||
6,1/1/1991,2,,1991,female,cow,270
|
||||
7,1/1/1992,,,1992,male,cow,330
|
|
|
@ -1,3 +0,0 @@
|
|||
[deps]
|
||||
BeefBLUP = "03993faf-e476-444a-86c9-f31e8122fa24"
|
||||
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
|
26
docs/make.jl
26
docs/make.jl
|
@ -1,26 +0,0 @@
|
|||
using BeefBLUP
|
||||
using Documenter
|
||||
|
||||
DocMeta.setdocmeta!(BeefBLUP, :DocTestSetup, :(using BeefBLUP); recursive=true)
|
||||
|
||||
makedocs(;
|
||||
modules=[BeefBLUP],
|
||||
authors="Thomas A. Christensen II <25492070+MillironX@users.noreply.github.com>",
|
||||
repo="https://github.com/MillironX/beefblup/blob/{commit}{path}#{line}",
|
||||
sitename="beefblup",
|
||||
format=Documenter.HTML(;
|
||||
prettyurls=get(ENV, "CI", "false") == "true",
|
||||
canonical="https://millironx.com/beefblup",
|
||||
assets=String[],
|
||||
),
|
||||
pages=[
|
||||
"Home" => "index.md",
|
||||
"How to Calculate EPDs" => "how-to-calculate-epds.md",
|
||||
"CLI Reference (WIP)" => "beefblup-cli.md"
|
||||
],
|
||||
)
|
||||
|
||||
deploydocs(;
|
||||
repo="github.com/MillironX/beefblup",
|
||||
devbranch="develop",
|
||||
)
|
|
@ -1,3 +0,0 @@
|
|||
{
|
||||
"MD041": false
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
```@meta
|
||||
CurrentModule = BeefBLUP
|
||||
```
|
||||
|
||||
# beefblup Command Line Interface (CLI) documentation
|
||||
|
||||
> _A work in progress_
|
||||
|
||||
**Notice:** This document is a draft for what the command-line interface for
|
||||
beefblup would look like as of version 1.0, if beefblup was even a command-line
|
||||
application to begin with (it's not). It is modeled (loosely) after the man page
|
||||
format. It is not intended to be taken seriously, but instead to serve as a
|
||||
useful thought experiment and brainstorming ground on the future of beefblup.
|
||||
Please use it if it clarifies things for you. If it doesn't, ignore it.
|
||||
|
||||
## Input file
|
||||
|
||||
beefblup requires a very specific format of input file. The format may be in
|
||||
comma-separated values (CSV) or Excel 2007+ (XLSX) format. CSV files should not
|
||||
be quoted (and therefore cannot have commas within cell values). Other formats
|
||||
may be forthcoming.
|
||||
|
||||
A beefblup data file must have at least six columns appearing in this order:
|
||||
|
||||
- ID
|
||||
- Sire ID
|
||||
- Dam ID
|
||||
- Birthdate
|
||||
- Fixed effect(s)
|
||||
- Response variable(s)
|
||||
|
||||
The first row always contains column names. The values of column names are
|
||||
unimportant for the first four columns, as they will always be treated the same
|
||||
regardless of the name. The generated report will use the column names
|
||||
of fixed effects and response variables as given.
|
||||
|
||||
Each fixed effect should have its own column, to as many as are needed. There is
|
||||
no limit to the number of fixed effects as defined by beefblup, however its
|
||||
dependencies might have some. The same rules apply to response variables.
|
||||
|
||||
Unknown values should be left blank (`,,`). Do not substitute null placeholders
|
||||
(e.g. `NULL`, `NA`, `0`, `nothing`, `undefined`, etc.) for unknown values.
|
||||
|
||||
An example spreadsheet might have the following format
|
||||
|
||||
| ID | Sire ID | Dam ID | Birthdate | Sex | Weaning Weight |
|
||||
| --- | ------- | ------ | --------- | ------ | -------------- |
|
||||
| 1 | | | 1/1/1990 | Male | 354 |
|
||||
| 2 | 1 | | 1/1/1990 | Female | 251 |
|
||||
| 3 | 1 | | 1/1/1991 | Male | 327 |
|
||||
| 4 | 1 | 2 | 1/1/1991 | Female | 328 |
|
||||
| 5 | | 2 | 1/1/1991 | Male | 301 |
|
||||
| 6 | | | 1/1/1991 | Female | 270 |
|
||||
| 7 | | | 1/1/1992 | Male | 330 |
|
||||
|
||||
## Synopsis
|
||||
|
||||
```bash
|
||||
beefblup [-G SNPs_file] [-M num_response_vars] [-o report_spreadsheet]
|
||||
[--no-aod] [--no-year] [--no-season] [--no-autob] [--maternal] input_file
|
||||
[report_file]
|
||||
```
|
||||
|
||||
## Command line basic syntax
|
||||
|
||||
The most basic input is to simply pass the input file name to the program.
|
||||
|
||||
```bash
|
||||
beefblup filename.csv
|
||||
```
|
||||
|
||||
In this case beefblup will insert fixed-effects for age-of-dam, year, and
|
||||
season, and will calculate the EBVs for the response variable in the final
|
||||
column. The report will then be saved as `filename_report.txt`.
|
||||
|
||||
## Suppressing automatically-calculated fixed effects
|
||||
|
||||
If you don't wish to include one of the automatically calculated fixed-effects
|
||||
from your model, you can pass arguments to suppress them.
|
||||
|
||||
### Suppress Age-of-dam
|
||||
|
||||
```bash
|
||||
beefblup --no-aod filename.csv
|
||||
```
|
||||
|
||||
### Suppress year
|
||||
|
||||
```bash
|
||||
beefblup --no-year filename.csv
|
||||
```
|
||||
|
||||
### Suppress season
|
||||
|
||||
```bash
|
||||
beefblup --no-season filename.csv
|
||||
```
|
||||
|
||||
### Suppress all calculated fixed effects
|
||||
|
||||
```bash
|
||||
beefblup --no-autob filename.csv
|
||||
```
|
||||
|
||||
The argument `--no-autob` comes from the nomenclature of assigning fixed-effects
|
||||
to the matrix _b_ in Henderson's mixed-model equations.
|
|
@ -1,542 +0,0 @@
|
|||
```@meta
|
||||
CurrentModule = BeefBLUP
|
||||
```
|
||||
|
||||
# How to Calculate EPDs
|
||||
|
||||
Not to exclude our Australian comrades or our dairy friends, this guide could
|
||||
alternately be called
|
||||
|
||||
- How to Calculate Expected Breeding Values (EBVs)
|
||||
- How to Calculate Predicted Transmitting Abilities (PTAs)
|
||||
- How to Calculate Expected Progeny Differences (EPDs)
|
||||
|
||||
Since I'm mostly talking to American beef producers, though, we'll stick with
|
||||
EPDs for most of this discussion.
|
||||
|
||||
Expected Breeding Values (EBVs) (which are more often halved and published as
|
||||
Expected Progeny Differences (EPDs) or Predicted Transmitting Abilities (PTAs)
|
||||
in the United States) are generally found using Charles Henderson's linear
|
||||
mixed-model equations. Great, you say, what is that? I'm glad you asked...
|
||||
|
||||
## The mathematical model
|
||||
|
||||
Every genetics textbook starts with the following equation
|
||||
|
||||
```math
|
||||
P = G + E
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- ``P`` = phenotype
|
||||
- ``G`` = genotype (think: breeding value)
|
||||
- ``E`` = environmental factors
|
||||
|
||||
Now, we can't identify _every_ environmental factor that affects phenotype, but
|
||||
we can identify some of them, so let's substitute ``E`` with some absolutes. A
|
||||
good place to start is the "contemporary group" listings for the trait of
|
||||
interest in the
|
||||
[BIF Guidelines](https://beefimprovement.org/wp-content/uploads/2018/03/BIFGuidelinesFinal_updated0318.pdf),
|
||||
though for the purposes of this example, I'm only going to consider sex, and
|
||||
birth year.
|
||||
|
||||
```math
|
||||
P = G + E_{year} + E_{sex}
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- ``E_n`` is the effect of ``n`` on the phenotype
|
||||
|
||||
Now let's say I want to find the weaning weight breeding value (``G``) of my
|
||||
favorite herd bull. I compile his stats, and then plug them into the equation
|
||||
and solve for ``G``, right? Let's try that.
|
||||
|
||||
### Calf Records
|
||||
|
||||
| ID | Birth Year | Sex | YW (kg) |
|
||||
|:-- | :--------- | :----- |:------- |
|
||||
| 1 | 1990 | Male | 354 |
|
||||
|
||||
```math
|
||||
354 \ \textup{kg} = G_1 + E_{1990} + E_{male}
|
||||
```
|
||||
|
||||
Hmm. I just realized I don't know any of those ``E`` values. Come to think of it,
|
||||
I remember from math class that I will need as many equations as I have
|
||||
unknowns, so I will add equations for other animals that I have records for.
|
||||
|
||||
### Calf Records
|
||||
|
||||
| ID | Birth Year | Sex | YW (kg) |
|
||||
|:--- |:---------- |:------ |:------- |
|
||||
| 1 | 1990 | Male | 354 |
|
||||
| 2 | 1990 | Female | 251 |
|
||||
| 3 | 1991 | Male | 327 |
|
||||
| 4 | 1991 | Female | 328 |
|
||||
| 5 | 1991 | Male | 301 |
|
||||
| 6 | 1991 | Female | 270 |
|
||||
| 7 | 1992 | Male | 330 |
|
||||
|
||||
```math
|
||||
\begin{aligned}
|
||||
251 \ \textup{kg} &= G_2 + E_{1990} + E_{female} \\
|
||||
327 \ \textup{kg} &= G_3 + E_{1991} + E_{male} \\
|
||||
328 \ \textup{kg} &= G_4 + E_{1991} + E_{female} \\
|
||||
301 \ \textup{kg} &= G_5 + E_{1991} + E_{male} \\
|
||||
270 \ \textup{kg} &= G_6 + E_{1991} + E_{female} \\
|
||||
330 \ \textup{kg} &= G_7 + E_{1992} + E_{male}
|
||||
\end{aligned}
|
||||
```
|
||||
|
||||
Drat! Every animal I added brings more variables into the system than it
|
||||
eliminates! In fact, since each cow brings in _at least_ one term
|
||||
(``G_n``), I will never be able to write enough equations to solve for
|
||||
``G`` numerically. I will have to use a different approach.
|
||||
|
||||
## The statistical model: the setup
|
||||
|
||||
Since I can never solve for ``G`` directly, I will have to find some way to
|
||||
estimate it. I can switch to a statistical model and solve for ``G`` that way. The
|
||||
caveat with a statistical model is that there will be some level of error, but
|
||||
so long as we know and can control the level of error, that will be better than
|
||||
not knowing ``G`` at all.
|
||||
|
||||
Since we're switching into a statistical space, we should also switch the
|
||||
variables we're using. I'll rewrite the first equation as
|
||||
|
||||
```math
|
||||
y = b + u + e
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- ``y`` = Phenotype
|
||||
- ``b`` = Environment
|
||||
- ``u`` = Genotype
|
||||
- ``e`` = Error
|
||||
|
||||
It's not as easy as simply substituting ``b`` for every ``E`` that we had above,
|
||||
however. The reason for that is that we must make the assumption that
|
||||
environment is a **fixed effect** and that genotype is a **random effect**. I'll
|
||||
go over why that is later, but for now, understand that we need to transform the
|
||||
environment terms and genotype terms separately.
|
||||
|
||||
We'll start with the environment terms.
|
||||
|
||||
## The statistical model: environment as fixed effects
|
||||
|
||||
To properly transform the equations, I will have to introduce
|
||||
``b_{mean}`` terms in each animal's equation. This is part of the fixed
|
||||
effect statistical assumption, and it will let us obtain a solution.
|
||||
|
||||
Here are the transformed equations:
|
||||
|
||||
```math
|
||||
\begin{aligned}
|
||||
354 \ \textup{kg} &= u_1 + b_{mean} + b_{1990} + b_{male} + e_1 \\
|
||||
251 \ \textup{kg} &= u_2 + b_{mean} + b_{1990} + b_{female} + e_2 \\
|
||||
327 \ \textup{kg} &= u_3 + b_{mean} + b_{1991} + b_{male} + e_3 \\
|
||||
328 \ \textup{kg} &= u_4 + b_{mean} + b_{1991} + b_{female} +e_4 \\
|
||||
301 \ \textup{kg} &= u_5 + b_{mean} + b_{1991} + b_{male} + e_5 \\
|
||||
270 \ \textup{kg} &= u_6 + b_{mean} + b_{1991} + b_{female} + e_6 \\
|
||||
330 \ \textup{kg} &= u_7 + b_{mean} + b_{1992} + b_{male} + e_7
|
||||
\end{aligned}
|
||||
```
|
||||
|
||||
Statistical methods work best in matrix form, so I'm going to convert the set of
|
||||
equations above to a single matrix equation that means the exact same thing.
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
354 \ \textup{kg} \\
|
||||
251 \ \textup{kg} \\
|
||||
327 \ \textup{kg} \\
|
||||
328 \ \textup{kg} \\
|
||||
301 \ \textup{kg} \\
|
||||
270 \ \textup{kg} \\
|
||||
330 \ \textup{kg}
|
||||
\end{bmatrix}
|
||||
=
|
||||
\begin{bmatrix}
|
||||
u_1 \\
|
||||
u_2 \\
|
||||
u_3 \\
|
||||
u_4 \\
|
||||
u_5 \\
|
||||
u_6 \\
|
||||
u_7
|
||||
\end{bmatrix}
|
||||
+
|
||||
b_{mean}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
b_{1990} \\
|
||||
b_{1990} \\
|
||||
b_{1991} \\
|
||||
b_{1991} \\
|
||||
b_{1991} \\
|
||||
b_{1991} \\
|
||||
b_{1992}
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
b_{male} \\
|
||||
b_{female} \\
|
||||
b_{male} \\
|
||||
b_{female} \\
|
||||
b_{male} \\
|
||||
b_{female} \\
|
||||
b_{male}
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
e_1 \\
|
||||
e_2 \\
|
||||
e_3 \\
|
||||
e_4 \\
|
||||
e_5 \\
|
||||
e_6 \\
|
||||
e_7
|
||||
\end{bmatrix}
|
||||
```
|
||||
|
||||
That's a nice equation, but now my hand is getting tired writing all those ``b``
|
||||
terms over and over again, so I'm going to use
|
||||
[the dot product](https://www.khanacademy.org/math/precalculus/x9e81a4f98389efdf:matrices/x9e81a4f98389efdf:multiplying-matrices-by-matrices/v/matrix-multiplication-intro)
|
||||
to condense this down.
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
354 \textup{kg} \\
|
||||
251 \textup{kg} \\
|
||||
327 \textup{kg} \\
|
||||
328 \textup{kg} \\
|
||||
301 \textup{kg} \\
|
||||
270 \textup{kg} \\
|
||||
330 \textup{kg}
|
||||
\end{bmatrix}
|
||||
=
|
||||
\begin{bmatrix}
|
||||
u_1 \\
|
||||
u_2 \\
|
||||
u_3 \\
|
||||
u_4 \\
|
||||
u_5 \\
|
||||
u_6 \\
|
||||
u_7
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 1 & 0 & 0 & 1 & 0 \\
|
||||
1 & 1 & 0 & 0 & 0 & 1 \\
|
||||
1 & 0 & 1 & 0 & 1 & 0 \\
|
||||
1 & 0 & 1 & 0 & 0 & 1 \\
|
||||
1 & 0 & 1 & 0 & 1 & 0 \\
|
||||
1 & 0 & 0 & 1 & 1 & 0
|
||||
\end{bmatrix}
|
||||
\begin{bmatrix}
|
||||
b_{mean} \\
|
||||
b_{1990} \\
|
||||
b_{1991} \\
|
||||
b_{1992} \\
|
||||
b_{male} \\
|
||||
b_{female}
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
e_1 \\
|
||||
e_2 \\
|
||||
e_3 \\
|
||||
e_4 \\
|
||||
e_5 \\
|
||||
e_6 \\
|
||||
e_7
|
||||
\end{bmatrix}
|
||||
```
|
||||
|
||||
That matrix in the middle with all the zeros and ones is called the **incidence
|
||||
matrix**, and essentially reads like a table with each row corresponding to an
|
||||
animal, and each column corresponding to a fixed effect. For brevity, we'll just
|
||||
call it ``X``, though. One indicates that the animal and effect go together,
|
||||
and zero means they don't. For our record, we could write a table to go with
|
||||
``X``, and it would look like this:
|
||||
|
||||
| Animal | mean | 1990 | 1991 | 1992 | male | female |
|
||||
|:------ |:---- |:---- |:---- |:---- |:---- |:------ |
|
||||
| 1 | yes | yes | no | no | yes | no |
|
||||
| 2 | yes | yes | no | no | no | yes |
|
||||
| 3 | yes | no | yes | no | yes | no |
|
||||
| 4 | yes | no | yes | no | no | yes |
|
||||
| 5 | yes | no | yes | no | yes | no |
|
||||
| 6 | yes | no | yes | no | no | yes |
|
||||
| 7 | yes | no | no | yes | yes | no |
|
||||
|
||||
Now that we have ``X``, we have the ability to start making changes to allow
|
||||
us to solve for ``u``. Immediately, we see that ``X`` is **singular**, meaning
|
||||
it can't be solved directly. We kind of already knew that, but now we can
|
||||
quantify it. We calculate the
|
||||
[rank of ``X``](https://math.stackexchange.com/a/2080577),
|
||||
and find that there is only enough information contained in it to solve for 4
|
||||
variables, which means we need to eliminate two columns.
|
||||
|
||||
There are several ways to effectively eliminate fixed effects in this type of
|
||||
system, but one of the simplest and the most common methods is to declare a
|
||||
**base population**, and lump the fixed effects of animals within the base
|
||||
population into the mean fixed effect. Note that it is possible to declare a
|
||||
base population that has no animals in it, but that gives weird results. For
|
||||
this example, we'll follow the convention built into `beefblup` and pick the
|
||||
last occuring form of each variable.
|
||||
|
||||
### Base population
|
||||
|
||||
- **Year**: 1992
|
||||
- **Sex**: Female
|
||||
|
||||
Now in order to use the base population, we simply drop the columns representing
|
||||
conformity with the traits in the base population from ``X````. Our new
|
||||
equation looks like
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
354 \ \textup{kg} \\
|
||||
251 \ \textup{kg} \\
|
||||
327 \ \textup{kg} \\
|
||||
328 \ \textup{kg} \\
|
||||
301 \ \textup{kg} \\
|
||||
270 \ \textup{kg} \\
|
||||
330 \ \textup{kg}
|
||||
\end{bmatrix}
|
||||
=
|
||||
\begin{bmatrix}
|
||||
u_1 \\
|
||||
u_2 \\
|
||||
u_3 \\
|
||||
u_4 \\
|
||||
u_5 \\
|
||||
u_6 \\
|
||||
u_7
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
1 & 1 & 0 1 \\
|
||||
1 & 1 & 0 0 \\
|
||||
1 & 0 & 1 1 \\
|
||||
1 & 0 & 1 0 \\
|
||||
1 & 0 & 1 1 \\
|
||||
1 & 0 & 0 1
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
b_{mean} \\
|
||||
b_{1990} \\
|
||||
b_{1991} \\
|
||||
b_{male} \\
|
||||
\end{bmatrix}
|
||||
+
|
||||
\begin{bmatrix}
|
||||
e_1 \\
|
||||
e_2 \\
|
||||
e_3 \\
|
||||
e_4 \\
|
||||
e_5 \\
|
||||
e_6 \\
|
||||
e_7
|
||||
\end{bmatrix}
|
||||
```
|
||||
|
||||
And the table for humans to understand:
|
||||
|
||||
| Animal | mean | 1990 | 1991 | female |
|
||||
|:------ |:---- |:---- |:---- |:------ |
|
||||
| 1 | yes | yes | no | no |
|
||||
| 2 | yes | yes | no | yes |
|
||||
| 3 | yes | no | yes | no |
|
||||
| 4 | yes | no | yes | yes |
|
||||
| 5 | yes | no | yes | no |
|
||||
| 6 | yes | no | yes | yes |
|
||||
| 7 | yes | no | no | no |
|
||||
|
||||
Even though each animal is said to participate in the mean, the result for the
|
||||
mean will now actually be the average of the base population. Math is weird
|
||||
sometimes.
|
||||
|
||||
Double-checking, the rank of ``X`` is still 4, so we can solve for the average
|
||||
of the base population, and the effect of being born in 1990, the effect of
|
||||
being born in 1991, and the effect of being male.
|
||||
|
||||
Whew! That was some transformation. We still haven't constrained this model
|
||||
enough to solve it, though. Now on to the genotype.
|
||||
|
||||
## The statistical model: genotype as random effect
|
||||
|
||||
Remember I said above that genotype was a **random effect**? Statisticians say
|
||||
"_a random effect is an effect that influences the variance and not the mean of
|
||||
the observation in question._" I'm not sure exactly what that means or how that
|
||||
is applicable to genotype, but it does let us add an additional constraint to
|
||||
our model.
|
||||
|
||||
The basic gist of genetics is that organisms that are related to one another are
|
||||
similar to one another. Based on a pedigree, we can even say how related to one
|
||||
another animals are, and quantify that as the amount that the genotype terms
|
||||
should be allowed to vary between related animals.
|
||||
|
||||
We'll need a pedigree for our animals:
|
||||
|
||||
### Calf Records
|
||||
|
||||
| ID | Sire | Dam | Birth Year | Sex | YW (kg) |
|
||||
|:-- |:---- |:--- |:---------- |:------ |:------- |
|
||||
| 1 | NA | NA | 1990 | Male | 354 |
|
||||
| 2 | NA | NA | 1990 | Female | 251 |
|
||||
| 3 | 1 | NA | 1991 | Male | 327 |
|
||||
| 4 | 1 | NA | 1991 | Female | 328 |
|
||||
| 5 | 1 | 2 | 1991 | Male | 301 |
|
||||
| 6 | NA | 2 | 1991 | Female | 270 |
|
||||
| 7 | NA | NA | 1992 | Male | 330 |
|
||||
|
||||
Now, because cows sexually reproduce, the genotype of one animal is halfway the
|
||||
same as that of either parent (exception: inbreeding, see below). It should go
|
||||
without saying that each animal's genotype is identical to that of itself. From
|
||||
this we can then find the numerical multiplier for any relative (grandparent =
|
||||
1/4, full sibling = 1, half sibling = 1/2, etc.). Let's write those values down
|
||||
in a table.
|
||||
|
||||
| ID | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
||||
|:-- |:--- |:--- |:--- |:--- |:--- |:--- |:-- |
|
||||
| 1 | 1 | 0 | 1/2 | 1/2 | 1/2 | 0 | 0 |
|
||||
| 2 | 0 | 1 | 0 | 0 | 1/2 | 1/2 | 0 |
|
||||
| 3 | 1/2 | 0 | 1 | 1/4 | 1/4 | 0 | 0 |
|
||||
| 4 | 1/2 | 0 | 1/4 | 1 | 1/4 | 0 | 0 |
|
||||
| 5 | 1/2 | 1/2 | 1/4 | 1/4 | 1 | 1/4 | 0 |
|
||||
| 6 | 0 | 1/2 | 0 | 0 | 1/4 | 1 | 0 |
|
||||
| 7 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
|
||||
|
||||
Hmm. All those numbers look suspiciously like a matrix. Why don't I put them
|
||||
into a matrix called ``A``?
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
1 & 0 & \frac{1}{2} & \frac{1}{2} & \frac{1}{2} & 0 & 0 \\
|
||||
0 & 1 & 0 & 0 & \frac{1}{2} & \frac{1}{2} & 0 \\
|
||||
\frac{1}{2} & 0 & 1 & \frac{1}{4} & \frac{1}{4} & 0 & 0 \\
|
||||
\frac{1}{2} & 0 & \frac{1}{4} & 1 & \frac{1}{4} & 0 & 0 \\
|
||||
\frac{1}{2} & \frac{1}{2} & \frac{1}{4} & \frac{1}{4} & 1 & \frac{1}{4} & 0 \\
|
||||
0 & \frac{1}{2} & 0 & 0 & \frac{1}{4} & 1 & 0 \\
|
||||
0 & 0 & 0 & 0 & 0 & 0 & 1
|
||||
\end{bmatrix}
|
||||
```
|
||||
|
||||
Now I'm going to take the matrix with all of the ``u`` values, and call it
|
||||
``μ``. To quantify the idea of genetic relationship, I will then say that
|
||||
|
||||
```math
|
||||
\textup{var}(μ) = A σ_μ^2
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- ``A`` = the relationship matrix defined above
|
||||
- ``σ_μ^2`` = the standard deviation of all the genotypes
|
||||
|
||||
To fully constrain the system, I have to make two more assumptions: 1) that the
|
||||
error term in each animal's equation is independent from all other error terms,
|
||||
and 2) that the error term for each animal is independent from the value of the
|
||||
genotype. I will call the matrix holding the ``e`` values ``ε`` and then say
|
||||
|
||||
```math
|
||||
\textup{var}(ϵ) = I σ_ϵ^2
|
||||
```
|
||||
|
||||
```math
|
||||
\textup{cov}(μ, ϵ) = \textup{cov}(ϵ, μ) = 0
|
||||
```
|
||||
|
||||
Substituting in the matrix names, our equation now looks like
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
354 \textup{kg} \\
|
||||
251 \textup{kg} \\
|
||||
327 \textup{kg} \\
|
||||
328 \textup{kg} \\
|
||||
301 \textup{kg} \\
|
||||
270 \textup{kg} \\
|
||||
330 \textup{kg}
|
||||
\end{bmatrix}
|
||||
= μ + X
|
||||
\begin{bmatrix}
|
||||
b_{mean} \\
|
||||
b_{1990} \\
|
||||
b_{1991} \\
|
||||
b_{male} \\
|
||||
\end{bmatrix}
|
||||
+ ϵ
|
||||
```
|
||||
|
||||
We are going to make three changes to this equation before we are ready to solve
|
||||
it, but they are cosmetic details for this example.
|
||||
|
||||
1. Call the matrix on the left side of the equation ``Y`` (sometimes it's
|
||||
called the **matrix of observations**)
|
||||
2. Multiply ``μ`` by an identity matrix called ``Z``. Multiplying by the
|
||||
identity matrix is the matrix form of multiplying by one, so nothing changes,
|
||||
but if we later want to find one animal's genetic effect on another animal's
|
||||
performance (e.g. a **maternal effects model**), we can alter ``Z`` to
|
||||
allow that
|
||||
3. Call the matrix with all the ``b`` values ``β``.
|
||||
|
||||
With all these changes, we now have
|
||||
|
||||
```math
|
||||
Y = Z μ + X β + ϵ
|
||||
```
|
||||
|
||||
This is the canonical form of the mixed-model equation, and the form that
|
||||
Charles Henderson used to first predict breeding values of livestock.
|
||||
|
||||
## Solving the equations
|
||||
|
||||
Henderson proved that the mixed-model equation can be solved by the following:
|
||||
|
||||
```math
|
||||
\begin{bmatrix}
|
||||
\hat{β} \\
|
||||
\hat{μ}
|
||||
\end{bmatrix}
|
||||
=
|
||||
\begin{bmatrix}
|
||||
X'X & X'Z \\
|
||||
Z'X & Z'Z+A^{-1}λ
|
||||
\end{bmatrix}^{-1}
|
||||
\begin{bmatrix}
|
||||
X'Y \\
|
||||
Z'Y
|
||||
\end{bmatrix}
|
||||
```
|
||||
|
||||
Where
|
||||
|
||||
- The variables with hats are the statistical estimates of their mixed-model
|
||||
counterparts
|
||||
- The predicted value of ``μ`` is called the _Best Linear Unbiased
|
||||
Predictor_ or _BLUP_
|
||||
- The estimated value of ``β`` is called the _Best Linear Unbiased Estimate_
|
||||
or _BLUE_
|
||||
- ' is the transpose operator
|
||||
- ``λ`` is a single real number that is a function of the heritability for the trait
|
||||
being predicted. It can be left out in many cases (``λ = 1``).
|
||||
- ``λ = \frac{1-h^2}{h^2}``
|
||||
|
||||
What happened to
|
||||
|
||||
## Footnotes
|
||||
|
||||
### Exception
|
||||
|
||||
An animal **can** share its genome with itself by a factor of more than one:
|
||||
that's called inbreeding! We can account for this, and `beefblup` does as it
|
||||
calculates ``A``. This is an area that actually merits a good deal of study:
|
||||
see chapter 2 of _Linear Models for the Prediction of Animal Breeding Values_ by
|
||||
Raphael A. Mrode (ISBN 978 1 78064 391 5).
|
|
@ -1,14 +0,0 @@
|
|||
```@meta
|
||||
CurrentModule = BeefBLUP
|
||||
```
|
||||
|
||||
# beefblup
|
||||
|
||||
Documentation for [BeefBLUP](https://github.com/MillironX/beefblup).
|
||||
|
||||
```@index
|
||||
```
|
||||
|
||||
```@docs
|
||||
beefblup
|
||||
```
|
308
src/BeefBLUP.jl
308
src/BeefBLUP.jl
|
@ -1,308 +0,0 @@
|
|||
# beefblup
|
||||
# Julia package for performing single-variate BLUP to find beef cattle
|
||||
# breeding values
|
||||
# (C) 2021 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
# cSpell:includeRegExp #.*
|
||||
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||
|
||||
module BeefBLUP
|
||||
|
||||
# Import the required packages
|
||||
using CSV
|
||||
using DataFrames
|
||||
using LinearAlgebra
|
||||
using Dates
|
||||
using Gtk
|
||||
|
||||
# Main entry-level function - acts just like the script
|
||||
function beefblup()
|
||||
|
||||
# Ask for an input spreadsheet
|
||||
path = open_dialog_native(
|
||||
"Select a beefblup worksheet",
|
||||
GtkNullContainer(),
|
||||
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
|
||||
)
|
||||
|
||||
# Ask for an output text filename
|
||||
savepath = save_dialog_native(
|
||||
"Save your beefblup results",
|
||||
GtkNullContainer(),
|
||||
(GtkFileFilter("*.txt", name="Results file"),
|
||||
"*.txt")
|
||||
)
|
||||
|
||||
# Ask for heritability
|
||||
print("What is the heritability for this trait?> ")
|
||||
h2 = parse(Float64, readline(stdin))
|
||||
|
||||
beefblup(path, savepath, h2)
|
||||
|
||||
end
|
||||
|
||||
function beefblup(datafile::String, h2::Float64)
|
||||
# Assume the data is named the same as the file without the trailing extension
|
||||
dataname = join(split(datafile, ".")[1:end - 1])
|
||||
|
||||
# Create a new results name
|
||||
resultsfile = string(dataname, "_results.txt")
|
||||
|
||||
# Pass this info on to the worker
|
||||
beefblup(datafile, resultsfile, h2)
|
||||
end
|
||||
|
||||
# Main worker function, can perform all the work if given all the user input
|
||||
function beefblup(path::String, savepath::String, h2::Float64)
|
||||
|
||||
# Import data from a suitable spreadsheet
|
||||
data = DataFrame(CSV.File(path))
|
||||
|
||||
# Make sure the data is in the proper format
|
||||
renamecolstospec!(data)
|
||||
|
||||
# Sort the array by date
|
||||
sort!(data, :birthdate)
|
||||
|
||||
# Define fields to hold id values for animals and their parents
|
||||
numanimals = length(data.id)
|
||||
|
||||
# Calculate the relationship matrix
|
||||
A = additiverelationshipmatrix(data.id, data.dam, data.sire)
|
||||
|
||||
# Extract all of the fixed effects
|
||||
fixedeffectdata = data[:,5:end-1]
|
||||
|
||||
(X, fixedeffects) = fixedeffectmatrix(fixedeffectdata)
|
||||
|
||||
# Extract the observed data
|
||||
Y = convert(Array{Float64}, data[:,end])
|
||||
|
||||
# The random effects matrix
|
||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||
|
||||
# Remove items where there is no data
|
||||
nullobs = findall(isnothing, Y)
|
||||
Z[nullobs, nullobs] .= 0
|
||||
|
||||
# Calculate heritability
|
||||
λ = (1 - h2) / h2
|
||||
|
||||
# Use the mixed-model equations
|
||||
MME = [X' * X X' * Z; Z' * X (Z' * Z) + (inv(A) .* λ)]
|
||||
MMY = [X' * Y; Z' * Y]
|
||||
solutions = MME \ MMY
|
||||
|
||||
# Find the accuracies
|
||||
diaginv = diag(inv(MME))
|
||||
reliability = ones(Float64, length(diaginv)) .- diaginv .* λ
|
||||
|
||||
# Find how many traits we found BLUE for
|
||||
numgroups = length(reliability) - numanimals
|
||||
|
||||
# Split the BLUP and BLUE results
|
||||
β = solutions[1:numgroups]
|
||||
μ = solutions[numgroups+1:end]
|
||||
μ_reliability = reliability[numgroups+1:end]
|
||||
|
||||
# Extract the names of the traits
|
||||
traitname = names(data)[end]
|
||||
|
||||
# Start printing results to output
|
||||
fileID = open(savepath, "w")
|
||||
write(fileID, "beefblup Results Report\n")
|
||||
write(fileID, "Produced using beefblup (")
|
||||
write(fileID, "https://github.com/millironx/beefblup")
|
||||
write(fileID, ")\n\n")
|
||||
write(fileID, "Input:\t")
|
||||
write(fileID, path)
|
||||
write(fileID, "\nAnalysis performed:\t")
|
||||
write(fileID, string(Dates.today()))
|
||||
write(fileID, "\nTrait examined:\t")
|
||||
write(fileID, traitname)
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Print base population stats
|
||||
write(fileID, "Base Population:\n")
|
||||
for i in 1:length(fixedeffects)
|
||||
effect = fixedeffects[i]
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(effect.name))
|
||||
write(fileID, ":\t")
|
||||
write(fileID, string(effect.basetrait))
|
||||
write(fileID, "\n")
|
||||
end
|
||||
|
||||
write(fileID, "\tMean ")
|
||||
write(fileID, traitname)
|
||||
write(fileID, ":\t")
|
||||
write(fileID, string(solutions[1]))
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Contemporary group adjustments
|
||||
counter = 2
|
||||
write(fileID, "Contemporary Group Effects:\n")
|
||||
for i in 1:length(fixedeffects)
|
||||
effect = fixedeffects[i]
|
||||
write(fileID, "\t")
|
||||
write(fileID, effect.name)
|
||||
write(fileID, "\tEffect\n")
|
||||
for j in 1:length(effect.alltraits)
|
||||
types = effect.alltraits[j]
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(types))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(β[counter]))
|
||||
write(fileID, "\n")
|
||||
|
||||
counter = counter + 1
|
||||
end
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\n")
|
||||
|
||||
# Expected breeding values
|
||||
write(fileID, "Expected Breeding Values:\n")
|
||||
write(fileID, "\tID\tEBV\tReliability\n")
|
||||
for i in 1:numanimals
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(data.id[i]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(μ[i]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(μ_reliability[i]))
|
||||
write(fileID, "\n")
|
||||
end
|
||||
|
||||
write(fileID, "\n - END REPORT -")
|
||||
close(fileID)
|
||||
|
||||
end
|
||||
|
||||
"""
|
||||
fixedeffectmatrix(fixedeffectdata::DataFrame)
|
||||
|
||||
Creates contemporary groupings and the fixed-effect incidence matrix based on the fixed
|
||||
effects listed in `fixedeffectdata`.
|
||||
|
||||
Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the
|
||||
actual matrix, and `fixedeffects` is the contemporary groupings.
|
||||
"""
|
||||
function fixedeffectmatrix(fixedeffectdata::DataFrame)
|
||||
# Declare an empty return matrix
|
||||
fixedeffects = FixedEffect[]
|
||||
|
||||
# Add each trait to the array
|
||||
for i in 1:size(fixedeffectdata)[2]
|
||||
name = names(fixedeffectdata)[i]
|
||||
traits = eachcol(fixedeffectdata)[i]
|
||||
|
||||
if length(unique(traits)) > 1
|
||||
push!(fixedeffects, FixedEffect(name, traits))
|
||||
else
|
||||
@warn string("column '", name, "' does not have any unique animals and will be dropped from analysis")
|
||||
pname = propertynames(fixedeffectdata)[i]
|
||||
DataFrames.select!(fixedeffectdata, Not(pname))
|
||||
end
|
||||
end
|
||||
|
||||
X = ones(Int64, (size(fixedeffectdata)[1], 1))
|
||||
|
||||
for i in 1:length(fixedeffects)
|
||||
trait = fixedeffects[i]
|
||||
for phenotype in trait.alltraits
|
||||
X = cat(X, Int64.(fixedeffectdata[:,i] .== phenotype), dims=2)
|
||||
end
|
||||
end
|
||||
|
||||
return X, fixedeffects
|
||||
end
|
||||
|
||||
"""
|
||||
additiverelationshipmatrix(id, dam, sire)
|
||||
|
||||
Returns the additive numerator relationship matrix based on the pedigree provided in `dam`
|
||||
and `sire` for animals in `id`.
|
||||
|
||||
"""
|
||||
function additiverelationshipmatrix(id::AbstractVector, damid::AbstractVector, sireid::AbstractVector)
|
||||
# Sanity-check for valid pedigree
|
||||
if !(length(id) == length(damid) && length(damid) == length(sireid))
|
||||
throw(ArgumentError("id, dam, and sire must be of the same length"))
|
||||
end
|
||||
|
||||
# Convert to positions
|
||||
dam = indexin(damid, id)
|
||||
sire = indexin(sireid, id)
|
||||
|
||||
# Calculate loop iterations
|
||||
numanimals = length(dam)
|
||||
|
||||
# Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals)
|
||||
|
||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||
# Henderson
|
||||
for i in 1:numanimals
|
||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i - 1)
|
||||
A[j,i] = 0.5 * (A[j,sire[i]] + A[j,dam[i]])
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1 + 0.5 * A[sire[i], dam[i]]
|
||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||
for j in 1:(i - 1)
|
||||
A[j,i] = 0.5 * A[j,dam[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i - 1)
|
||||
A[j,i] = 0.5 * A[j,sire[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
else
|
||||
for j in 1:(i - 1)
|
||||
A[j,i] = 0
|
||||
A[i,j] = 0
|
||||
end
|
||||
A[i,i] = 1
|
||||
end
|
||||
end
|
||||
|
||||
return A
|
||||
end
|
||||
|
||||
"""
|
||||
renamecolstospec(::DataFrame)
|
||||
|
||||
Renames the first four columns of the beefblup data sheet so that they can be referred to by
|
||||
name instead of by column index, regardless of user input.
|
||||
"""
|
||||
function renamecolstospec!(df::DataFrame)
|
||||
# Pull out the fixed-effect and observation name
|
||||
othernames = propertynames(df)[5:end]
|
||||
|
||||
# Put specification column names and user-defined names together
|
||||
allnames = cat([:id, :birthdate, :dam, :sire], othernames, dims=1)
|
||||
|
||||
# Rename in the DataFrame
|
||||
rename!(df, allnames, makeunique=true)
|
||||
return df
|
||||
end
|
||||
|
||||
struct FixedEffect
|
||||
name::String
|
||||
basetrait::Any
|
||||
alltraits::AbstractArray{Any}
|
||||
end
|
||||
|
||||
function FixedEffect(name::String, incidences)
|
||||
basetrait = last(unique(incidences))
|
||||
types = unique(incidences)[1:end-1]
|
||||
return FixedEffect(name, basetrait, types)
|
||||
end
|
||||
|
||||
|
||||
end
|
|
@ -1,21 +0,0 @@
|
|||
using BeefBLUP
|
||||
using DataFrames
|
||||
using Test
|
||||
|
||||
@testset "BeefBLUP.jl" begin
|
||||
# Write your tests here.
|
||||
correctX = [1 1 0 1; 1 1 0 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 0 1]
|
||||
fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"])
|
||||
@test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX
|
||||
correctA = [1 0 1/2 1/2 1/2 0 0;
|
||||
0 1 0 0 1/2 1/2 0;
|
||||
1/2 0 1 1/4 1/4 0 0;
|
||||
1/2 0 1/4 1 1/4 0 0;
|
||||
1/2 1/2 1/4 1/4 1 1/4 0;
|
||||
0 1/2 0 0 1/4 1 0;
|
||||
0 0 0 0 0 0 1]
|
||||
id = collect(1:7)
|
||||
dam_id = [missing, missing, missing, missing, 2, 2, missing]
|
||||
sire_id = [missing, missing, 1, 1, 1, missing, missing]
|
||||
@test BeefBLUP.additiverelationshipmatrix(id, dam_id, sire_id) == correctA
|
||||
end
|
Loading…
Reference in a new issue