mirror of
https://github.com/MillironX/beefblup.git
synced 2024-12-22 09:08:16 +00:00
Merge branch 'release/v0.2'
This commit is contained in:
commit
487f1c30df
9 changed files with 337 additions and 689 deletions
Binary file not shown.
Binary file not shown.
|
@ -1,13 +0,0 @@
|
||||||
# beefblup install
|
|
||||||
# Prepares the Julia environment for using beefblup by installing the requisite
|
|
||||||
# packages
|
|
||||||
# Usage: julia install.jl
|
|
||||||
# (C) 2020 Thomas A. Christensen II
|
|
||||||
# Licensed under BSD-3-Clause License
|
|
||||||
|
|
||||||
# Import the package manager
|
|
||||||
using Pkg
|
|
||||||
|
|
||||||
# Install requisite packages
|
|
||||||
Pkg.add("XLSX")
|
|
||||||
Pkg.add("Gtk")
|
|
|
@ -1,341 +0,0 @@
|
||||||
% beefblup
|
|
||||||
% Main script for performing single-variate BLUP to find beef cattle
|
|
||||||
% breeding values
|
|
||||||
% Usage: beefblup
|
|
||||||
% (C) 2020 Thomas A. Christensen II
|
|
||||||
% Licensed under BSD-3-Clause License
|
|
||||||
|
|
||||||
% Prepare the workspace for computation
|
|
||||||
clear
|
|
||||||
clc
|
|
||||||
close all
|
|
||||||
|
|
||||||
%% Display stuff
|
|
||||||
disp('beefblup v. 0.1')
|
|
||||||
disp('(C) 2020 Thomas A. Christensen II')
|
|
||||||
disp('https://github.com/millironx/beefblup')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Prompt User
|
|
||||||
% Ask for an input spreadsheet
|
|
||||||
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
|
|
||||||
|
|
||||||
% Ask for an ouput text file
|
|
||||||
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
|
|
||||||
|
|
||||||
% Ask for heritability
|
|
||||||
h2 = input('What is the heritability for this trait? >> ');
|
|
||||||
|
|
||||||
|
|
||||||
%% Import input file
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Importing Excel file...')
|
|
||||||
|
|
||||||
% Import data from a suitable spreadsheet
|
|
||||||
fullname = [path name];
|
|
||||||
clear name path
|
|
||||||
[~, ~, data] = xlsread(fullname);
|
|
||||||
|
|
||||||
disp('Importing Excel file... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Process input file
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Processing and formatting data...')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
% Extract the headers into a separate array
|
|
||||||
headers = data(1,:);
|
|
||||||
data(1,:) = [];
|
|
||||||
|
|
||||||
% Convert the string dates to numbers
|
|
||||||
data(:,2) = num2cell(datenum(data(:,2)));
|
|
||||||
|
|
||||||
% Sort the array by date
|
|
||||||
data = sortrows(data,2);
|
|
||||||
|
|
||||||
% Coerce all id fields to string format
|
|
||||||
strings = [1 3 4];
|
|
||||||
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
|
|
||||||
|
|
||||||
% Define fields to hold id values for animals and their parents
|
|
||||||
ids = char(data{:,1});
|
|
||||||
damids = char(data{:,3});
|
|
||||||
sireids = char(data{:,4});
|
|
||||||
numanimals = length(data(:,1));
|
|
||||||
|
|
||||||
% Define fields to hold the index values for animals and their parents
|
|
||||||
dam = zeros(numanimals,1);
|
|
||||||
sire = zeros(numanimals,1);
|
|
||||||
|
|
||||||
% Find all row numbers where an animal was a parent
|
|
||||||
for i=1:numanimals
|
|
||||||
% Find all animals that this animal birthed
|
|
||||||
dammatch = ismember(damids, ids(i,:), 'rows');
|
|
||||||
damindexes = find(dammatch == 1);
|
|
||||||
dam(damindexes) = i;
|
|
||||||
|
|
||||||
% Find all animals that this animal sired
|
|
||||||
sirematch = ismember(sireids, ids(i,:), 'rows');
|
|
||||||
sireindexes = find(sirematch == 1);
|
|
||||||
sire(sireindexes) = i;
|
|
||||||
end
|
|
||||||
|
|
||||||
% Store column numbers that need to be deleted
|
|
||||||
% Column 6 contains an intermediate Excel calculation and always needs to
|
|
||||||
% be deleted
|
|
||||||
colstodelete = 6;
|
|
||||||
|
|
||||||
% Coerce each group to string format
|
|
||||||
for i = 7:length(headers)
|
|
||||||
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
|
|
||||||
end
|
|
||||||
|
|
||||||
% Find any columns that need to be deleted
|
|
||||||
for i = 7:length(headers)
|
|
||||||
if length(uniquecell(data(:,i))) <= 1
|
|
||||||
colname = headers{i};
|
|
||||||
disp(['Column "' colname '" does not have any unique animals and will be removed'])
|
|
||||||
disp('from this analysis');
|
|
||||||
colstodelete = [colstodelete i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
% Delete the appropriate columns from the datasheet and the headers
|
|
||||||
data(:,colstodelete) = [];
|
|
||||||
headers(colstodelete) = [];
|
|
||||||
|
|
||||||
% Determine how many contemporary groups there are
|
|
||||||
numgroups = ones(1, length(headers)-5);
|
|
||||||
for i = 6:length(headers)
|
|
||||||
numgroups(i-5) = length(uniquecell(data(:,i)));
|
|
||||||
end
|
|
||||||
|
|
||||||
% If there are more groups than animals, then the analysis cannot continue
|
|
||||||
if sum(numgroups) >= numanimals
|
|
||||||
disp('There are more contemporary groups than animals. The analysis will now abort.');
|
|
||||||
return
|
|
||||||
end
|
|
||||||
|
|
||||||
% Define a "normal" animal as one of the last in the groups, provided that
|
|
||||||
% all traits do not have null values
|
|
||||||
normal = cell([1 length(headers)-5]);
|
|
||||||
for i = 6:length(headers)
|
|
||||||
for j = numanimals:-1:1
|
|
||||||
if not(cellfun(@isempty, data(j,i)))
|
|
||||||
normal(i - 5) = data(j,i);
|
|
||||||
break
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
% Print the results of the "normal" definition
|
|
||||||
disp(' ')
|
|
||||||
disp('For the purposes of this analysis, a "normal" animal will be defined')
|
|
||||||
disp('by the following traits:')
|
|
||||||
for i = 6:length(headers)
|
|
||||||
disp([headers{i} ': ' normal{i-5}])
|
|
||||||
end
|
|
||||||
disp(' ')
|
|
||||||
disp('If no animal matching this description exists, the results may appear')
|
|
||||||
disp('outlandish, but are still as correct as the accuracy suggests')
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
disp('Processing and formatting data... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Create the fixed-effect matrix
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Creating the fixed-effect matrix...')
|
|
||||||
|
|
||||||
% Form the fixed effect matrix
|
|
||||||
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
|
|
||||||
X(:,1) = ones(1, numanimals);
|
|
||||||
|
|
||||||
% Create an external counter that will increment through both loops
|
|
||||||
I = 2;
|
|
||||||
|
|
||||||
% Store the traits in a string cell array
|
|
||||||
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
|
|
||||||
|
|
||||||
% Iterate through each group
|
|
||||||
for i = 1:length(normal)
|
|
||||||
% Find the traits that are present in this trait
|
|
||||||
traits = uniquecell(data(:,i+5));
|
|
||||||
|
|
||||||
% Remove the "normal" version from the analysis
|
|
||||||
normalindex = find(strcmp(traits, normal{i}));
|
|
||||||
traits(normalindex) = [];
|
|
||||||
|
|
||||||
% Iterate inside of the group
|
|
||||||
for j = 1:length(traits)
|
|
||||||
matchedindex = find(strcmp(data(:,i+5), traits{j}));
|
|
||||||
X(matchedindex, I) = 1;
|
|
||||||
|
|
||||||
% Add this trait to the string
|
|
||||||
adjustedtraits(I - 1) = traits(j);
|
|
||||||
|
|
||||||
% Increment the big counter
|
|
||||||
I = I + 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
disp('Creating the fixed-effect matrix... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Additive relationship matrix
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Creating the additive relationship matrix...')
|
|
||||||
|
|
||||||
% Create an empty matrix for the additive relationship matrix
|
|
||||||
A = zeros(numanimals, numanimals);
|
|
||||||
|
|
||||||
% Create the additive relationship matrix by the FORTRAN method presented
|
|
||||||
% by Henderson
|
|
||||||
for i = 1:numanimals
|
|
||||||
if dam(i) ~= 0 && sire(i) ~= 0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*(A(j,sire(i))+A(j,dam(i)));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1 + 0.5*A(sire(i),dam(i));
|
|
||||||
elseif dam(i) ~= 0 && sire(i) == 0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*A(j,dam(i));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
elseif dam(i) == 0 && sire(i) ~=0
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0.5*A(j,sire(i));
|
|
||||||
A(i,j) = A(j,i);
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
else
|
|
||||||
for j = 1:(i-1)
|
|
||||||
A(j,i) = 0;
|
|
||||||
A(i,j) = 0;
|
|
||||||
end
|
|
||||||
A(i,i) = 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
disp('Creating the additive relationship matrix... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Perform BLUP
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Solving the mixed-model equations')
|
|
||||||
|
|
||||||
% Extract the observed data
|
|
||||||
Y = cell2mat(data(:, 5));
|
|
||||||
|
|
||||||
% The identity matrix for random effects
|
|
||||||
Z = eye(numanimals, numanimals);
|
|
||||||
|
|
||||||
% Remove items where there is no data
|
|
||||||
nullobs = find(isnan(Y));
|
|
||||||
Z(nullobs, nullobs) = 0;
|
|
||||||
|
|
||||||
% Calculate heritability
|
|
||||||
lambda = (1-h2)/h2;
|
|
||||||
|
|
||||||
% Use the mixed-model equations
|
|
||||||
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
|
|
||||||
|
|
||||||
% Find the accuracies
|
|
||||||
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
|
|
||||||
reliability = 1 - diaginv.*lambda;
|
|
||||||
|
|
||||||
disp('Solving the mixed-model equations... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
||||||
|
|
||||||
%% Output the results
|
|
||||||
tic
|
|
||||||
disp(' ')
|
|
||||||
disp('Saving results...')
|
|
||||||
|
|
||||||
% Find how many traits we found BLUE for
|
|
||||||
numgroups = numgroups - 1;
|
|
||||||
|
|
||||||
% Start printing results to output
|
|
||||||
fileID = fopen([savepath savename], 'w');
|
|
||||||
fprintf(fileID, 'beefblup Results Report\n');
|
|
||||||
fprintf(fileID, 'Produced using beefblup for MATLAB (');
|
|
||||||
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
|
|
||||||
fprintf(fileID, ')\n\n');
|
|
||||||
fprintf(fileID, 'Input:\t');
|
|
||||||
fprintf(fileID, '%s', fullname);
|
|
||||||
fprintf(fileID, '\nAnalysis performed:\t');
|
|
||||||
fprintf(fileID, date);
|
|
||||||
fprintf(fileID, '\nTrait examined:\t');
|
|
||||||
fprintf(fileID, [headers{5}]);
|
|
||||||
fprintf(fileID, '\n\n');
|
|
||||||
|
|
||||||
% Print base population stats
|
|
||||||
fprintf(fileID, 'Base Population:\n');
|
|
||||||
for i = 1:length(numgroups)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [headers{i+5}]);
|
|
||||||
fprintf(fileID, ':\t');
|
|
||||||
fprintf(fileID, [normal{i}]);
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\tMean ');
|
|
||||||
fprintf(fileID, [headers{5}]);
|
|
||||||
fprintf(fileID, ':\t');
|
|
||||||
fprintf(fileID, num2str(solutions(1)));
|
|
||||||
fprintf(fileID, '\n\n');
|
|
||||||
|
|
||||||
I = 2;
|
|
||||||
|
|
||||||
% Contemporary group adjustments
|
|
||||||
fprintf(fileID, 'Contemporary Group Effects:\n');
|
|
||||||
for i = 1:length(numgroups)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [headers{i+5}]);
|
|
||||||
fprintf(fileID, '\tEffect\tReliability\n');
|
|
||||||
for j = 1:numgroups(i)
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [adjustedtraits{I-1}]);
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(solutions(I)));
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(reliability(I)));
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
|
|
||||||
I = I + 1;
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
|
|
||||||
% Expected breeding values
|
|
||||||
fprintf(fileID, 'Expected Breeding Values:\n');
|
|
||||||
fprintf(fileID, '\tID\tEBV\tReliability\n');
|
|
||||||
for i = 1:numanimals
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, [data{i,1}]);
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(solutions(i+I-1)));
|
|
||||||
fprintf(fileID, '\t');
|
|
||||||
fprintf(fileID, num2str(reliability(i+I-1)));
|
|
||||||
fprintf(fileID, '\n');
|
|
||||||
end
|
|
||||||
|
|
||||||
fprintf(fileID, '\n - END REPORT -');
|
|
||||||
fclose(fileID);
|
|
||||||
|
|
||||||
disp('Saving results... Done!')
|
|
||||||
toc
|
|
||||||
disp(' ')
|
|
|
@ -1,9 +0,0 @@
|
||||||
% uniquenan
|
|
||||||
% Serves the same purpose as UNIQUE, but ensures any empty cells are not
|
|
||||||
% counted
|
|
||||||
function y = uniquecell(x)
|
|
||||||
y = unique(x);
|
|
||||||
if any(cellfun(@isempty, y))
|
|
||||||
y(cellfun(@isempty, y)) = [];
|
|
||||||
end
|
|
||||||
end
|
|
6
Project.toml
Normal file
6
Project.toml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
[deps]
|
||||||
|
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||||
|
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||||
|
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||||
|
Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44"
|
||||||
|
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
77
README.md
77
README.md
|
@ -13,35 +13,34 @@ Why? It's part of my effort to
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
||||||
2. Open a new Julia window and type the `]` key
|
2. Download the [beefblup ZIP
|
||||||
3. Type `add XLSX Gtk` and press **Enter**
|
file](https://github.com/MillironX/beefblup/archive/refs/tags/v0.2.zip) and unzip it someplace memorable
|
||||||
|
3. In your file explorer, copy the address of the "beefblup" folder
|
||||||
Alternatively, you can run the [install
|
4. Launch Julia
|
||||||
script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from
|
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||||
Julia.
|
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||||
|
6. Type the `]` key
|
||||||
|
7. Type `activate .` and press **Enter**
|
||||||
|
8. Type `instantiate` and press **Enter**
|
||||||
|
9. Installation is done: you can close the Julia window
|
||||||
|
|
||||||
## How to Use
|
## How to Use
|
||||||
|
|
||||||
> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118).
|
1. Make a copy of the "sample.csv" spreadsheet and replace the data with your own
|
||||||
> Although it's tempting to just open up beefblup in Juno and press the big play
|
1. The trait you wish to calculate EBVs for always goes in the rightmost column
|
||||||
> button, it won't work. Follow these instructions until it's fixed. If you
|
2. If you wish to add more contemporary group traits to your analysis, include them before the rightmost column
|
||||||
> don't know what Juno is: ignore this message.
|
2. Save and close
|
||||||
|
3. In your file explorer, copy the address of the "beefblup" folder
|
||||||
1. Download the [beefblup ZIP
|
4. Launch Julia
|
||||||
file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it
|
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||||
someplace memorable
|
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||||
2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own
|
6. Type the `]` key
|
||||||
3. If you wish to add more contemporary group traits to your analysis, replace
|
7. Type `activate .` and press **Enter**
|
||||||
or add them to the right of the Purple section
|
8. Press **Backspace**
|
||||||
4. Save and close
|
9. Type `include("src/beefblup.jl")` and press **Enter**
|
||||||
5. In your file explorer, copy the address of the "Julia" folder
|
10. Select the spreadsheet you created in steps 1-4
|
||||||
6. Launch Julia
|
11. Follow the on-screen prompts
|
||||||
7. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
12. **#KeepEPDsReal!**
|
||||||
`cd("C:\Users\MillironX\Documents\beefblup\Julia")`)
|
|
||||||
8. Type `include("beefblup.jl")` and press **Enter**
|
|
||||||
9. Select the spreadsheet you created in steps 1-4
|
|
||||||
10. Follow the on-screen prompts
|
|
||||||
11. **#KeepEPDsReal!**
|
|
||||||
|
|
||||||
## For Programmers
|
## For Programmers
|
||||||
|
|
||||||
|
@ -51,19 +50,19 @@ Julia.
|
||||||
|
|
||||||
### Development Roadmap
|
### Development Roadmap
|
||||||
|
|
||||||
| Version | Feature |
|
| Version | Feature | Status |
|
||||||
| ------- | ------------------------------------------------------------------- |
|
| ------- | ----------------------------------------------------------------------------------- | ------------------ |
|
||||||
| v0.1 | Julia port of original MATLAB script |
|
| v0.1 | Julia port of original MATLAB script | :heavy_check_mark: |
|
||||||
| v0.2 | Spreadsheet format redesign |
|
| v0.2 | Spreadsheet format redesign | :heavy_check_mark: |
|
||||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) |
|
| v0.3 | API rewrite (change to function calls and package format instead of script running) | |
|
||||||
| v0.4 | Add GUI for all options |
|
| v0.4 | Add GUI for all options | |
|
||||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects |
|
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | |
|
||||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) |
|
| v0.6 | Repeated measurement BLUP (aka dairyblup) | |
|
||||||
| v0.7 | Multiple trait BLUP |
|
| v0.7 | Multiple trait BLUP | |
|
||||||
| v0.8 | Maternal effects BLUP |
|
| v0.8 | Maternal effects BLUP | |
|
||||||
| v0.9 | Genomic BLUP |
|
| v0.9 | Genomic BLUP | |
|
||||||
| v0.10 | beefblup binaries |
|
| v0.10 | beefblup binaries | |
|
||||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) |
|
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | |
|
||||||
|
|
||||||
### Bug Reports
|
### Bug Reports
|
||||||
|
|
||||||
|
|
8
data/sample.csv
Normal file
8
data/sample.csv
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
id,birthdate,dam,sire,year,sex,weaning_weight
|
||||||
|
1,1/1/1990,,,1990,male,354
|
||||||
|
2,1/1/1990,,,1990,female,251
|
||||||
|
3,1/1/1991,,1,1991,male,327
|
||||||
|
4,1/1/1991,,1,1991,female,328
|
||||||
|
5,1/1/1991,2,1,1991,male,301
|
||||||
|
6,1/1/1991,2,,1991,female,270
|
||||||
|
7,1/1/1992,,,1992,male,330
|
|
572
Julia/beefblup.jl → src/beefblup.jl
Normal file → Executable file
572
Julia/beefblup.jl → src/beefblup.jl
Normal file → Executable file
|
@ -1,287 +1,285 @@
|
||||||
# beefblup
|
#!/bin/bash
|
||||||
# Main script for performing single-variate BLUP to find beef cattle
|
#=
|
||||||
# breeding values
|
exec julia --project=$(realpath $(dirname $(dirname "${BASH_SOURCE[0]}"))) "${BASH_SOURCE[0]}" "$@"
|
||||||
# Usage: julia beefblup.jl
|
=#
|
||||||
# (C) 2020 Thomas A. Christensen II
|
# beefblup
|
||||||
# Licensed under BSD-3-Clause License
|
# Main script for performing single-variate BLUP to find beef cattle
|
||||||
|
# breeding values
|
||||||
# Import the required packages
|
# Usage: julia beefblup.jl
|
||||||
using XLSX
|
# (C) 2021 Thomas A. Christensen II
|
||||||
using LinearAlgebra
|
# Licensed under BSD-3-Clause License
|
||||||
using Dates
|
# cSpell:includeRegExp #.*
|
||||||
using Gtk
|
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||||
|
|
||||||
# Display stuff
|
# Import the required packages
|
||||||
println("beefblup v 0.1")
|
using CSV
|
||||||
println("(C) 2020 Thomas A. Christensen II")
|
using DataFrames
|
||||||
println("https://github.com/millironx/beefblup")
|
using LinearAlgebra
|
||||||
print("\n")
|
using Dates
|
||||||
|
using Gtk
|
||||||
### Prompt User
|
|
||||||
# Ask for an input spreadsheet
|
# Display stuff
|
||||||
path = open_dialog_native(
|
println("beefblup v 0.2")
|
||||||
"Select a beefblup worksheet",
|
println("(C) 2021 Thomas A. Christensen II")
|
||||||
GtkNullContainer(),
|
println("https://github.com/millironx/beefblup")
|
||||||
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
|
print("\n")
|
||||||
)
|
|
||||||
|
### Prompt User
|
||||||
# Ask for an output text filename
|
# Ask for an input spreadsheet
|
||||||
savepath = save_dialog_native(
|
path = open_dialog_native(
|
||||||
"Save your beefblup results",
|
"Select a beefblup worksheet",
|
||||||
GtkNullContainer(),
|
GtkNullContainer(),
|
||||||
(GtkFileFilter("*.txt", name="Results file"),
|
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
|
||||||
"*.txt")
|
)
|
||||||
)
|
|
||||||
|
# Ask for an output text filename
|
||||||
# Ask for heritability
|
savepath = save_dialog_native(
|
||||||
print("What is the heritability for this trait?> ")
|
"Save your beefblup results",
|
||||||
h2 = parse(Float64, readline(stdin))
|
GtkNullContainer(),
|
||||||
|
(GtkFileFilter("*.txt", name="Results file"),
|
||||||
### Import input filename
|
"*.txt")
|
||||||
print("[🐮]: Importing Excel file...")
|
)
|
||||||
|
|
||||||
# Import data from a suitable spreadsheet
|
# Ask for heritability
|
||||||
data = XLSX.readxlsx(path)[1][:]
|
print("What is the heritability for this trait?> ")
|
||||||
|
h2 = parse(Float64, readline(stdin))
|
||||||
print("Done!\n")
|
|
||||||
|
### Import input filename
|
||||||
### Process input file
|
print("[🐮]: Importing data file...")
|
||||||
print("[🐮]: Processing and formatting data...")
|
|
||||||
|
# Import data from a suitable spreadsheet
|
||||||
# Extract the headers into a separate array
|
data = DataFrame(CSV.File(path))
|
||||||
headers = data[1,:]
|
|
||||||
data = data[2:end,:]
|
print("Done!\n")
|
||||||
|
|
||||||
# Sort the array by date
|
### Process input file
|
||||||
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
|
print("[🐮]: Processing and formatting data...")
|
||||||
|
|
||||||
# Define fields to hold id values for animals and their parents
|
# Sort the array by date
|
||||||
ids = string.(data[:,1])
|
sort!(data, :birthdate)
|
||||||
damids = string.(data[:,3])
|
|
||||||
sireids = string.(data[:,4])
|
# Define fields to hold id values for animals and their parents
|
||||||
numanimals = length(ids)
|
numanimals = length(data.id)
|
||||||
|
|
||||||
# Find the index values for animals and their parents
|
# Find the index values for animals and their parents
|
||||||
dam = indexin(damids, ids)
|
dam = indexin(data.dam, data.id)
|
||||||
sire = indexin(sireids, ids)
|
sire = indexin(data.sire, data.id)
|
||||||
|
|
||||||
# Store column numbers that need to be deleted
|
# Extract all of the fixed effects
|
||||||
# Column 6 contains an intermediate Excel calculation and always need to
|
fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end-1]
|
||||||
# be deleted
|
|
||||||
colstokeep = [1, 2, 3, 4, 5]
|
# Find any columns that need to be deleted
|
||||||
|
for i in 1:ncol(fixedfx)
|
||||||
# Find any columns that need to be deleted
|
if length(unique(fixedfx[:,i])) <= 1
|
||||||
for i in 7:length(headers)
|
colname = names(fixedfx)[i]
|
||||||
if length(unique(data[:,i])) <= 1
|
print("Column '")
|
||||||
colname = headers[i]
|
print(colname)
|
||||||
print("Column '")
|
print("' does not have any unique animals and will be removed from this analysis\n")
|
||||||
print(colname)
|
deletecols!(fixedfx,i)
|
||||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
end
|
||||||
else
|
end
|
||||||
push!(colstokeep, i)
|
|
||||||
end
|
# Determine how many contemporary groups there are
|
||||||
end
|
numtraits = ncol(fixedfx)
|
||||||
|
numgroups = ones(1, numtraits)
|
||||||
# Delete the appropriate columns from the datasheet and the headers
|
for i in 1:numtraits
|
||||||
data = data[:, colstokeep]
|
numgroups[i] = length(unique(fixedfx[:,i]))
|
||||||
headers = headers[colstokeep]
|
end
|
||||||
|
|
||||||
# Determine how many contemporary groups there are
|
# If there are more groups than animals, then the analysis cannot continue
|
||||||
numgroups = ones(1, length(headers)-5)
|
if sum(numgroups) >= numanimals
|
||||||
for i in 6:length(headers)
|
println("There are more contemporary groups than animals. The analysis will
|
||||||
numgroups[i-5] = length(unique(data[:,i]))
|
now abort.")
|
||||||
end
|
exit()
|
||||||
|
end
|
||||||
# If there are more groups than animals, then the analysis cannot continue
|
|
||||||
if sum(numgroups) >= numanimals
|
# Define a "normal" animal as one of the last in the groups, provided that
|
||||||
println("There are more contemporary groups than animals. The analysis will
|
# all traits do not have null values
|
||||||
now abort.")
|
normal = Array{String}(undef,1,numtraits)
|
||||||
exit()
|
for i in 1:numtraits
|
||||||
end
|
for j in numanimals:-1:1
|
||||||
|
if !ismissing(fixedfx[j,i])
|
||||||
# Define a "normal" animal as one of the last in the groups, provided that
|
normal[i] = string(fixedfx[j,i])
|
||||||
# all traits do not have null values
|
break
|
||||||
normal = Array{String}(undef,1,length(headers)-5)
|
end
|
||||||
for i in 6:length(headers)
|
end
|
||||||
for j in numanimals:-1:1
|
end
|
||||||
if !ismissing(data[j,i])
|
|
||||||
normal[i-5] = string(data[j,i])
|
print("Done!\n")
|
||||||
break
|
|
||||||
end
|
### Create the fixed-effect matrix
|
||||||
end
|
print("[🐮]: Creating the fixed-effect matrix...")
|
||||||
end
|
|
||||||
|
# Form the fixed-effect matrix
|
||||||
print("Done!\n")
|
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
||||||
|
X[:,1] = ones(Int8, 1, numanimals)
|
||||||
### Create the fixed-effect matrix
|
|
||||||
print("[🐮]: Creating the fixed-effect matrix...")
|
# Create an external counter that will increment through both loops
|
||||||
|
counter = 2
|
||||||
# Form the fixed-effect matrix
|
|
||||||
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
# Store the traits in a string array
|
||||||
X[:,1] = ones(Int8, 1, numanimals)
|
adjustedtraits =
|
||||||
|
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
||||||
# Create an external counter that will increment through both loops
|
# Iterate through each group
|
||||||
counter = 2
|
for i in 1:length(normal)
|
||||||
|
# Find the traits that are present in this trait
|
||||||
# Store the traits in a string array
|
localdata = string.(fixedfx[:,i])
|
||||||
adjustedtraits =
|
traits = unique(localdata)
|
||||||
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
# Remove the normal version from the analysis
|
||||||
# Iterate through each group
|
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
||||||
for i in 1:length(normal)
|
# Iterate inside of the group
|
||||||
# Find the traits that are present in this trait
|
for j in 1:(length(effecttraits))
|
||||||
localdata = string.(data[:,i+5])
|
matchedindex = findall(x -> x == effecttraits[j], localdata)
|
||||||
traits = unique(localdata)
|
X[matchedindex, counter] .= 1
|
||||||
# Remove the normal version from the analysis
|
# Add this trait to the string
|
||||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
adjustedtraits[counter - 1] = traits[j]
|
||||||
# Iterate inside of the group
|
# Increment the big counter
|
||||||
for j in 1:length(effecttraits)
|
global counter = counter + 1
|
||||||
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
end
|
||||||
X[matchedindex, counter] .= 1
|
end
|
||||||
# Add this trait to the string
|
|
||||||
adjustedtraits[counter - 1] = traits[j]
|
print("Done!\n")
|
||||||
# Increment the big counter
|
|
||||||
global counter = counter + 1
|
### Additive relationship matrix
|
||||||
end
|
print("[🐮]: Creating additive relationship matrix...")
|
||||||
end
|
|
||||||
|
# Create an empty matrix for the additive relationship matrix
|
||||||
print("Done!\n")
|
A = zeros(numanimals, numanimals)
|
||||||
|
|
||||||
### Additive relationship matrix
|
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||||
print("[🐮]: Creating additive relationship matrix...")
|
# Henderson
|
||||||
|
for i in 1:numanimals
|
||||||
# Create an empty matrix for the additive relationship matrix
|
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||||
A = zeros(numanimals, numanimals)
|
for j in 1:(i-1)
|
||||||
|
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
||||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
A[i,j] = A[j,i]
|
||||||
# Henderson
|
end
|
||||||
for i in 1:numanimals
|
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
||||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||||
for j in 1:(i-1)
|
for j in 1:(i-1)
|
||||||
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
A[j,i] = 0.5*A[j,dam[i]]
|
||||||
A[i,j] = A[j,i]
|
A[i,j] = A[j,i]
|
||||||
end
|
end
|
||||||
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
A[i,i] = 1
|
||||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||||
for j in 1:(i-1)
|
for j in 1:(i-1)
|
||||||
A[j,i] = 0.5*A[j,dam[i]]
|
A[j,i] = 0.5*A[j,sire[i]]
|
||||||
A[i,j] = A[j,i]
|
A[i,j] = A[j,i]
|
||||||
end
|
end
|
||||||
A[i,i] = 1
|
A[i,i] = 1
|
||||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
else
|
||||||
for j in 1:(i-1)
|
for j in 1:(i-1)
|
||||||
A[j,i] = 0.5*A[j,sire[i]]
|
A[j,i] = 0
|
||||||
A[i,j] = A[j,i]
|
A[i,j] = 0
|
||||||
end
|
end
|
||||||
A[i,i] = 1
|
A[i,i] = 1
|
||||||
else
|
end
|
||||||
for j in 1:(i-1)
|
end
|
||||||
A[j,i] = 0
|
|
||||||
A[i,j] = 0
|
print("Done!\n")
|
||||||
end
|
|
||||||
A[i,i] = 1
|
### Perform BLUP
|
||||||
end
|
print("[🐮]: Solving the mixed-model equations...")
|
||||||
end
|
|
||||||
|
# Extract the observed data
|
||||||
print("Done!\n")
|
Y = convert(Array{Float64}, data[:,end])
|
||||||
|
|
||||||
### Perform BLUP
|
# The random effects matrix
|
||||||
print("[🐮]: Solving the mixed-model equations...")
|
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||||
|
|
||||||
# Extract the observed data
|
# Remove items where there is no data
|
||||||
Y = convert(Array{Float64}, data[:,5])
|
nullobs = findall(isnothing, Y)
|
||||||
|
Z[nullobs, nullobs] .= 0
|
||||||
# The random effects matrix
|
|
||||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
# Calculate heritability
|
||||||
|
λ = (1-h2)/h2
|
||||||
# Remove items where there is no data
|
|
||||||
nullobs = findall(isnothing, Y)
|
# Use the mixed-model equations
|
||||||
Z[nullobs, nullobs] .= 0
|
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
||||||
|
MMY = [X'*Y; Z'*Y]
|
||||||
# Calculate heritability
|
solutions = MME\MMY
|
||||||
λ = (1-h2)/h2
|
|
||||||
|
# Find the accuracies
|
||||||
# Use the mixed-model equations
|
diaginv = diag(inv(MME))
|
||||||
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
||||||
MMY = [X'*Y; Z'*Y]
|
|
||||||
solutions = MME\MMY
|
print("Done!\n")
|
||||||
|
|
||||||
# Find the accuracies
|
### Output the results
|
||||||
diaginv = diag(inv(MME))
|
print("[🐮]: Saving results...")
|
||||||
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
|
||||||
|
# Find how many traits we found BLUE for
|
||||||
print("Done!\n")
|
numgroups = numgroups .- 1
|
||||||
|
|
||||||
### Output the results
|
# Extract the names of the traits
|
||||||
print("[🐮]: Saving results...")
|
fixedfxnames = names(fixedfx)
|
||||||
|
traitname = names(data)[end]
|
||||||
# Find how many traits we found BLUE for
|
|
||||||
numgroups = numgroups .- 1
|
# Start printing results to output
|
||||||
|
fileID = open(savepath, "w")
|
||||||
# Start printing results to output
|
write(fileID, "beefblup Results Report\n")
|
||||||
fileID = open(savepath, "w")
|
write(fileID, "Produced using beefblup (")
|
||||||
write(fileID, "beefblup Results Report\n")
|
write(fileID, "https://github.com/millironx/beefblup")
|
||||||
write(fileID, "Produced using beefblup for Julia (")
|
write(fileID, ")\n\n")
|
||||||
write(fileID, "https://github.com/millironx/beefblup")
|
write(fileID, "Input:\t")
|
||||||
write(fileID, ")\n\n")
|
write(fileID, path)
|
||||||
write(fileID, "Input:\t")
|
write(fileID, "\nAnalysis performed:\t")
|
||||||
write(fileID, path)
|
write(fileID, string(Dates.today()))
|
||||||
write(fileID, "\nAnalysis performed:\t")
|
write(fileID, "\nTrait examined:\t")
|
||||||
write(fileID, string(Dates.today()))
|
write(fileID, traitname)
|
||||||
write(fileID, "\nTrait examined:\t")
|
write(fileID, "\n\n")
|
||||||
write(fileID, headers[5])
|
|
||||||
write(fileID, "\n\n")
|
# Print base population stats
|
||||||
|
write(fileID, "Base Population:\n")
|
||||||
# Print base population stats
|
for i in 1:length(normal)
|
||||||
write(fileID, "Base Population:\n")
|
write(fileID, "\t")
|
||||||
for i in 1:length(numgroups)
|
write(fileID, fixedfxnames[i])
|
||||||
write(fileID, "\t")
|
write(fileID, ":\t")
|
||||||
write(fileID, headers[i+5])
|
write(fileID, normal[i])
|
||||||
write(fileID, ":\t")
|
write(fileID, "\n")
|
||||||
write(fileID, normal[i])
|
end
|
||||||
write(fileID, "\n")
|
write(fileID, "\tMean ")
|
||||||
end
|
write(fileID, traitname)
|
||||||
write(fileID, "\tMean ")
|
write(fileID, ":\t")
|
||||||
write(fileID, headers[5])
|
write(fileID, string(solutions[1]))
|
||||||
write(fileID, ":\t")
|
write(fileID, "\n\n")
|
||||||
write(fileID, string(solutions[1]))
|
|
||||||
write(fileID, "\n\n")
|
# Contemporary group adjustments
|
||||||
|
counter = 2
|
||||||
# Contemporary group adjustments
|
write(fileID, "Contemporary Group Effects:\n")
|
||||||
counter = 2
|
for i in 1:length(numgroups)
|
||||||
write(fileID, "Contemporary Group Effects:\n")
|
write(fileID, "\t")
|
||||||
for i in 1:length(numgroups)
|
write(fileID, fixedfxnames[i])
|
||||||
write(fileID, "\t")
|
write(fileID, "\tEffect\tReliability\n")
|
||||||
write(fileID, headers[i+5])
|
for j in 1:numgroups[i]
|
||||||
write(fileID, "\tEffect\tReliability\n")
|
write(fileID, "\t")
|
||||||
for j in 1:numgroups[i]
|
write(fileID, adjustedtraits[counter - 1])
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, adjustedtraits[counter - 1])
|
write(fileID, string(solutions[counter]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, string(solutions[counter]))
|
write(fileID, string(reliability[counter]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\n")
|
||||||
write(fileID, string(reliability[counter]))
|
|
||||||
write(fileID, "\n")
|
global counter = counter + 1
|
||||||
|
end
|
||||||
global counter = counter + 1
|
write(fileID, "\n")
|
||||||
end
|
end
|
||||||
write(fileID, "\n")
|
write(fileID, "\n")
|
||||||
end
|
|
||||||
write(fileID, "\n")
|
# Expected breeding values
|
||||||
|
write(fileID, "Expected Breeding Values:\n")
|
||||||
# Expected breeding values
|
write(fileID, "\tID\tEBV\tReliability\n")
|
||||||
write(fileID, "Expected Breeding Values:\n")
|
for i in 1:numanimals
|
||||||
write(fileID, "\tID\tEBV\tReliability\n")
|
write(fileID, "\t")
|
||||||
for i in 1:numanimals
|
write(fileID, string(data.id[i]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, ids[i])
|
write(fileID, string(solutions[i+counter-1]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, string(solutions[i+counter-1]))
|
write(fileID, string(reliability[i+counter-1]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\n")
|
||||||
write(fileID, string(reliability[i+counter-1]))
|
end
|
||||||
write(fileID, "\n")
|
|
||||||
end
|
write(fileID, "\n - END REPORT -")
|
||||||
|
close(fileID)
|
||||||
write(fileID, "\n - END REPORT -")
|
|
||||||
close(fileID)
|
print("Done!\n")
|
||||||
|
|
||||||
print("Done!\n")
|
|
Loading…
Reference in a new issue