mirror of
https://github.com/MillironX/beefblup.git
synced 2024-11-13 03:03:08 +00:00
Merge branch 'release/v0.2'
This commit is contained in:
commit
487f1c30df
9 changed files with 337 additions and 689 deletions
Binary file not shown.
Binary file not shown.
|
@ -1,13 +0,0 @@
|
|||
# beefblup install
|
||||
# Prepares the Julia environment for using beefblup by installing the requisite
|
||||
# packages
|
||||
# Usage: julia install.jl
|
||||
# (C) 2020 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
|
||||
# Import the package manager
|
||||
using Pkg
|
||||
|
||||
# Install requisite packages
|
||||
Pkg.add("XLSX")
|
||||
Pkg.add("Gtk")
|
|
@ -1,341 +0,0 @@
|
|||
% beefblup
|
||||
% Main script for performing single-variate BLUP to find beef cattle
|
||||
% breeding values
|
||||
% Usage: beefblup
|
||||
% (C) 2020 Thomas A. Christensen II
|
||||
% Licensed under BSD-3-Clause License
|
||||
|
||||
% Prepare the workspace for computation
|
||||
clear
|
||||
clc
|
||||
close all
|
||||
|
||||
%% Display stuff
|
||||
disp('beefblup v. 0.1')
|
||||
disp('(C) 2020 Thomas A. Christensen II')
|
||||
disp('https://github.com/millironx/beefblup')
|
||||
disp(' ')
|
||||
|
||||
%% Prompt User
|
||||
% Ask for an input spreadsheet
|
||||
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
|
||||
|
||||
% Ask for an ouput text file
|
||||
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
|
||||
|
||||
% Ask for heritability
|
||||
h2 = input('What is the heritability for this trait? >> ');
|
||||
|
||||
|
||||
%% Import input file
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Importing Excel file...')
|
||||
|
||||
% Import data from a suitable spreadsheet
|
||||
fullname = [path name];
|
||||
clear name path
|
||||
[~, ~, data] = xlsread(fullname);
|
||||
|
||||
disp('Importing Excel file... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Process input file
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Processing and formatting data...')
|
||||
disp(' ')
|
||||
|
||||
% Extract the headers into a separate array
|
||||
headers = data(1,:);
|
||||
data(1,:) = [];
|
||||
|
||||
% Convert the string dates to numbers
|
||||
data(:,2) = num2cell(datenum(data(:,2)));
|
||||
|
||||
% Sort the array by date
|
||||
data = sortrows(data,2);
|
||||
|
||||
% Coerce all id fields to string format
|
||||
strings = [1 3 4];
|
||||
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
|
||||
|
||||
% Define fields to hold id values for animals and their parents
|
||||
ids = char(data{:,1});
|
||||
damids = char(data{:,3});
|
||||
sireids = char(data{:,4});
|
||||
numanimals = length(data(:,1));
|
||||
|
||||
% Define fields to hold the index values for animals and their parents
|
||||
dam = zeros(numanimals,1);
|
||||
sire = zeros(numanimals,1);
|
||||
|
||||
% Find all row numbers where an animal was a parent
|
||||
for i=1:numanimals
|
||||
% Find all animals that this animal birthed
|
||||
dammatch = ismember(damids, ids(i,:), 'rows');
|
||||
damindexes = find(dammatch == 1);
|
||||
dam(damindexes) = i;
|
||||
|
||||
% Find all animals that this animal sired
|
||||
sirematch = ismember(sireids, ids(i,:), 'rows');
|
||||
sireindexes = find(sirematch == 1);
|
||||
sire(sireindexes) = i;
|
||||
end
|
||||
|
||||
% Store column numbers that need to be deleted
|
||||
% Column 6 contains an intermediate Excel calculation and always needs to
|
||||
% be deleted
|
||||
colstodelete = 6;
|
||||
|
||||
% Coerce each group to string format
|
||||
for i = 7:length(headers)
|
||||
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
|
||||
end
|
||||
|
||||
% Find any columns that need to be deleted
|
||||
for i = 7:length(headers)
|
||||
if length(uniquecell(data(:,i))) <= 1
|
||||
colname = headers{i};
|
||||
disp(['Column "' colname '" does not have any unique animals and will be removed'])
|
||||
disp('from this analysis');
|
||||
colstodelete = [colstodelete i];
|
||||
end
|
||||
end
|
||||
|
||||
% Delete the appropriate columns from the datasheet and the headers
|
||||
data(:,colstodelete) = [];
|
||||
headers(colstodelete) = [];
|
||||
|
||||
% Determine how many contemporary groups there are
|
||||
numgroups = ones(1, length(headers)-5);
|
||||
for i = 6:length(headers)
|
||||
numgroups(i-5) = length(uniquecell(data(:,i)));
|
||||
end
|
||||
|
||||
% If there are more groups than animals, then the analysis cannot continue
|
||||
if sum(numgroups) >= numanimals
|
||||
disp('There are more contemporary groups than animals. The analysis will now abort.');
|
||||
return
|
||||
end
|
||||
|
||||
% Define a "normal" animal as one of the last in the groups, provided that
|
||||
% all traits do not have null values
|
||||
normal = cell([1 length(headers)-5]);
|
||||
for i = 6:length(headers)
|
||||
for j = numanimals:-1:1
|
||||
if not(cellfun(@isempty, data(j,i)))
|
||||
normal(i - 5) = data(j,i);
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
% Print the results of the "normal" definition
|
||||
disp(' ')
|
||||
disp('For the purposes of this analysis, a "normal" animal will be defined')
|
||||
disp('by the following traits:')
|
||||
for i = 6:length(headers)
|
||||
disp([headers{i} ': ' normal{i-5}])
|
||||
end
|
||||
disp(' ')
|
||||
disp('If no animal matching this description exists, the results may appear')
|
||||
disp('outlandish, but are still as correct as the accuracy suggests')
|
||||
disp(' ')
|
||||
|
||||
disp('Processing and formatting data... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Create the fixed-effect matrix
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Creating the fixed-effect matrix...')
|
||||
|
||||
% Form the fixed effect matrix
|
||||
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
|
||||
X(:,1) = ones(1, numanimals);
|
||||
|
||||
% Create an external counter that will increment through both loops
|
||||
I = 2;
|
||||
|
||||
% Store the traits in a string cell array
|
||||
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
|
||||
|
||||
% Iterate through each group
|
||||
for i = 1:length(normal)
|
||||
% Find the traits that are present in this trait
|
||||
traits = uniquecell(data(:,i+5));
|
||||
|
||||
% Remove the "normal" version from the analysis
|
||||
normalindex = find(strcmp(traits, normal{i}));
|
||||
traits(normalindex) = [];
|
||||
|
||||
% Iterate inside of the group
|
||||
for j = 1:length(traits)
|
||||
matchedindex = find(strcmp(data(:,i+5), traits{j}));
|
||||
X(matchedindex, I) = 1;
|
||||
|
||||
% Add this trait to the string
|
||||
adjustedtraits(I - 1) = traits(j);
|
||||
|
||||
% Increment the big counter
|
||||
I = I + 1;
|
||||
end
|
||||
end
|
||||
|
||||
disp('Creating the fixed-effect matrix... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Additive relationship matrix
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Creating the additive relationship matrix...')
|
||||
|
||||
% Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals);
|
||||
|
||||
% Create the additive relationship matrix by the FORTRAN method presented
|
||||
% by Henderson
|
||||
for i = 1:numanimals
|
||||
if dam(i) ~= 0 && sire(i) ~= 0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*(A(j,sire(i))+A(j,dam(i)));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1 + 0.5*A(sire(i),dam(i));
|
||||
elseif dam(i) ~= 0 && sire(i) == 0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*A(j,dam(i));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1;
|
||||
elseif dam(i) == 0 && sire(i) ~=0
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0.5*A(j,sire(i));
|
||||
A(i,j) = A(j,i);
|
||||
end
|
||||
A(i,i) = 1;
|
||||
else
|
||||
for j = 1:(i-1)
|
||||
A(j,i) = 0;
|
||||
A(i,j) = 0;
|
||||
end
|
||||
A(i,i) = 1;
|
||||
end
|
||||
end
|
||||
|
||||
disp('Creating the additive relationship matrix... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Perform BLUP
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Solving the mixed-model equations')
|
||||
|
||||
% Extract the observed data
|
||||
Y = cell2mat(data(:, 5));
|
||||
|
||||
% The identity matrix for random effects
|
||||
Z = eye(numanimals, numanimals);
|
||||
|
||||
% Remove items where there is no data
|
||||
nullobs = find(isnan(Y));
|
||||
Z(nullobs, nullobs) = 0;
|
||||
|
||||
% Calculate heritability
|
||||
lambda = (1-h2)/h2;
|
||||
|
||||
% Use the mixed-model equations
|
||||
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
|
||||
|
||||
% Find the accuracies
|
||||
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
|
||||
reliability = 1 - diaginv.*lambda;
|
||||
|
||||
disp('Solving the mixed-model equations... Done!')
|
||||
toc
|
||||
disp(' ')
|
||||
|
||||
%% Output the results
|
||||
tic
|
||||
disp(' ')
|
||||
disp('Saving results...')
|
||||
|
||||
% Find how many traits we found BLUE for
|
||||
numgroups = numgroups - 1;
|
||||
|
||||
% Start printing results to output
|
||||
fileID = fopen([savepath savename], 'w');
|
||||
fprintf(fileID, 'beefblup Results Report\n');
|
||||
fprintf(fileID, 'Produced using beefblup for MATLAB (');
|
||||
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
|
||||
fprintf(fileID, ')\n\n');
|
||||
fprintf(fileID, 'Input:\t');
|
||||
fprintf(fileID, '%s', fullname);
|
||||
fprintf(fileID, '\nAnalysis performed:\t');
|
||||
fprintf(fileID, date);
|
||||
fprintf(fileID, '\nTrait examined:\t');
|
||||
fprintf(fileID, [headers{5}]);
|
||||
fprintf(fileID, '\n\n');
|
||||
|
||||
% Print base population stats
|
||||
fprintf(fileID, 'Base Population:\n');
|
||||
for i = 1:length(numgroups)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [headers{i+5}]);
|
||||
fprintf(fileID, ':\t');
|
||||
fprintf(fileID, [normal{i}]);
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
fprintf(fileID, '\tMean ');
|
||||
fprintf(fileID, [headers{5}]);
|
||||
fprintf(fileID, ':\t');
|
||||
fprintf(fileID, num2str(solutions(1)));
|
||||
fprintf(fileID, '\n\n');
|
||||
|
||||
I = 2;
|
||||
|
||||
% Contemporary group adjustments
|
||||
fprintf(fileID, 'Contemporary Group Effects:\n');
|
||||
for i = 1:length(numgroups)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [headers{i+5}]);
|
||||
fprintf(fileID, '\tEffect\tReliability\n');
|
||||
for j = 1:numgroups(i)
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [adjustedtraits{I-1}]);
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(solutions(I)));
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(reliability(I)));
|
||||
fprintf(fileID, '\n');
|
||||
|
||||
I = I + 1;
|
||||
end
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
fprintf(fileID, '\n');
|
||||
|
||||
% Expected breeding values
|
||||
fprintf(fileID, 'Expected Breeding Values:\n');
|
||||
fprintf(fileID, '\tID\tEBV\tReliability\n');
|
||||
for i = 1:numanimals
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, [data{i,1}]);
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(solutions(i+I-1)));
|
||||
fprintf(fileID, '\t');
|
||||
fprintf(fileID, num2str(reliability(i+I-1)));
|
||||
fprintf(fileID, '\n');
|
||||
end
|
||||
|
||||
fprintf(fileID, '\n - END REPORT -');
|
||||
fclose(fileID);
|
||||
|
||||
disp('Saving results... Done!')
|
||||
toc
|
||||
disp(' ')
|
|
@ -1,9 +0,0 @@
|
|||
% uniquenan
|
||||
% Serves the same purpose as UNIQUE, but ensures any empty cells are not
|
||||
% counted
|
||||
function y = uniquecell(x)
|
||||
y = unique(x);
|
||||
if any(cellfun(@isempty, y))
|
||||
y(cellfun(@isempty, y)) = [];
|
||||
end
|
||||
end
|
6
Project.toml
Normal file
6
Project.toml
Normal file
|
@ -0,0 +1,6 @@
|
|||
[deps]
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44"
|
||||
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
77
README.md
77
README.md
|
@ -13,35 +13,34 @@ Why? It's part of my effort to
|
|||
## Installation
|
||||
|
||||
1. [Download and install Julia](https://julialang.org/downloads/platform/)
|
||||
2. Open a new Julia window and type the `]` key
|
||||
3. Type `add XLSX Gtk` and press **Enter**
|
||||
|
||||
Alternatively, you can run the [install
|
||||
script](https://github.com/MillironX/beefblup/raw/master/Julia/install.jl) from
|
||||
Julia.
|
||||
2. Download the [beefblup ZIP
|
||||
file](https://github.com/MillironX/beefblup/archive/refs/tags/v0.2.zip) and unzip it someplace memorable
|
||||
3. In your file explorer, copy the address of the "beefblup" folder
|
||||
4. Launch Julia
|
||||
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||
6. Type the `]` key
|
||||
7. Type `activate .` and press **Enter**
|
||||
8. Type `instantiate` and press **Enter**
|
||||
9. Installation is done: you can close the Julia window
|
||||
|
||||
## How to Use
|
||||
|
||||
> **Note:** beefblup and [Juno](https://junolab.org)/[Julia Pro](https://juliacomputing.com/products/juliapro.html) currently [don't get along](https://github.com/JunoLab/Juno.jl/issues/118).
|
||||
> Although it's tempting to just open up beefblup in Juno and press the big play
|
||||
> button, it won't work. Follow these instructions until it's fixed. If you
|
||||
> don't know what Juno is: ignore this message.
|
||||
|
||||
1. Download the [beefblup ZIP
|
||||
file](https://github.com/MillironX/beefblup/archive/v0.1.zip) and unzip it
|
||||
someplace memorable
|
||||
2. Make a copy of the "Master BLUP Worksheet" and replace the sample data with your own
|
||||
3. If you wish to add more contemporary group traits to your analysis, replace
|
||||
or add them to the right of the Purple section
|
||||
4. Save and close
|
||||
5. In your file explorer, copy the address of the "Julia" folder
|
||||
6. Launch Julia
|
||||
7. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup\Julia")`)
|
||||
8. Type `include("beefblup.jl")` and press **Enter**
|
||||
9. Select the spreadsheet you created in steps 1-4
|
||||
10. Follow the on-screen prompts
|
||||
11. **#KeepEPDsReal!**
|
||||
1. Make a copy of the "sample.csv" spreadsheet and replace the data with your own
|
||||
1. The trait you wish to calculate EBVs for always goes in the rightmost column
|
||||
2. If you wish to add more contemporary group traits to your analysis, include them before the rightmost column
|
||||
2. Save and close
|
||||
3. In your file explorer, copy the address of the "beefblup" folder
|
||||
4. Launch Julia
|
||||
5. Type `cd("<the address copied in step 5")` and press **Enter** (For example,
|
||||
`cd("C:\Users\MillironX\Documents\beefblup")`)
|
||||
6. Type the `]` key
|
||||
7. Type `activate .` and press **Enter**
|
||||
8. Press **Backspace**
|
||||
9. Type `include("src/beefblup.jl")` and press **Enter**
|
||||
10. Select the spreadsheet you created in steps 1-4
|
||||
11. Follow the on-screen prompts
|
||||
12. **#KeepEPDsReal!**
|
||||
|
||||
## For Programmers
|
||||
|
||||
|
@ -51,19 +50,19 @@ Julia.
|
|||
|
||||
### Development Roadmap
|
||||
|
||||
| Version | Feature |
|
||||
| ------- | ------------------------------------------------------------------- |
|
||||
| v0.1 | Julia port of original MATLAB script |
|
||||
| v0.2 | Spreadsheet format redesign |
|
||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) |
|
||||
| v0.4 | Add GUI for all options |
|
||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects |
|
||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) |
|
||||
| v0.7 | Multiple trait BLUP |
|
||||
| v0.8 | Maternal effects BLUP |
|
||||
| v0.9 | Genomic BLUP |
|
||||
| v0.10 | beefblup binaries |
|
||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) |
|
||||
| Version | Feature | Status |
|
||||
| ------- | ----------------------------------------------------------------------------------- | ------------------ |
|
||||
| v0.1 | Julia port of original MATLAB script | :heavy_check_mark: |
|
||||
| v0.2 | Spreadsheet format redesign | :heavy_check_mark: |
|
||||
| v0.3 | API rewrite (change to function calls and package format instead of script running) | |
|
||||
| v0.4 | Add GUI for all options | |
|
||||
| v0.5 | Automatically calculated Age-Of-Dam, Year, and Season fixed-effects | |
|
||||
| v0.6 | Repeated measurement BLUP (aka dairyblup) | |
|
||||
| v0.7 | Multiple trait BLUP | |
|
||||
| v0.8 | Maternal effects BLUP | |
|
||||
| v0.9 | Genomic BLUP | |
|
||||
| v0.10 | beefblup binaries | |
|
||||
| v1.0 | [Finally, RELEASE!!!](https://youtu.be/1CBjxGdgC1w?t=282) | |
|
||||
|
||||
### Bug Reports
|
||||
|
||||
|
|
8
data/sample.csv
Normal file
8
data/sample.csv
Normal file
|
@ -0,0 +1,8 @@
|
|||
id,birthdate,dam,sire,year,sex,weaning_weight
|
||||
1,1/1/1990,,,1990,male,354
|
||||
2,1/1/1990,,,1990,female,251
|
||||
3,1/1/1991,,1,1991,male,327
|
||||
4,1/1/1991,,1,1991,female,328
|
||||
5,1/1/1991,2,1,1991,male,301
|
||||
6,1/1/1991,2,,1991,female,270
|
||||
7,1/1/1992,,,1992,male,330
|
|
572
Julia/beefblup.jl → src/beefblup.jl
Normal file → Executable file
572
Julia/beefblup.jl → src/beefblup.jl
Normal file → Executable file
|
@ -1,287 +1,285 @@
|
|||
# beefblup
|
||||
# Main script for performing single-variate BLUP to find beef cattle
|
||||
# breeding values
|
||||
# Usage: julia beefblup.jl
|
||||
# (C) 2020 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
|
||||
# Import the required packages
|
||||
using XLSX
|
||||
using LinearAlgebra
|
||||
using Dates
|
||||
using Gtk
|
||||
|
||||
# Display stuff
|
||||
println("beefblup v 0.1")
|
||||
println("(C) 2020 Thomas A. Christensen II")
|
||||
println("https://github.com/millironx/beefblup")
|
||||
print("\n")
|
||||
|
||||
### Prompt User
|
||||
# Ask for an input spreadsheet
|
||||
path = open_dialog_native(
|
||||
"Select a beefblup worksheet",
|
||||
GtkNullContainer(),
|
||||
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
|
||||
)
|
||||
|
||||
# Ask for an output text filename
|
||||
savepath = save_dialog_native(
|
||||
"Save your beefblup results",
|
||||
GtkNullContainer(),
|
||||
(GtkFileFilter("*.txt", name="Results file"),
|
||||
"*.txt")
|
||||
)
|
||||
|
||||
# Ask for heritability
|
||||
print("What is the heritability for this trait?> ")
|
||||
h2 = parse(Float64, readline(stdin))
|
||||
|
||||
### Import input filename
|
||||
print("[🐮]: Importing Excel file...")
|
||||
|
||||
# Import data from a suitable spreadsheet
|
||||
data = XLSX.readxlsx(path)[1][:]
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Process input file
|
||||
print("[🐮]: Processing and formatting data...")
|
||||
|
||||
# Extract the headers into a separate array
|
||||
headers = data[1,:]
|
||||
data = data[2:end,:]
|
||||
|
||||
# Sort the array by date
|
||||
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
|
||||
|
||||
# Define fields to hold id values for animals and their parents
|
||||
ids = string.(data[:,1])
|
||||
damids = string.(data[:,3])
|
||||
sireids = string.(data[:,4])
|
||||
numanimals = length(ids)
|
||||
|
||||
# Find the index values for animals and their parents
|
||||
dam = indexin(damids, ids)
|
||||
sire = indexin(sireids, ids)
|
||||
|
||||
# Store column numbers that need to be deleted
|
||||
# Column 6 contains an intermediate Excel calculation and always need to
|
||||
# be deleted
|
||||
colstokeep = [1, 2, 3, 4, 5]
|
||||
|
||||
# Find any columns that need to be deleted
|
||||
for i in 7:length(headers)
|
||||
if length(unique(data[:,i])) <= 1
|
||||
colname = headers[i]
|
||||
print("Column '")
|
||||
print(colname)
|
||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
||||
else
|
||||
push!(colstokeep, i)
|
||||
end
|
||||
end
|
||||
|
||||
# Delete the appropriate columns from the datasheet and the headers
|
||||
data = data[:, colstokeep]
|
||||
headers = headers[colstokeep]
|
||||
|
||||
# Determine how many contemporary groups there are
|
||||
numgroups = ones(1, length(headers)-5)
|
||||
for i in 6:length(headers)
|
||||
numgroups[i-5] = length(unique(data[:,i]))
|
||||
end
|
||||
|
||||
# If there are more groups than animals, then the analysis cannot continue
|
||||
if sum(numgroups) >= numanimals
|
||||
println("There are more contemporary groups than animals. The analysis will
|
||||
now abort.")
|
||||
exit()
|
||||
end
|
||||
|
||||
# Define a "normal" animal as one of the last in the groups, provided that
|
||||
# all traits do not have null values
|
||||
normal = Array{String}(undef,1,length(headers)-5)
|
||||
for i in 6:length(headers)
|
||||
for j in numanimals:-1:1
|
||||
if !ismissing(data[j,i])
|
||||
normal[i-5] = string(data[j,i])
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Create the fixed-effect matrix
|
||||
print("[🐮]: Creating the fixed-effect matrix...")
|
||||
|
||||
# Form the fixed-effect matrix
|
||||
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
||||
X[:,1] = ones(Int8, 1, numanimals)
|
||||
|
||||
# Create an external counter that will increment through both loops
|
||||
counter = 2
|
||||
|
||||
# Store the traits in a string array
|
||||
adjustedtraits =
|
||||
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
||||
# Iterate through each group
|
||||
for i in 1:length(normal)
|
||||
# Find the traits that are present in this trait
|
||||
localdata = string.(data[:,i+5])
|
||||
traits = unique(localdata)
|
||||
# Remove the normal version from the analysis
|
||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
||||
# Iterate inside of the group
|
||||
for j in 1:length(effecttraits)
|
||||
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
||||
X[matchedindex, counter] .= 1
|
||||
# Add this trait to the string
|
||||
adjustedtraits[counter - 1] = traits[j]
|
||||
# Increment the big counter
|
||||
global counter = counter + 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Additive relationship matrix
|
||||
print("[🐮]: Creating additive relationship matrix...")
|
||||
|
||||
# Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals)
|
||||
|
||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||
# Henderson
|
||||
for i in 1:numanimals
|
||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,dam[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,sire[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
else
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0
|
||||
A[i,j] = 0
|
||||
end
|
||||
A[i,i] = 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Perform BLUP
|
||||
print("[🐮]: Solving the mixed-model equations...")
|
||||
|
||||
# Extract the observed data
|
||||
Y = convert(Array{Float64}, data[:,5])
|
||||
|
||||
# The random effects matrix
|
||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||
|
||||
# Remove items where there is no data
|
||||
nullobs = findall(isnothing, Y)
|
||||
Z[nullobs, nullobs] .= 0
|
||||
|
||||
# Calculate heritability
|
||||
λ = (1-h2)/h2
|
||||
|
||||
# Use the mixed-model equations
|
||||
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
||||
MMY = [X'*Y; Z'*Y]
|
||||
solutions = MME\MMY
|
||||
|
||||
# Find the accuracies
|
||||
diaginv = diag(inv(MME))
|
||||
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Output the results
|
||||
print("[🐮]: Saving results...")
|
||||
|
||||
# Find how many traits we found BLUE for
|
||||
numgroups = numgroups .- 1
|
||||
|
||||
# Start printing results to output
|
||||
fileID = open(savepath, "w")
|
||||
write(fileID, "beefblup Results Report\n")
|
||||
write(fileID, "Produced using beefblup for Julia (")
|
||||
write(fileID, "https://github.com/millironx/beefblup")
|
||||
write(fileID, ")\n\n")
|
||||
write(fileID, "Input:\t")
|
||||
write(fileID, path)
|
||||
write(fileID, "\nAnalysis performed:\t")
|
||||
write(fileID, string(Dates.today()))
|
||||
write(fileID, "\nTrait examined:\t")
|
||||
write(fileID, headers[5])
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Print base population stats
|
||||
write(fileID, "Base Population:\n")
|
||||
for i in 1:length(numgroups)
|
||||
write(fileID, "\t")
|
||||
write(fileID, headers[i+5])
|
||||
write(fileID, ":\t")
|
||||
write(fileID, normal[i])
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\tMean ")
|
||||
write(fileID, headers[5])
|
||||
write(fileID, ":\t")
|
||||
write(fileID, string(solutions[1]))
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Contemporary group adjustments
|
||||
counter = 2
|
||||
write(fileID, "Contemporary Group Effects:\n")
|
||||
for i in 1:length(numgroups)
|
||||
write(fileID, "\t")
|
||||
write(fileID, headers[i+5])
|
||||
write(fileID, "\tEffect\tReliability\n")
|
||||
for j in 1:numgroups[i]
|
||||
write(fileID, "\t")
|
||||
write(fileID, adjustedtraits[counter - 1])
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[counter]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[counter]))
|
||||
write(fileID, "\n")
|
||||
|
||||
global counter = counter + 1
|
||||
end
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\n")
|
||||
|
||||
# Expected breeding values
|
||||
write(fileID, "Expected Breeding Values:\n")
|
||||
write(fileID, "\tID\tEBV\tReliability\n")
|
||||
for i in 1:numanimals
|
||||
write(fileID, "\t")
|
||||
write(fileID, ids[i])
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[i+counter-1]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[i+counter-1]))
|
||||
write(fileID, "\n")
|
||||
end
|
||||
|
||||
write(fileID, "\n - END REPORT -")
|
||||
close(fileID)
|
||||
|
||||
print("Done!\n")
|
||||
#!/bin/bash
|
||||
#=
|
||||
exec julia --project=$(realpath $(dirname $(dirname "${BASH_SOURCE[0]}"))) "${BASH_SOURCE[0]}" "$@"
|
||||
=#
|
||||
# beefblup
|
||||
# Main script for performing single-variate BLUP to find beef cattle
|
||||
# breeding values
|
||||
# Usage: julia beefblup.jl
|
||||
# (C) 2021 Thomas A. Christensen II
|
||||
# Licensed under BSD-3-Clause License
|
||||
# cSpell:includeRegExp #.*
|
||||
# cSpell:includeRegExp ("""|''')[^\1]*\1
|
||||
|
||||
# Import the required packages
|
||||
using CSV
|
||||
using DataFrames
|
||||
using LinearAlgebra
|
||||
using Dates
|
||||
using Gtk
|
||||
|
||||
# Display stuff
|
||||
println("beefblup v 0.2")
|
||||
println("(C) 2021 Thomas A. Christensen II")
|
||||
println("https://github.com/millironx/beefblup")
|
||||
print("\n")
|
||||
|
||||
### Prompt User
|
||||
# Ask for an input spreadsheet
|
||||
path = open_dialog_native(
|
||||
"Select a beefblup worksheet",
|
||||
GtkNullContainer(),
|
||||
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
|
||||
)
|
||||
|
||||
# Ask for an output text filename
|
||||
savepath = save_dialog_native(
|
||||
"Save your beefblup results",
|
||||
GtkNullContainer(),
|
||||
(GtkFileFilter("*.txt", name="Results file"),
|
||||
"*.txt")
|
||||
)
|
||||
|
||||
# Ask for heritability
|
||||
print("What is the heritability for this trait?> ")
|
||||
h2 = parse(Float64, readline(stdin))
|
||||
|
||||
### Import input filename
|
||||
print("[🐮]: Importing data file...")
|
||||
|
||||
# Import data from a suitable spreadsheet
|
||||
data = DataFrame(CSV.File(path))
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Process input file
|
||||
print("[🐮]: Processing and formatting data...")
|
||||
|
||||
# Sort the array by date
|
||||
sort!(data, :birthdate)
|
||||
|
||||
# Define fields to hold id values for animals and their parents
|
||||
numanimals = length(data.id)
|
||||
|
||||
# Find the index values for animals and their parents
|
||||
dam = indexin(data.dam, data.id)
|
||||
sire = indexin(data.sire, data.id)
|
||||
|
||||
# Extract all of the fixed effects
|
||||
fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end-1]
|
||||
|
||||
# Find any columns that need to be deleted
|
||||
for i in 1:ncol(fixedfx)
|
||||
if length(unique(fixedfx[:,i])) <= 1
|
||||
colname = names(fixedfx)[i]
|
||||
print("Column '")
|
||||
print(colname)
|
||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
||||
deletecols!(fixedfx,i)
|
||||
end
|
||||
end
|
||||
|
||||
# Determine how many contemporary groups there are
|
||||
numtraits = ncol(fixedfx)
|
||||
numgroups = ones(1, numtraits)
|
||||
for i in 1:numtraits
|
||||
numgroups[i] = length(unique(fixedfx[:,i]))
|
||||
end
|
||||
|
||||
# If there are more groups than animals, then the analysis cannot continue
|
||||
if sum(numgroups) >= numanimals
|
||||
println("There are more contemporary groups than animals. The analysis will
|
||||
now abort.")
|
||||
exit()
|
||||
end
|
||||
|
||||
# Define a "normal" animal as one of the last in the groups, provided that
|
||||
# all traits do not have null values
|
||||
normal = Array{String}(undef,1,numtraits)
|
||||
for i in 1:numtraits
|
||||
for j in numanimals:-1:1
|
||||
if !ismissing(fixedfx[j,i])
|
||||
normal[i] = string(fixedfx[j,i])
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Create the fixed-effect matrix
|
||||
print("[🐮]: Creating the fixed-effect matrix...")
|
||||
|
||||
# Form the fixed-effect matrix
|
||||
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
|
||||
X[:,1] = ones(Int8, 1, numanimals)
|
||||
|
||||
# Create an external counter that will increment through both loops
|
||||
counter = 2
|
||||
|
||||
# Store the traits in a string array
|
||||
adjustedtraits =
|
||||
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
||||
# Iterate through each group
|
||||
for i in 1:length(normal)
|
||||
# Find the traits that are present in this trait
|
||||
localdata = string.(fixedfx[:,i])
|
||||
traits = unique(localdata)
|
||||
# Remove the normal version from the analysis
|
||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
||||
# Iterate inside of the group
|
||||
for j in 1:(length(effecttraits))
|
||||
matchedindex = findall(x -> x == effecttraits[j], localdata)
|
||||
X[matchedindex, counter] .= 1
|
||||
# Add this trait to the string
|
||||
adjustedtraits[counter - 1] = traits[j]
|
||||
# Increment the big counter
|
||||
global counter = counter + 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Additive relationship matrix
|
||||
print("[🐮]: Creating additive relationship matrix...")
|
||||
|
||||
# Create an empty matrix for the additive relationship matrix
|
||||
A = zeros(numanimals, numanimals)
|
||||
|
||||
# Create the additive relationship matrix by the FORTRAN method presented by
|
||||
# Henderson
|
||||
for i in 1:numanimals
|
||||
if !isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
|
||||
elseif !isnothing(dam[i]) && isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,dam[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
elseif isnothing(dam[i]) && !isnothing(sire[i])
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0.5*A[j,sire[i]]
|
||||
A[i,j] = A[j,i]
|
||||
end
|
||||
A[i,i] = 1
|
||||
else
|
||||
for j in 1:(i-1)
|
||||
A[j,i] = 0
|
||||
A[i,j] = 0
|
||||
end
|
||||
A[i,i] = 1
|
||||
end
|
||||
end
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Perform BLUP
|
||||
print("[🐮]: Solving the mixed-model equations...")
|
||||
|
||||
# Extract the observed data
|
||||
Y = convert(Array{Float64}, data[:,end])
|
||||
|
||||
# The random effects matrix
|
||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||
|
||||
# Remove items where there is no data
|
||||
nullobs = findall(isnothing, Y)
|
||||
Z[nullobs, nullobs] .= 0
|
||||
|
||||
# Calculate heritability
|
||||
λ = (1-h2)/h2
|
||||
|
||||
# Use the mixed-model equations
|
||||
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
|
||||
MMY = [X'*Y; Z'*Y]
|
||||
solutions = MME\MMY
|
||||
|
||||
# Find the accuracies
|
||||
diaginv = diag(inv(MME))
|
||||
reliability = ones(Float64, length(diaginv)) - diaginv.*λ
|
||||
|
||||
print("Done!\n")
|
||||
|
||||
### Output the results
|
||||
print("[🐮]: Saving results...")
|
||||
|
||||
# Find how many traits we found BLUE for
|
||||
numgroups = numgroups .- 1
|
||||
|
||||
# Extract the names of the traits
|
||||
fixedfxnames = names(fixedfx)
|
||||
traitname = names(data)[end]
|
||||
|
||||
# Start printing results to output
|
||||
fileID = open(savepath, "w")
|
||||
write(fileID, "beefblup Results Report\n")
|
||||
write(fileID, "Produced using beefblup (")
|
||||
write(fileID, "https://github.com/millironx/beefblup")
|
||||
write(fileID, ")\n\n")
|
||||
write(fileID, "Input:\t")
|
||||
write(fileID, path)
|
||||
write(fileID, "\nAnalysis performed:\t")
|
||||
write(fileID, string(Dates.today()))
|
||||
write(fileID, "\nTrait examined:\t")
|
||||
write(fileID, traitname)
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Print base population stats
|
||||
write(fileID, "Base Population:\n")
|
||||
for i in 1:length(normal)
|
||||
write(fileID, "\t")
|
||||
write(fileID, fixedfxnames[i])
|
||||
write(fileID, ":\t")
|
||||
write(fileID, normal[i])
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\tMean ")
|
||||
write(fileID, traitname)
|
||||
write(fileID, ":\t")
|
||||
write(fileID, string(solutions[1]))
|
||||
write(fileID, "\n\n")
|
||||
|
||||
# Contemporary group adjustments
|
||||
counter = 2
|
||||
write(fileID, "Contemporary Group Effects:\n")
|
||||
for i in 1:length(numgroups)
|
||||
write(fileID, "\t")
|
||||
write(fileID, fixedfxnames[i])
|
||||
write(fileID, "\tEffect\tReliability\n")
|
||||
for j in 1:numgroups[i]
|
||||
write(fileID, "\t")
|
||||
write(fileID, adjustedtraits[counter - 1])
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[counter]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[counter]))
|
||||
write(fileID, "\n")
|
||||
|
||||
global counter = counter + 1
|
||||
end
|
||||
write(fileID, "\n")
|
||||
end
|
||||
write(fileID, "\n")
|
||||
|
||||
# Expected breeding values
|
||||
write(fileID, "Expected Breeding Values:\n")
|
||||
write(fileID, "\tID\tEBV\tReliability\n")
|
||||
for i in 1:numanimals
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(data.id[i]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(solutions[i+counter-1]))
|
||||
write(fileID, "\t")
|
||||
write(fileID, string(reliability[i+counter-1]))
|
||||
write(fileID, "\n")
|
||||
end
|
||||
|
||||
write(fileID, "\n - END REPORT -")
|
||||
close(fileID)
|
||||
|
||||
print("Done!\n")
|
Loading…
Reference in a new issue