1
0
Fork 0
mirror of https://github.com/MillironX/beefblup.git synced 2024-09-21 05:12:05 +00:00
beefblup/MATLAB/beefblup.m

344 lines
9.1 KiB
Mathematica
Raw Normal View History

2018-09-14 15:28:10 +00:00
% beefblup
% Main script for performing single-variate BLUP to find beef cattle
% breeding values
% Usage: beefblup
% (C) 2018 Thomas A. Christensen II
% Licensed under BSD-3-Clause License
% Prepare the workspace for computation
clear
clc
close all
2018-11-13 19:37:04 +00:00
%% Display stuff
disp('beefblup v. 0.0.0.1')
disp('(C) 2018 Thomas A. Christensen II')
disp('https://github.com/millironx/beefblup')
disp(' ')
%% Prompt User
% Ask for an input spreadsheet
2018-09-14 15:28:10 +00:00
[name, path] = uigetfile('*.xlsx','Select a beefblup worksheet');
2018-11-13 19:37:04 +00:00
% Ask for an ouput text file
[savename, savepath, ~] = uiputfile('*.txt', 'Save your beefblup results', 'results');
% Ask for heritability
h2 = input('What is the heritability for this trait? >> ');
%% Import input file
tic
disp(' ')
disp('Importing Excel file...')
% Import data from a suitable spreadsheet
2018-09-14 15:28:10 +00:00
fullname = [path name];
clear name path
[~, ~, data] = xlsread(fullname);
2018-11-13 19:37:04 +00:00
disp('Importing Excel file... Done!')
toc
disp(' ')
%% Process input file
tic
disp(' ')
disp('Processing and formatting data...')
disp(' ')
2018-09-14 15:28:10 +00:00
% Extract the headers into a separate array
headers = data(1,:);
data(1,:) = [];
% Convert the string dates to numbers
data(:,2) = num2cell(datenum(data(:,2)));
% Sort the array by date
data = sortrows(data,2);
2018-09-21 22:42:23 +00:00
% Coerce all id fields to string format
strings = [1 3 4];
data(:,strings) = cellfun(@num2str, data(:,strings), 'UniformOutput', false);
2018-09-14 15:28:10 +00:00
% Create a lookup lambda function to find the animal represented by a
% certain id
animal2row = @(id) find(strcmp(data(:,1), id));
row2animal = @(rownum) [data{rownum,1}];
2018-09-19 15:41:07 +00:00
ids = [data{:,1}];
2018-09-21 22:42:23 +00:00
numanimals = length(data(:,1));
% Store column numbers that need to be deleted
% Column 6 contains an intermediate Excel calculation and always needs to
% be deleted
colstodelete = 6;
% Coerce each group to string format
for i = 7:length(headers)
data(:,i) = cellfun(@num2str, data(:,i), 'UniformOutput', false);
end
% Find any columns that need to be deleted
for i = 7:length(headers)
if length(uniquecell(data(:,i))) <= 1
colname = headers{i};
2018-09-17 15:10:45 +00:00
disp(['Column "' colname '" does not have any unique animals and will be removed'])
disp('from this analysis');
colstodelete = [colstodelete i];
end
end
% Delete the appropriate columns from the datasheet and the headers
data(:,colstodelete) = [];
headers(colstodelete) = [];
% Determine how many contemporary groups there are
numgroups = ones(1, length(headers)-5);
for i = 6:length(headers)
numgroups(i-5) = length(uniquecell(data(:,i)));
end
% If there are more groups than animals, then the analysis cannot continue
if sum(numgroups) >= numanimals
disp('There are more contemporary groups than animals. The analysis will now abort.');
return
end
2018-09-17 15:00:21 +00:00
% Define a "normal" animal as one of the last in the groups, provided that
% all traits do not have null values
normal = cell([1 length(headers)-5]);
for i = 6:length(headers)
for j = numanimals:-1:1
if not(cellfun(@isempty, data(j,i)))
normal(i - 5) = data(j,i);
2018-09-17 15:10:45 +00:00
break
2018-09-17 15:00:21 +00:00
end
end
2018-09-17 15:10:45 +00:00
end
% Print the results of the "normal" definition
disp(' ')
disp('For the purposes of this analysis, a "normal" animal will be defined')
disp('by the following traits:')
for i = 6:length(headers)
disp([headers{i} ': ' normal{i-5}])
end
disp(' ')
disp('If no animal matching this description exists, the results may appear')
disp('outlandish, but are still as correct as the accuracy suggests')
disp(' ')
2018-11-13 19:37:04 +00:00
disp('Processing and formatting data... Done!')
toc
disp(' ')
%% Create the fixed-effect matrix
tic
disp(' ')
disp('Creating the fixed-effect matrix...')
% Form the fixed effect matrix
X = zeros(numanimals, sum(numgroups)-length(numgroups)+1);
X(:,1) = ones(1, numanimals);
2018-09-19 15:20:01 +00:00
% Create an external counter that will increment through both loops
I = 2;
2018-09-22 04:58:47 +00:00
% Store the traits in a string cell array
adjustedtraits = cell(1, sum(numgroups)-length(numgroups));
% Iterate through each group
for i = 1:length(normal)
% Find the traits that are present in this trait
traits = uniquecell(data(:,i+5));
% Remove the "normal" version from the analysis
normalindex = find(strcmp(traits, normal{i}));
traits(normalindex) = [];
2018-09-19 15:20:01 +00:00
% Iterate inside of the group
for j = 1:length(traits)
matchedindex = find(strcmp(data(:,i+5), traits{j}));
X(matchedindex, I) = 1;
2018-09-22 04:58:47 +00:00
% Add this trait to the string
adjustedtraits(I - 1) = traits(j);
% Increment the big counter
2018-09-19 15:20:01 +00:00
I = I + 1;
end
2018-09-19 15:41:07 +00:00
end
2018-11-13 19:37:04 +00:00
disp('Creating the fixed-effect matrix... Done!')
toc
disp(' ')
%% Additive relationship matrix
tic
disp(' ')
disp('Creating the additive relationship matrix...')
2018-09-19 15:41:07 +00:00
% Create an empty matrix for the additive relationship matrix
2018-09-22 02:34:36 +00:00
A = zeros(numanimals, numanimals);
2018-09-21 22:42:23 +00:00
% Create lambdas to find sire and dam of each animal
id2dam = @(id) [data{animal2row(num2str(id)), 3}];
id2sire = @(id) [data{animal2row(num2str(id)), 4}];
row2dam = @(rownum) [data{rownum, 3}];
row2sire = @(rownum) [data{rownum, 4}];
rowofdam = @(id) animal2row(id2dam(id));
rowofsire = @(id) animal2row(id2sire(id));
row2rowofdam = @(rownum) rowofdam(row2animal(rownum));
row2rowofsire = @(rownum) rowofsire(row2animal(rownum));
2018-09-21 22:42:23 +00:00
% Create the additive relationship matrix by the FORTRAN method presented
% by Henderson
for i = 1:numanimals
id = row2animal(i);
if isempty(row2rowofsire(i)) && isempty(row2rowofdam(i))
2018-09-21 22:42:23 +00:00
for j = 1:(i-1)
A(j,i) = 0;
end
A(i,i) = 1;
continue
end
if isempty(row2rowofsire(i)) && not(isempty(row2dam(i)))
2018-09-21 22:42:23 +00:00
for j = 1:(i-1)
A(j,i) = A(j,row2rowofdam(i));
A(i,j) = A(j,row2rowofdam(i));
2018-09-21 22:42:23 +00:00
end
A(i,i) = 1;
continue
end
if isempty(row2rowofdam(i)) && not(isempty(row2rowofsire(i)))
2018-09-21 22:42:23 +00:00
for j=1:(i-1)
A(j,i) = A(j,row2rowofsire(i));
A(i,j) = A(j,row2rowofsire(i));
2018-09-21 22:42:23 +00:00
end
A(i,i) = 1;
continue
end
for j=1:(i-1)
A(j,i) = 0.5.*(A(j,row2rowofsire(i)) + A(j,row2rowofdam(i)));
A(i,j) = 0.5.*(A(j,row2rowofsire(i)) + A(j,row2rowofdam(i)));
2018-09-21 22:42:23 +00:00
end
A(i,i) = 1 + 0.5.*A(row2rowofsire(i), row2rowofdam(i));
2018-09-21 22:42:23 +00:00
continue
end
2018-09-21 22:48:43 +00:00
2018-11-13 19:37:04 +00:00
disp('Creating the additive relationship matrix... Done!')
toc
disp(' ')
%% Perform BLUP
tic
disp(' ')
disp('Solving the mixed-model equations')
2018-09-21 22:48:43 +00:00
% Extract the observed data
2018-09-22 02:34:36 +00:00
Y = cell2mat(data(:, 5));
2018-09-21 22:48:43 +00:00
% The identity matrix for random effects
2018-09-22 02:34:36 +00:00
Z = eye(numanimals, numanimals);
2018-09-21 22:48:43 +00:00
% Remove items where there is no data
nullobs = find(isnan(Y));
Z(nullobs, nullobs) = 0;
2018-11-13 19:37:04 +00:00
% Calculate heritability
2018-09-21 22:48:43 +00:00
lambda = (1-h2)/h2;
% Use the mixed-model equations
solutions = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]\[X'*Y; Z'*Y];
% Find the accuracies
diaginv = diag(inv([X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*lambda)]));
2018-09-22 04:58:47 +00:00
reliability = 1 - diaginv.*lambda;
2018-11-13 19:37:04 +00:00
disp('Solving the mixed-model equations... Done!')
toc
disp(' ')
%% Output the results
tic
disp(' ')
disp('Saving results...')
2018-09-22 04:58:47 +00:00
% Find how many traits we found BLUE for
numgroups = numgroups - 1;
% Start printing results to output
fileID = fopen([savepath savename], 'w');
fprintf(fileID, 'beefblup Results Report\n');
fprintf(fileID, 'Produced using beefblup for MATLAB (');
fprintf(fileID, '%s', 'https://github.com/millironx/beefblup');
fprintf(fileID, ')\n\n');
fprintf(fileID, 'Input:\t');
fprintf(fileID, '%s', fullname);
fprintf(fileID, '\nAnalysis performed:\t');
fprintf(fileID, date);
fprintf(fileID, '\nTrait examined:\t');
fprintf(fileID, [headers{5}]);
fprintf(fileID, '\n\n');
% Print base population stats
fprintf(fileID, 'Base Population:\n');
for i = 1:length(numgroups)
fprintf(fileID, '\t');
fprintf(fileID, [headers{i+5}]);
fprintf(fileID, ':\t');
fprintf(fileID, [normal{i}]);
fprintf(fileID, '\n');
end
fprintf(fileID, '\tMean ');
fprintf(fileID, [headers{5}]);
fprintf(fileID, ':\t');
fprintf(fileID, num2str(solutions(1)));
fprintf(fileID, '\n\n');
I = 2;
% Contemporary group adjustments
fprintf(fileID, 'Contemporary Group Effects:\n');
for i = 1:length(numgroups)
fprintf(fileID, '\t');
fprintf(fileID, [headers{i+5}]);
fprintf(fileID, '\tEffect\tReliability\n');
for j = 1:numgroups(i)
fprintf(fileID, '\t');
fprintf(fileID, [adjustedtraits{I-1}]);
fprintf(fileID, '\t');
fprintf(fileID, num2str(solutions(I)));
fprintf(fileID, '\t');
fprintf(fileID, num2str(reliability(I)));
fprintf(fileID, '\n');
I = I + 1;
end
fprintf(fileID, '\n');
end
fprintf(fileID, '\n');
% Expected breeding values
fprintf(fileID, 'Expected Breeding Values:\n');
fprintf(fileID, '\tID\tEBV\tReliability\n');
for i = 1:numanimals
fprintf(fileID, '\t');
fprintf(fileID, [data{i,1}]);
fprintf(fileID, '\t');
fprintf(fileID, num2str(solutions(i+I-1)));
fprintf(fileID, '\t');
fprintf(fileID, num2str(reliability(i+I-1)));
fprintf(fileID, '\n');
end
fprintf(fileID, '\n - END REPORT -');
2018-11-13 19:37:04 +00:00
fclose(fileID);
disp('Saving results... Done!')
toc
disp(' ')