mirror of
https://github.com/MillironX/beefblup.git
synced 2024-12-22 09:08:16 +00:00
Reformat document
This commit is contained in:
parent
289984be2f
commit
93564b247e
1 changed files with 170 additions and 171 deletions
341
src/BeefBLUP.jl
341
src/BeefBLUP.jl
|
@ -18,32 +18,32 @@ using Gtk
|
||||||
# Main entry-level function - acts just like the script
|
# Main entry-level function - acts just like the script
|
||||||
"""
    beefblup()

Run a beefblup analysis interactively.

Prompt for an input worksheet (`*.csv`) and an output report (`*.txt`) with native
Gtk file dialogs, read the heritability of the trait from `stdin`, and hand all three
to `beefblup(path, savepath, h2)`.

Returns `nothing`. If either dialog comes back with an empty selection (the dialog was
dismissed), a warning is logged and no analysis is run.

# Throws
- `ArgumentError` if the heritability typed at the prompt is not a valid `Float64`.
"""
function beefblup()
    # Ask for an input spreadsheet
    path = open_dialog_native(
        "Select a beefblup worksheet",
        GtkNullContainer(),
        ("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
    )
    # An empty selection means there is no file to read; stop here instead of
    # letting the CSV reader fail on an empty path later on.
    if isempty(path)
        @warn "no beefblup worksheet was selected; analysis cancelled"
        return nothing
    end

    # Ask for an output text filename
    savepath = save_dialog_native(
        "Save your beefblup results",
        GtkNullContainer(),
        (GtkFileFilter("*.txt", name="Results file"),
         "*.txt")
    )
    if isempty(savepath)
        @warn "no results file was chosen; analysis cancelled"
        return nothing
    end

    # Ask for heritability
    print("What is the heritability for this trait?> ")
    h2 = parse(Float64, readline(stdin))

    return beefblup(path, savepath, h2)
end
|
||||||
|
|
||||||
function beefblup(datafile::String, h2::Float64)
|
function beefblup(datafile::String, h2::Float64)
|
||||||
# Assume the data is named the same as the file without the trailing extension
|
# Assume the data is named the same as the file without the trailing extension
|
||||||
dataname = join(split(datafile, ".")[1:end-1])
|
dataname = join(split(datafile, ".")[1:end - 1])
|
||||||
|
|
||||||
# Create a new results name
|
# Create a new results name
|
||||||
resultsfile = string(dataname, "_results.txt")
|
resultsfile = string(dataname, "_results.txt")
|
||||||
|
@ -55,211 +55,210 @@ end
|
||||||
# Main worker function, can perform all the work if given all the user input
|
# Main worker function, can perform all the work if given all the user input
|
||||||
"""
    beefblup(path::String, savepath::String, h2::Float64)

Fit a single-trait animal model to the worksheet at `path` and write a plain-text
report to `savepath`.

The worksheet is read as CSV and must contain the columns `id`, `birthdate`, `sire`
and `dam`. The last column is taken as the observed trait, and every remaining column
is treated as a fixed effect (contemporary group). Animals are sorted by `birthdate`
before the additive relationship matrix is built.

# Arguments
- `path`: CSV worksheet to analyse.
- `savepath`: file the report is written to (overwritten if it exists).
- `h2`: heritability of the trait; must satisfy `0 < h2 <= 1`.

# Returns
`nothing`; the results are written to `savepath`.

# Throws
- `ArgumentError` if `h2` is outside `(0, 1]`.
- `ErrorException` if there are at least as many contemporary groups as animals.
"""
function beefblup(path::String, savepath::String, h2::Float64)
    # λ = (1 - h2) / h2 is undefined for h2 == 0 and meaningless outside (0, 1]
    0 < h2 <= 1 || throw(ArgumentError("heritability must be in (0, 1], got $h2"))

    # Import data from a suitable spreadsheet
    data = DataFrame(CSV.File(path))

    # Sort the array by date
    sort!(data, :birthdate)

    numanimals = length(data.id)

    # Find the row index of each animal's parents (`nothing` when not in the data)
    dam = indexin(data.dam, data.id)
    sire = indexin(data.sire, data.id)

    # Extract all of the fixed effects (everything but pedigree columns and the trait)
    fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:, 1:end - 1]

    # Find every column with a single level first and drop them in one go; removing
    # columns while iterating over the column indices skips columns and eventually
    # indexes past the end of the table.
    dropcols = [i for i in 1:ncol(fixedfx) if length(unique(fixedfx[:, i])) <= 1]
    for i in dropcols
        @warn string("column '", names(fixedfx)[i], "' does not have any unique animals and will be removed from this analysis")
    end
    if !isempty(dropcols)
        DataFrames.select!(fixedfx, Not(dropcols))
    end

    # Determine how many contemporary groups there are
    numtraits = ncol(fixedfx)
    numgroups = Int[length(unique(fixedfx[:, i])) for i in 1:numtraits]

    # If there are more groups than animals, then the analysis cannot continue
    if sum(numgroups) >= numanimals
        throw(ErrorException("there are more contemporary groups than animals"))
    end

    # Define a "normal" animal as the last one in each group that has a value
    normal = Vector{String}(undef, numtraits)
    for i in 1:numtraits
        lastknown = findlast(!ismissing, fixedfx[:, i])
        normal[i] = string(fixedfx[lastknown, i])
    end

    # Form the fixed-effect matrix: an intercept column plus one indicator column
    # per non-baseline level. Float64 rather than Int8, because `X' * X` on an
    # Int8 matrix is evaluated in Int8 and wraps once a count exceeds 127.
    numeffects = sum(numgroups) - numtraits
    X = zeros(Float64, numanimals, numeffects + 1)
    X[:, 1] .= 1

    # Name of the level behind each indicator column (column k + 1 <-> entry k)
    adjustedtraits = Vector{String}(undef, numeffects)
    counter = 2
    for i in 1:numtraits
        localdata = string.(fixedfx[:, i])
        # Every level except the baseline gets its own column
        effecttraits = filter(t -> t != normal[i], unique(localdata))
        for level in effecttraits
            X[localdata .== level, counter] .= 1
            # Label with the level that was actually matched for this column
            adjustedtraits[counter - 1] = level
            counter += 1
        end
    end

    # Additive relationship matrix among all animals
    A = _relationshipmatrix(sire, dam)

    # Extract the observed data. Animals without a record keep their row in the
    # relationship matrix but contribute nothing to the normal equations, so
    # their rows of X and Z (and their entry of Y) are zeroed.
    observations = data[:, end]
    nullobs = findall(ismissing, observations)
    Y = Float64[ismissing(y) ? 0.0 : y for y in observations]

    # The random effects matrix
    Z = Matrix{Float64}(I, numanimals, numanimals)
    X[nullobs, :] .= 0
    Z[nullobs, nullobs] .= 0

    # Variance ratio implied by the heritability
    λ = (1 - h2) / h2

    # Use the mixed-model equations
    MME = [X' * X X' * Z; Z' * X (Z' * Z) + (inv(A) .* λ)]
    MMY = [X' * Y; Z' * Y]
    solutions = MME \ MMY

    # Find the accuracies
    diaginv = diag(inv(MME))
    reliability = 1 .- diaginv .* λ

    # Number of non-baseline levels reported per fixed effect
    numlevels = numgroups .- 1

    # Extract the names of the traits
    fixedfxnames = names(fixedfx)
    traitname = names(data)[end]

    # The do-block closes the file even if writing fails part-way through
    open(savepath, "w") do io
        print(io, "beefblup Results Report\n")
        print(io, "Produced using beefblup (")
        print(io, "https://github.com/millironx/beefblup")
        print(io, ")\n\n")
        print(io, "Input:\t", path)
        print(io, "\nAnalysis performed:\t", Dates.today())
        print(io, "\nTrait examined:\t", traitname)
        print(io, "\n\n")

        # Print base population stats
        print(io, "Base Population:\n")
        for i in 1:numtraits
            print(io, "\t", fixedfxnames[i], ":\t", normal[i], "\n")
        end
        print(io, "\tMean ", traitname, ":\t", solutions[1], "\n\n")

        # Contemporary group adjustments; `row` walks the solution vector
        row = 2
        print(io, "Contemporary Group Effects:\n")
        for i in 1:numtraits
            print(io, "\t", fixedfxnames[i], "\tEffect\tReliability\n")
            for _ in 1:numlevels[i]
                print(io, "\t", adjustedtraits[row - 1])
                print(io, "\t", solutions[row])
                print(io, "\t", reliability[row], "\n")
                row += 1
            end
            print(io, "\n")
        end
        print(io, "\n")

        # Expected breeding values follow the fixed effects in the solution vector
        print(io, "Expected Breeding Values:\n")
        print(io, "\tID\tEBV\tReliability\n")
        for i in 1:numanimals
            print(io, "\t", data.id[i])
            print(io, "\t", solutions[i + row - 1])
            print(io, "\t", reliability[i + row - 1], "\n")
        end

        print(io, "\n - END REPORT -")
    end

    return nothing
end

# Build the additive relationship matrix with the tabular method: animal i's
# relationship to every earlier animal j is the mean of its parents' relationships
# to j, an unknown parent (`nothing`) contributing zero. Parents are expected to
# appear before their offspring in `sire`/`dam` ordering.
function _relationshipmatrix(sire, dam)
    numanimals = length(sire)
    A = zeros(numanimals, numanimals)
    for i in 1:numanimals
        s = sire[i]
        d = dam[i]
        for j in 1:(i - 1)
            fromsire = isnothing(s) ? 0.0 : A[j, s]
            fromdam = isnothing(d) ? 0.0 : A[j, d]
            A[j, i] = 0.5 * (fromsire + fromdam)
            A[i, j] = A[j, i]
        end
        # The diagonal exceeds 1 only when both parents are known and related
        A[i, i] = (isnothing(s) || isnothing(d)) ? 1.0 : 1 + 0.5 * A[s, d]
    end
    return A
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue