1
0
Fork 0
mirror of https://github.com/MillironX/beefblup.git synced 2024-12-22 09:08:16 +00:00

Switch to UNIX line endings

This commit is contained in:
Thomas A. Christensen II 2021-06-18 12:07:58 -05:00
parent de0eb6f5a9
commit 82bf3cfa89
Signed by: millironx
GPG key ID: 139C07724802BC5D

562
src/beefblup.jl Normal file → Executable file
View file

@ -1,281 +1,281 @@
#!/bin/bash #!/bin/bash
#= #=
exec julia --project=$(realpath $(dirname $(dirname "${BASH_SOURCE[0]}"))) "${BASH_SOURCE[0]}" "$@" exec julia --project=$(realpath $(dirname $(dirname "${BASH_SOURCE[0]}"))) "${BASH_SOURCE[0]}" "$@"
=# =#
# beefblup # beefblup
# Main script for performing single-variate BLUP to find beef cattle # Main script for performing single-variate BLUP to find beef cattle
# breeding values # breeding values
# Usage: julia beefblup.jl # Usage: julia beefblup.jl
# (C) 2020 Thomas A. Christensen II # (C) 2020 Thomas A. Christensen II
# Licensed under BSD-3-Clause License # Licensed under BSD-3-Clause License
# cSpell:includeRegExp #.* # cSpell:includeRegExp #.*
# cSpell:includeRegExp ("""|''')[^\1]*\1 # cSpell:includeRegExp ("""|''')[^\1]*\1
# Import the required packages # Import the required packages
using CSV using CSV
using DataFrames using DataFrames
using LinearAlgebra using LinearAlgebra
using Dates using Dates
using Gtk using Gtk
# Display stuff # Display stuff
println("beefblup v 0.1") println("beefblup v 0.1")
println("(C) 2020 Thomas A. Christensen II") println("(C) 2020 Thomas A. Christensen II")
println("https://github.com/millironx/beefblup") println("https://github.com/millironx/beefblup")
print("\n") print("\n")
### Prompt User ### Prompt User
# Ask for an input spreadsheet # Ask for an input spreadsheet
path = open_dialog_native( path = open_dialog_native(
"Select a beefblup worksheet", "Select a beefblup worksheet",
GtkNullContainer(), GtkNullContainer(),
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet")) ("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
) )
# Ask for an output text filename # Ask for an output text filename
savepath = save_dialog_native( savepath = save_dialog_native(
"Save your beefblup results", "Save your beefblup results",
GtkNullContainer(), GtkNullContainer(),
(GtkFileFilter("*.txt", name="Results file"), (GtkFileFilter("*.txt", name="Results file"),
"*.txt") "*.txt")
) )
# Ask for heritability # Ask for heritability
print("What is the heritability for this trait?> ") print("What is the heritability for this trait?> ")
h2 = parse(Float64, readline(stdin)) h2 = parse(Float64, readline(stdin))
### Import input filename ### Import input filename
print("[🐮]: Importing data file...") print("[🐮]: Importing data file...")
# Import data from a suitable spreadsheet # Import data from a suitable spreadsheet
data = CSV.File(path) |> DataFrame data = CSV.File(path) |> DataFrame
print("Done!\n") print("Done!\n")
### Process input file ### Process input file
print("[🐮]: Processing and formatting data...") print("[🐮]: Processing and formatting data...")
# Sort the array by date # Sort the array by date
sort!(data, :birthdate) sort!(data, :birthdate)
# Define fields to hold id values for animals and their parents # Define fields to hold id values for animals and their parents
numanimals = length(data.id) numanimals = length(data.id)
# Find the index values for animals and their parents # Find the index values for animals and their parents
dam = indexin(data.dam, data.id) dam = indexin(data.dam, data.id)
sire = indexin(data.sire, data.id) sire = indexin(data.sire, data.id)
# Extract all of the fixed effects # Extract all of the fixed effects
fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end-1] fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end-1]
# Find any columns that need to be deleted # Find any columns that need to be deleted
for i in 1:ncol(fixedfx) for i in 1:ncol(fixedfx)
if length(unique(fixedfx[:,i])) <= 1 if length(unique(fixedfx[:,i])) <= 1
colname = names(fixedfx)[i] colname = names(fixedfx)[i]
print("Column '") print("Column '")
print(colname) print(colname)
print("' does not have any unique animals and will be removed from this analysis\n") print("' does not have any unique animals and will be removed from this analysis\n")
deletecols!(fixedfx,i) deletecols!(fixedfx,i)
end end
end end
# Determine how many contemporary groups there are # Determine how many contemporary groups there are
numtraits = ncol(fixedfx) numtraits = ncol(fixedfx)
numgroups = ones(1, numtraits) numgroups = ones(1, numtraits)
for i in 1:numtraits for i in 1:numtraits
numgroups[i] = length(unique(fixedfx[:,i])) numgroups[i] = length(unique(fixedfx[:,i]))
end end
# If there are more groups than animals, then the analysis cannot continue # If there are more groups than animals, then the analysis cannot continue
if sum(numgroups) >= numanimals if sum(numgroups) >= numanimals
println("There are more contemporary groups than animals. The analysis will println("There are more contemporary groups than animals. The analysis will
now abort.") now abort.")
exit() exit()
end end
# Define a "normal" animal as one of the last in the groups, provided that # Define a "normal" animal as one of the last in the groups, provided that
# all traits do not have null values # all traits do not have null values
normal = Array{String}(undef,1,numtraits) normal = Array{String}(undef,1,numtraits)
for i in 1:numtraits for i in 1:numtraits
for j in numanimals:-1:1 for j in numanimals:-1:1
if !ismissing(fixedfx[j,i]) if !ismissing(fixedfx[j,i])
normal[i] = string(fixedfx[j,i]) normal[i] = string(fixedfx[j,i])
break break
end end
end end
end end
print("Done!\n") print("Done!\n")
### Create the fixed-effect matrix ### Create the fixed-effect matrix
print("[🐮]: Creating the fixed-effect matrix...") print("[🐮]: Creating the fixed-effect matrix...")
# Form the fixed-effect matrix # Form the fixed-effect matrix
X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1) X = zeros(Int8, numanimals, floor(Int,sum(numgroups))-length(numgroups)+1)
X[:,1] = ones(Int8, 1, numanimals) X[:,1] = ones(Int8, 1, numanimals)
# Create an external counter that will increment through both loops # Create an external counter that will increment through both loops
counter = 2 counter = 2
# Store the traits in a string array # Store the traits in a string array
adjustedtraits = adjustedtraits =
Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups)) Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
# Iterate through each group # Iterate through each group
for i in 1:length(normal) for i in 1:length(normal)
# Find the traits that are present in this trait # Find the traits that are present in this trait
localdata = string.(fixedfx[:,i]) localdata = string.(fixedfx[:,i])
traits = unique(localdata) traits = unique(localdata)
# Remove the normal version from the analysis # Remove the normal version from the analysis
effecttraits = traits[findall(x -> x != normal[i], traits)] effecttraits = traits[findall(x -> x != normal[i], traits)]
# Iterate inside of the group # Iterate inside of the group
for j in 1:(length(effecttraits) - 1) for j in 1:(length(effecttraits) - 1)
matchedindex = findall(x -> x != effecttraits[j], localdata) matchedindex = findall(x -> x != effecttraits[j], localdata)
X[matchedindex, counter] .= 1 X[matchedindex, counter] .= 1
# Add this trait to the string # Add this trait to the string
adjustedtraits[counter - 1] = traits[j] adjustedtraits[counter - 1] = traits[j]
# Increment the big counter # Increment the big counter
global counter = counter + 1 global counter = counter + 1
end end
end end
print("Done!\n") print("Done!\n")
### Additive relationship matrix ### Additive relationship matrix
print("[🐮]: Creating additive relationship matrix...") print("[🐮]: Creating additive relationship matrix...")
# Create an empty matrix for the additive relationship matrix # Create an empty matrix for the additive relationship matrix
A = zeros(numanimals, numanimals) A = zeros(numanimals, numanimals)
# Create the additive relationship matrix by the FORTRAN method presented by # Create the additive relationship matrix by the FORTRAN method presented by
# Henderson # Henderson
for i in 1:numanimals for i in 1:numanimals
if !isnothing(dam[i]) && !isnothing(sire[i]) if !isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i-1) for j in 1:(i-1)
A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]]) A[j,i] = 0.5*(A[j,sire[i]] + A[j,dam[i]])
A[i,j] = A[j,i] A[i,j] = A[j,i]
end end
A[i,i] = 1 + 0.5*A[sire[i], dam[i]] A[i,i] = 1 + 0.5*A[sire[i], dam[i]]
elseif !isnothing(dam[i]) && isnothing(sire[i]) elseif !isnothing(dam[i]) && isnothing(sire[i])
for j in 1:(i-1) for j in 1:(i-1)
A[j,i] = 0.5*A[j,dam[i]] A[j,i] = 0.5*A[j,dam[i]]
A[i,j] = A[j,i] A[i,j] = A[j,i]
end end
A[i,i] = 1 A[i,i] = 1
elseif isnothing(dam[i]) && !isnothing(sire[i]) elseif isnothing(dam[i]) && !isnothing(sire[i])
for j in 1:(i-1) for j in 1:(i-1)
A[j,i] = 0.5*A[j,sire[i]] A[j,i] = 0.5*A[j,sire[i]]
A[i,j] = A[j,i] A[i,j] = A[j,i]
end end
A[i,i] = 1 A[i,i] = 1
else else
for j in 1:(i-1) for j in 1:(i-1)
A[j,i] = 0 A[j,i] = 0
A[i,j] = 0 A[i,j] = 0
end end
A[i,i] = 1 A[i,i] = 1
end end
end end
print("Done!\n") print("Done!\n")
### Perform BLUP ### Perform BLUP
print("[🐮]: Solving the mixed-model equations...") print("[🐮]: Solving the mixed-model equations...")
# Extract the observed data # Extract the observed data
Y = convert(Array{Float64}, data[:,end]) Y = convert(Array{Float64}, data[:,end])
# The random effects matrix # The random effects matrix
Z = Matrix{Int}(I, numanimals, numanimals) Z = Matrix{Int}(I, numanimals, numanimals)
# Remove items where there is no data # Remove items where there is no data
nullobs = findall(isnothing, Y) nullobs = findall(isnothing, Y)
Z[nullobs, nullobs] .= 0 Z[nullobs, nullobs] .= 0
# Calculate heritability # Calculate heritability
λ = (1-h2)/h2 λ = (1-h2)/h2
# Use the mixed-model equations # Use the mixed-model equations
MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)] MME = [X'*X X'*Z; Z'*X (Z'*Z)+(inv(A).*λ)]
MMY = [X'*Y; Z'*Y] MMY = [X'*Y; Z'*Y]
solutions = MME\MMY solutions = MME\MMY
# Find the accuracies # Find the accuracies
diaginv = diag(inv(MME)) diaginv = diag(inv(MME))
reliability = ones(Float64, length(diaginv)) - diaginv.*λ reliability = ones(Float64, length(diaginv)) - diaginv.*λ
print("Done!\n") print("Done!\n")
### Output the results ### Output the results
print("[🐮]: Saving results...") print("[🐮]: Saving results...")
# Find how many traits we found BLUE for # Find how many traits we found BLUE for
numgroups = numgroups .- 1 numgroups = numgroups .- 1
# Start printing results to output # Start printing results to output
fileID = open(savepath, "w") fileID = open(savepath, "w")
write(fileID, "beefblup Results Report\n") write(fileID, "beefblup Results Report\n")
write(fileID, "Produced using beefblup for Julia (") write(fileID, "Produced using beefblup for Julia (")
write(fileID, "https://github.com/millironx/beefblup") write(fileID, "https://github.com/millironx/beefblup")
write(fileID, ")\n\n") write(fileID, ")\n\n")
write(fileID, "Input:\t") write(fileID, "Input:\t")
write(fileID, path) write(fileID, path)
write(fileID, "\nAnalysis performed:\t") write(fileID, "\nAnalysis performed:\t")
write(fileID, string(Dates.today())) write(fileID, string(Dates.today()))
write(fileID, "\nTrait examined:\t") write(fileID, "\nTrait examined:\t")
write(fileID, headers[5]) write(fileID, headers[5])
write(fileID, "\n\n") write(fileID, "\n\n")
# Print base population stats # Print base population stats
write(fileID, "Base Population:\n") write(fileID, "Base Population:\n")
for i in 1:length(numgroups) for i in 1:length(numgroups)
write(fileID, "\t") write(fileID, "\t")
write(fileID, headers[i+5]) write(fileID, headers[i+5])
write(fileID, ":\t") write(fileID, ":\t")
write(fileID, normal[i]) write(fileID, normal[i])
write(fileID, "\n") write(fileID, "\n")
end end
write(fileID, "\tMean ") write(fileID, "\tMean ")
write(fileID, headers[5]) write(fileID, headers[5])
write(fileID, ":\t") write(fileID, ":\t")
write(fileID, string(solutions[1])) write(fileID, string(solutions[1]))
write(fileID, "\n\n") write(fileID, "\n\n")
# Contemporary group adjustments # Contemporary group adjustments
counter = 2 counter = 2
write(fileID, "Contemporary Group Effects:\n") write(fileID, "Contemporary Group Effects:\n")
for i in 1:length(numgroups) for i in 1:length(numgroups)
write(fileID, "\t") write(fileID, "\t")
write(fileID, headers[i+5]) write(fileID, headers[i+5])
write(fileID, "\tEffect\tReliability\n") write(fileID, "\tEffect\tReliability\n")
for j in 1:numgroups[i] for j in 1:numgroups[i]
write(fileID, "\t") write(fileID, "\t")
write(fileID, adjustedtraits[counter - 1]) write(fileID, adjustedtraits[counter - 1])
write(fileID, "\t") write(fileID, "\t")
write(fileID, string(solutions[counter])) write(fileID, string(solutions[counter]))
write(fileID, "\t") write(fileID, "\t")
write(fileID, string(reliability[counter])) write(fileID, string(reliability[counter]))
write(fileID, "\n") write(fileID, "\n")
global counter = counter + 1 global counter = counter + 1
end end
write(fileID, "\n") write(fileID, "\n")
end end
write(fileID, "\n") write(fileID, "\n")
# Expected breeding values # Expected breeding values
write(fileID, "Expected Breeding Values:\n") write(fileID, "Expected Breeding Values:\n")
write(fileID, "\tID\tEBV\tReliability\n") write(fileID, "\tID\tEBV\tReliability\n")
for i in 1:numanimals for i in 1:numanimals
write(fileID, "\t") write(fileID, "\t")
write(fileID, data.id[i]) write(fileID, data.id[i])
write(fileID, "\t") write(fileID, "\t")
write(fileID, string(solutions[i+counter-1])) write(fileID, string(solutions[i+counter-1]))
write(fileID, "\t") write(fileID, "\t")
write(fileID, string(reliability[i+counter-1])) write(fileID, string(reliability[i+counter-1]))
write(fileID, "\n") write(fileID, "\n")
end end
write(fileID, "\n - END REPORT -") write(fileID, "\n - END REPORT -")
close(fileID) close(fileID)
print("Done!\n") print("Done!\n")