# beefblup # Julia package for performing single-variate BLUP to find beef cattle # breeding values # (C) 2021 Thomas A. Christensen II # Licensed under BSD-3-Clause License # cSpell:includeRegExp #.* # cSpell:includeRegExp ("""|''')[^\1]*\1 module BeefBLUP # Import the required packages using CSV using DataFrames using LinearAlgebra using Dates using Gtk # Main entry-level function - acts just like the script function beefblup() # Ask for an input spreadsheet path = open_dialog_native( "Select a beefblup worksheet", GtkNullContainer(), ("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet")) ) # Ask for an output text filename savepath = save_dialog_native( "Save your beefblup results", GtkNullContainer(), (GtkFileFilter("*.txt", name="Results file"), "*.txt") ) # Ask for heritability print("What is the heritability for this trait?> ") h2 = parse(Float64, readline(stdin)) beefblup(path, savepath, h2) end function beefblup(datafile::String, h2::Float64) # Assume the data is named the same as the file without the trailing extension dataname = join(split(datafile, ".")[1:end - 1]) # Create a new results name resultsfile = string(dataname, "_results.txt") # Pass this info on to the worker beefblup(datafile, resultsfile, h2) end # Main worker function, can perform all the work if given all the user input function beefblup(path::String, savepath::String, h2::Float64) # Import data from a suitable spreadsheet data = DataFrame(CSV.File(path)) # Make sure the data is in the proper format renamecolstospec!(data) # Sort the array by date sort!(data, :birthdate) # Define fields to hold id values for animals and their parents numanimals = length(data.id) # Calculate the relationship matrix A = additiverelationshipmatrix(data.id, data.dam, data.sire) # Extract all of the fixed effects fixedeffectdata = data[:,5:end-1] (X, fixedeffects) = fixedeffectmatrix(fixedeffectdata) # Extract the observed data Y = convert(Array{Float64}, data[:,end]) # The random effects matrix Z = Matrix{Int}(I, numanimals, numanimals) # Remove items where there is no data nullobs = findall(isnothing, Y) Z[nullobs, nullobs] .= 0 # Calculate heritability λ = (1 - h2) / h2 # Use the mixed-model equations MME = [X' * X X' * Z; Z' * X (Z' * Z) + (inv(A) .* λ)] MMY = [X' * Y; Z' * Y] solutions = MME \ MMY # Find the accuracies diaginv = diag(inv(MME)) reliability = ones(Float64, length(diaginv)) .- diaginv .* λ # Find how many traits we found BLUE for numgroups = length(reliability) - numanimals # Split the BLUP and BLUE results β = solutions[1:numgroups] μ = solutions[numgroups+1:end] μ_reliability = reliability[numgroups+1:end] # Extract the names of the traits traitname = names(data)[end] # Start printing results to output fileID = open(savepath, "w") write(fileID, "beefblup Results Report\n") write(fileID, "Produced using beefblup (") write(fileID, "https://github.com/millironx/beefblup") write(fileID, ")\n\n") write(fileID, "Input:\t") write(fileID, path) write(fileID, "\nAnalysis performed:\t") write(fileID, string(Dates.today())) write(fileID, "\nTrait examined:\t") write(fileID, traitname) write(fileID, "\n\n") # Print base population stats write(fileID, "Base Population:\n") for i in 1:length(fixedeffects) effect = fixedeffects[i] write(fileID, "\t") write(fileID, string(effect.name)) write(fileID, ":\t") write(fileID, string(effect.basetrait)) write(fileID, "\n") end write(fileID, "\tMean ") write(fileID, traitname) write(fileID, ":\t") write(fileID, string(solutions[1])) write(fileID, "\n\n") # Contemporary group adjustments counter = 2 write(fileID, "Contemporary Group Effects:\n") for i in 1:length(fixedeffects) effect = fixedeffects[i] write(fileID, "\t") write(fileID, effect.name) write(fileID, "\tEffect\n") for j in 1:length(effect.alltraits) types = effect.alltraits[j] write(fileID, "\t") write(fileID, string(types)) write(fileID, "\t") write(fileID, string(β[counter])) write(fileID, "\n") counter = counter + 1 end write(fileID, "\n") end write(fileID, "\n") # Expected breeding values write(fileID, "Expected Breeding Values:\n") write(fileID, "\tID\tEBV\tReliability\n") for i in 1:numanimals write(fileID, "\t") write(fileID, string(data.id[i])) write(fileID, "\t") write(fileID, string(μ[i])) write(fileID, "\t") write(fileID, string(μ_reliability[i])) write(fileID, "\n") end write(fileID, "\n - END REPORT -") close(fileID) end """ fixedeffectmatrix(fixedeffectdata::DataFrame) Creates contemporary groupings and the fixed-effect incidence matrix based on the fixed effects listed in `fixedeffectdata`. Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the actual matrix, and `fixedeffects` is the contemporary groupings. """ function fixedeffectmatrix(fixedeffectdata::DataFrame) # Declare an empty return matrix fixedeffects = FixedEffect[] # Add each trait to the array for i in 1:size(fixedeffectdata)[2] name = names(fixedeffectdata)[i] traits = eachcol(fixedeffectdata)[i] if length(unique(traits)) > 1 push!(fixedeffects, FixedEffect(name, traits)) else @warn string("column '", name, "' does not have any unique animals and will be dropped from analysis") pname = propertynames(fixedeffectdata)[i] DataFrames.select!(fixedeffectdata, Not(pname)) end end X = ones(Int64, (size(fixedeffectdata)[1], 1)) for i in 1:length(fixedeffects) trait = fixedeffects[i] for phenotype in trait.alltraits X = cat(X, Int64.(fixedeffectdata[:,i] .== phenotype), dims=2) end end return X, fixedeffects end """ additiverelationshipmatrix(id, dam, sire) Returns the additive numerator relationship matrix based on the pedigree provided in `dam` and `sire` for animals in `id`. """ function additiverelationshipmatrix(id::AbstractVector, damid::AbstractVector, sireid::AbstractVector) # Sanity-check for valid pedigree if !(length(id) == length(damid) && length(damid) == length(sireid)) throw(ArgumentError("id, dam, and sire must be of the same length")) end # Convert to positions dam = indexin(damid, id) sire = indexin(sireid, id) # Calculate loop iterations numanimals = length(dam) # Create an empty matrix for the additive relationship matrix A = zeros(numanimals, numanimals) # Create the additive relationship matrix by the FORTRAN method presented by # Henderson for i in 1:numanimals if !isnothing(dam[i]) && !isnothing(sire[i]) for j in 1:(i - 1) A[j,i] = 0.5 * (A[j,sire[i]] + A[j,dam[i]]) A[i,j] = A[j,i] end A[i,i] = 1 + 0.5 * A[sire[i], dam[i]] elseif !isnothing(dam[i]) && isnothing(sire[i]) for j in 1:(i - 1) A[j,i] = 0.5 * A[j,dam[i]] A[i,j] = A[j,i] end A[i,i] = 1 elseif isnothing(dam[i]) && !isnothing(sire[i]) for j in 1:(i - 1) A[j,i] = 0.5 * A[j,sire[i]] A[i,j] = A[j,i] end A[i,i] = 1 else for j in 1:(i - 1) A[j,i] = 0 A[i,j] = 0 end A[i,i] = 1 end end return A end """ renamecolstospec(::DataFrame) Renames the first four columns of the beefblup data sheet so that they can be referred to by name instead of by column index, regardless of user input. """ function renamecolstospec!(df::DataFrame) # Pull out the fixed-effect and observation name othernames = propertynames(df)[5:end] # Put specification column names and user-defined names together allnames = cat([:id, :birthdate, :dam, :sire], othernames, dims=1) # Rename in the DataFrame rename!(df, allnames, makeunique=true) return df end struct FixedEffect name::String basetrait::Any alltraits::AbstractArray{Any} end function FixedEffect(name::String, incidences) basetrait = last(unique(incidences)) types = unique(incidences)[1:end-1] return FixedEffect(name, basetrait, types) end end