From d4bb72c458b9a896f2ba5709c651b95d63a56d22 Mon Sep 17 00:00:00 2001 From: "Thomas A. Christensen II" <25492070+MillironX@users.noreply.github.com> Date: Tue, 31 Aug 2021 19:24:07 -0500 Subject: [PATCH] Revamp fixed-effect algorithm --- src/BeefBLUP.jl | 99 ++++++++++++++++++++---------------------------- test/runtests.jl | 2 +- 2 files changed, 42 insertions(+), 59 deletions(-) diff --git a/src/BeefBLUP.jl b/src/BeefBLUP.jl index 11e7834..5609fb3 100755 --- a/src/BeefBLUP.jl +++ b/src/BeefBLUP.jl @@ -71,9 +71,9 @@ function beefblup(path::String, savepath::String, h2::Float64) A = additiverelationshipmatrix(data.id, data.dam, data.sire) # Extract all of the fixed effects - fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end - 1] + fixedeffectdata = data[:,5:end-1] - (X, numgroups, normal, adjustedtraits) = fixedeffectmatrix(fixedfx) + (X, fixedeffects) = fixedeffectmatrix(fixedeffectdata) # Extract the observed data Y = convert(Array{Float64}, data[:,end]) @@ -173,71 +173,42 @@ function beefblup(path::String, savepath::String, h2::Float64) end -function fixedeffectmatrix(fixedeffects::AbstractDataFrame) - # Find any columns that need to be deleted - for i in 1:ncol(fixedeffects) - if length(unique(fixedeffects[:,i])) <= 1 - @warn string("column '", names(fixedeffects)[i], "' does not have any unique animals and will be removed from this analysis") - DataFrames.select!(fixedeffects, Not(i)) - end - end - - # Determine how many contemporary groups there are - numtraits = ncol(fixedeffects) - numgroups = ones(1, numtraits) - for i in 1:numtraits - numgroups[i] = length(unique(fixedeffects[:,i])) - end +""" + fixedeffectmatrix(fixedeffectdata::DataFrame) - # If there are more groups than animals, then the analysis cannot continue - numanimals = length(fixedeffects[:,1]) - if sum(numgroups) >= numanimals - throw(ErrorException("there are more contemporary groups than animals")) - end +Creates contemporary groupings and the fixed-effect incidence matrix based on the fixed +effects listed in `fixedeffectdata`. - # Define a "normal" animal as one of the last in the groups, provided that - # all traits do not have null values - numtraits = ncol(fixedeffects) - numanimals = length(fixedeffects[:,1]) - normal = Array{String}(undef, 1, numtraits) - for i in 1:numtraits - for j in numanimals:-1:1 - if !ismissing(fixedeffects[j,i]) - normal[i] = string(fixedeffects[j,i]) - break - end +Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the +actual matrix, and `fixedeffects` is the contemporary groupings. +""" +function fixedeffectmatrix(fixedeffectdata::DataFrame) + # Declare an empty return matrix + fixedeffects = FixedEffect[] + + # Add each trait to the array + for i in 1:size(fixedeffectdata)[2] + name = names(fixedeffectdata)[i] + traits = eachcol(fixedeffectdata)[i] + + if length(unique(traits)) > 1 + push!(fixedeffects, FixedEffect(name, traits)) + else + @warn string("column '", name, "' does not have any unique animals and will be dropped from analysis") + DataFrames.select!(fixedeffectdata, Not(pname)) end end - # Form the fixed-effect matrix - X = zeros(Int8, numanimals, floor(Int, sum(numgroups)) - length(numgroups) + 1) - X[:,1] = ones(Int8, 1, numanimals) + X = ones(Int64, (size(fixedeffectdata)[1], 1)) - # Create an external counter that will increment through both loops - counter = 2 - - # Store the traits in a string array - adjustedtraits = - Array{String}(undef,floor(Int, sum(numgroups)) - length(numgroups)) - # Iterate through each group - for i in 1:length(normal) - # Find the traits that are present in this trait - localdata = string.(fixedeffects[:,i]) - traits = unique(localdata) - # Remove the normal version from the analysis - effecttraits = traits[findall(x -> x != normal[i], traits)] - # Iterate inside of the group - for j in 1:(length(effecttraits)) - matchedindex = findall(x -> x == effecttraits[j], localdata) - X[matchedindex, counter] .= 1 - # Add this trait to the string - adjustedtraits[counter - 1] = traits[j] - # Increment the big counter - counter = counter + 1 + for i in 1:length(fixedeffects) + trait = fixedeffects[i] + for phenotype in trait.alltraits + X = cat(X, Int64.(fixedeffectdata[:,i] .== phenotype), dims=2) end end - return X, numgroups, normal, adjustedtraits + return X, fixedeffects end """ @@ -314,5 +285,17 @@ function renamecolstospec!(df::DataFrame) return df end +struct FixedEffect + name::String + basetrait::Any + alltraits::AbstractArray{Any} +end + +function FixedEffect(name::String, incidences) + basetrait = last(unique(incidences)) + types = unique(incidences)[1:end-1] + return FixedEffect(name, basetrait, types) +end + end diff --git a/test/runtests.jl b/test/runtests.jl index c92aa1e..9f2187e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,7 @@ using Test @testset "BeefBLUP.jl" begin # Write your tests here. - correctX = [1 1 0 0; 1 1 0 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 0 0] + correctX = [1 1 0 1; 1 1 0 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 0 1] fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"]) @test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX correctA = [1 0 1/2 1/2 1/2 0 0;