1
0
Fork 0
mirror of https://github.com/MillironX/beefblup.git synced 2024-12-22 00:58:17 +00:00

Revamp fixed-effect algorithm

This commit is contained in:
Thomas A. Christensen II 2021-08-31 19:24:07 -05:00
parent 4b66ad8b1f
commit d4bb72c458
Signed by: millironx
GPG key ID: 139C07724802BC5D
2 changed files with 44 additions and 61 deletions

View file

@ -71,9 +71,9 @@ function beefblup(path::String, savepath::String, h2::Float64)
A = additiverelationshipmatrix(data.id, data.dam, data.sire) A = additiverelationshipmatrix(data.id, data.dam, data.sire)
# Extract all of the fixed effects # Extract all of the fixed effects
fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end - 1] fixedeffectdata = data[:,5:end-1]
(X, numgroups, normal, adjustedtraits) = fixedeffectmatrix(fixedfx) (X, fixedeffects) = fixedeffectmatrix(fixedeffectdata)
# Extract the observed data # Extract the observed data
Y = convert(Array{Float64}, data[:,end]) Y = convert(Array{Float64}, data[:,end])
@ -173,71 +173,42 @@ function beefblup(path::String, savepath::String, h2::Float64)
end end
"""
    fixedeffectmatrix(fixedeffectdata::DataFrame)

Create the contemporary groupings and the fixed-effect incidence matrix based on the fixed
effects listed in `fixedeffectdata`.

Every column of `fixedeffectdata` is treated as one fixed effect. A column that holds
only a single distinct value is reported with a warning and removed from
`fixedeffectdata` in place.

Returns a tuple `(X::Matrix{Int}, fixedeffects::Array{FixedEffect})` in which `X` is the
actual matrix, and `fixedeffects` is the contemporary groupings. The first column of `X`
is all ones; it is followed by one 0/1 column per entry of `alltraits` of each retained
fixed effect, in column order.
"""
function fixedeffectmatrix(fixedeffectdata::DataFrame)
    # Record the row count before any column is dropped, so that it stays correct even
    # if every column ends up being removed
    numanimals = size(fixedeffectdata, 1)

    # Sort the columns into usable fixed effects and columns with a single value
    fixedeffects = FixedEffect[]
    constantcolumns = String[]
    for name in names(fixedeffectdata)
        traits = fixedeffectdata[!, name]
        if length(unique(traits)) > 1
            push!(fixedeffects, FixedEffect(name, traits))
        else
            @warn string("column '", name, "' does not have any unique animals and will be dropped from analysis")
            push!(constantcolumns, name)
        end
    end

    # Drop the unusable columns only after the scan: removing a column while looping
    # over column positions shifts the remaining columns and invalidates the indices
    for name in constantcolumns
        DataFrames.select!(fixedeffectdata, Not(name))
    end

    # One leading column of ones plus one indicator column per non-base trait
    numcolumns = 1 + mapreduce(effect -> length(effect.alltraits), +, fixedeffects; init=0)
    X = zeros(Int64, numanimals, numcolumns)
    X[:, 1] .= 1

    column = 2
    for effect in fixedeffects
        # Look the data up by name so that it always matches the effect being encoded
        observations = fixedeffectdata[!, effect.name]
        for phenotype in effect.alltraits
            # `isequal` is the comparison `unique` used to find the traits, and it
            # always yields a Bool
            X[:, column] .= isequal.(observations, Ref(phenotype))
            column += 1
        end
    end

    return X, fixedeffects
end
""" """
@ -314,5 +285,17 @@ function renamecolstospec!(df::DataFrame)
return df return df
end end
"""
    FixedEffect

Contemporary grouping for a single fixed effect.

# Fields
- `name`: name of the fixed effect
- `basetrait`: the one value of the effect that is kept out of `alltraits`
- `alltraits`: every other distinct value of the effect
"""
struct FixedEffect
    name::String
    basetrait::Any
    alltraits::AbstractArray{Any}
end

"""
    FixedEffect(name::String, incidences)

Build a `FixedEffect` called `name` from the observed values in `incidences`.

The last distinct value of `incidences`, in order of first appearance, becomes
`basetrait`; the remaining distinct values become `alltraits`.
"""
function FixedEffect(name::String, incidences)
    # Distinct values, in order of first appearance
    levels = unique(incidences)
    reference = levels[end]
    others = levels[1:(end - 1)]
    return FixedEffect(name, reference, others)
end
end end

View file

@ -4,7 +4,7 @@ using Test
@testset "BeefBLUP.jl" begin @testset "BeefBLUP.jl" begin
# Write your tests here. # Write your tests here.
correctX = [1 1 0 0; 1 1 0 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 0 0] correctX = [1 1 0 1; 1 1 0 0; 1 0 1 1; 1 0 1 0; 1 0 1 1; 1 0 1 0; 1 0 0 1]
fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"]) fixedfx = DataFrame(year = [1990, 1990, 1991, 1991, 1991, 1991, 1992], sex = ["male", "female", "male", "female", "male", "female", "male"])
@test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX @test BeefBLUP.fixedeffectmatrix(fixedfx)[1] == correctX
correctA = [1 0 1/2 1/2 1/2 0 0; correctA = [1 0 1/2 1/2 1/2 0 0;