mirror of
https://github.com/MillironX/beefblup.git
synced 2024-12-22 09:08:16 +00:00
Initial pass at input file change
Known to produce wrong results
This commit is contained in:
parent
5d7ecceef8
commit
f450295b20
1 changed files with 27 additions and 39 deletions
|
@ -6,7 +6,8 @@
|
||||||
# Licensed under BSD-3-Clause License
|
# Licensed under BSD-3-Clause License
|
||||||
|
|
||||||
# Import the required packages
|
# Import the required packages
|
||||||
using XLSX
|
using CSV
|
||||||
|
using DataFrames
|
||||||
using LinearAlgebra
|
using LinearAlgebra
|
||||||
using Dates
|
using Dates
|
||||||
using Gtk
|
using Gtk
|
||||||
|
@ -22,7 +23,7 @@ print("\n")
|
||||||
path = open_dialog_native(
|
path = open_dialog_native(
|
||||||
"Select a beefblup worksheet",
|
"Select a beefblup worksheet",
|
||||||
GtkNullContainer(),
|
GtkNullContainer(),
|
||||||
("*.xlsx", GtkFileFilter("*.xlsx", name="beefblup worksheet"))
|
("*.csv", GtkFileFilter("*.csv", name="beefblup worksheet"))
|
||||||
)
|
)
|
||||||
|
|
||||||
# Ask for an output text filename
|
# Ask for an output text filename
|
||||||
|
@ -38,58 +39,45 @@ print("What is the heritability for this trait?> ")
|
||||||
h2 = parse(Float64, readline(stdin))
|
h2 = parse(Float64, readline(stdin))
|
||||||
|
|
||||||
### Import input filename
|
### Import input filename
|
||||||
print("[🐮]: Importing Excel file...")
|
print("[🐮]: Importing data file...")
|
||||||
|
|
||||||
# Import data from a suitable spreadsheet
|
# Import data from a suitable spreadsheet
|
||||||
data = XLSX.readxlsx(path)[1][:]
|
data = CSV.File(path) |> DataFrame
|
||||||
|
|
||||||
print("Done!\n")
|
print("Done!\n")
|
||||||
|
|
||||||
### Process input file
|
### Process input file
|
||||||
print("[🐮]: Processing and formatting data...")
|
print("[🐮]: Processing and formatting data...")
|
||||||
|
|
||||||
# Extract the headers into a separate array
|
|
||||||
headers = data[1,:]
|
|
||||||
data = data[2:end,:]
|
|
||||||
|
|
||||||
# Sort the array by date
|
# Sort the array by date
|
||||||
data = sortslices(data, dims=1, lt=(x,y)->isless(x[2],y[2]))
|
sort!(data, :birthdate)
|
||||||
|
|
||||||
# Define fields to hold id values for animals and their parents
|
# Define fields to hold id values for animals and their parents
|
||||||
ids = string.(data[:,1])
|
numanimals = length(data.id)
|
||||||
damids = string.(data[:,3])
|
|
||||||
sireids = string.(data[:,4])
|
|
||||||
numanimals = length(ids)
|
|
||||||
|
|
||||||
# Find the index values for animals and their parents
|
# Find the index values for animals and their parents
|
||||||
dam = indexin(damids, ids)
|
dam = indexin(data.dam, data.id)
|
||||||
sire = indexin(sireids, ids)
|
sire = indexin(data.sire, data.id)
|
||||||
|
|
||||||
# Store column numbers that need to be deleted
|
# Extract all of the fixed effects
|
||||||
# Column 6 contains an intermediate Excel calculation and always need to
|
fixedfx = select(data, Not([:id, :birthdate, :sire, :dam]))[:,1:end-1]
|
||||||
# be deleted
|
|
||||||
colstokeep = [1, 2, 3, 4, 5]
|
|
||||||
|
|
||||||
# Find any columns that need to be deleted
|
# Find any columns that need to be deleted
|
||||||
for i in 7:length(headers)
|
for i in 1:ncol(fixedfx)
|
||||||
if length(unique(data[:,i])) <= 1
|
if length(unique(fixedfx[:,i])) <= 1
|
||||||
colname = headers[i]
|
colname = names(fixedfx)[i]
|
||||||
print("Column '")
|
print("Column '")
|
||||||
print(colname)
|
print(colname)
|
||||||
print("' does not have any unique animals and will be removed from this analysis\n")
|
print("' does not have any unique animals and will be removed from this analysis\n")
|
||||||
else
|
deletecols!(fixedfx,i)
|
||||||
push!(colstokeep, i)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Delete the appropriate columns from the datasheet and the headers
|
|
||||||
data = data[:, colstokeep]
|
|
||||||
headers = headers[colstokeep]
|
|
||||||
|
|
||||||
# Determine how many contemporary groups there are
|
# Determine how many contemporary groups there are
|
||||||
numgroups = ones(1, length(headers)-5)
|
numtraits = ncol(fixedfx)
|
||||||
for i in 6:length(headers)
|
numgroups = ones(1, numtraits)
|
||||||
numgroups[i-5] = length(unique(data[:,i]))
|
for i in 1:numtraits
|
||||||
|
numgroups[i] = length(unique(fixedfx[:,i]))
|
||||||
end
|
end
|
||||||
|
|
||||||
# If there are more groups than animals, then the analysis cannot continue
|
# If there are more groups than animals, then the analysis cannot continue
|
||||||
|
@ -101,11 +89,11 @@ end
|
||||||
|
|
||||||
# Define a "normal" animal as one of the last in the groups, provided that
|
# Define a "normal" animal as one of the last in the groups, provided that
|
||||||
# all traits do not have null values
|
# all traits do not have null values
|
||||||
normal = Array{String}(undef,1,length(headers)-5)
|
normal = Array{String}(undef,1,numtraits)
|
||||||
for i in 6:length(headers)
|
for i in 1:numtraits
|
||||||
for j in numanimals:-1:1
|
for j in numanimals:-1:1
|
||||||
if !ismissing(data[j,i])
|
if !ismissing(fixedfx[j,i])
|
||||||
normal[i-5] = string(data[j,i])
|
normal[i] = string(fixedfx[j,i])
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -129,12 +117,12 @@ Array{String}(undef,floor(Int,sum(numgroups))-length(numgroups))
|
||||||
# Iterate through each group
|
# Iterate through each group
|
||||||
for i in 1:length(normal)
|
for i in 1:length(normal)
|
||||||
# Find the traits that are present in this trait
|
# Find the traits that are present in this trait
|
||||||
localdata = string.(data[:,i+5])
|
localdata = string.(fixedfx[:,i])
|
||||||
traits = unique(localdata)
|
traits = unique(localdata)
|
||||||
# Remove the normal version from the analysis
|
# Remove the normal version from the analysis
|
||||||
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
effecttraits = traits[findall(x -> x != normal[i], traits)]
|
||||||
# Iterate inside of the group
|
# Iterate inside of the group
|
||||||
for j in 1:length(effecttraits)
|
for j in 1:(length(effecttraits) - 1)
|
||||||
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
matchedindex = findall(x -> x != effecttraits[j], localdata)
|
||||||
X[matchedindex, counter] .= 1
|
X[matchedindex, counter] .= 1
|
||||||
# Add this trait to the string
|
# Add this trait to the string
|
||||||
|
@ -188,7 +176,7 @@ print("Done!\n")
|
||||||
print("[🐮]: Solving the mixed-model equations...")
|
print("[🐮]: Solving the mixed-model equations...")
|
||||||
|
|
||||||
# Extract the observed data
|
# Extract the observed data
|
||||||
Y = convert(Array{Float64}, data[:,5])
|
Y = convert(Array{Float64}, data[:,end])
|
||||||
|
|
||||||
# The random effects matrix
|
# The random effects matrix
|
||||||
Z = Matrix{Int}(I, numanimals, numanimals)
|
Z = Matrix{Int}(I, numanimals, numanimals)
|
||||||
|
@ -273,7 +261,7 @@ write(fileID, "Expected Breeding Values:\n")
|
||||||
write(fileID, "\tID\tEBV\tReliability\n")
|
write(fileID, "\tID\tEBV\tReliability\n")
|
||||||
for i in 1:numanimals
|
for i in 1:numanimals
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, ids[i])
|
write(fileID, data.id[i])
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
write(fileID, string(solutions[i+counter-1]))
|
write(fileID, string(solutions[i+counter-1]))
|
||||||
write(fileID, "\t")
|
write(fileID, "\t")
|
||||||
|
|
Loading…
Reference in a new issue