142 lines
3.7 KiB
Julia
Executable file
142 lines
3.7 KiB
Julia
Executable file
#!/usr/bin/env julia
|
|
using CSV
|
|
using DataFrames
|
|
using Dates
|
|
using JSON3
|
|
using Logging
|
|
using PromptingTools
|
|
|
|
# Set logging to a timestamped file so each run keeps its own record.
# Fix: the default string form of `now()` contains ':' characters, which are
# invalid in filenames on some filesystems (e.g. Windows/exFAT); format the
# timestamp explicitly with filename-safe separators instead.
timestamp = Dates.format(now(), "yyyy-mm-ddTHH-MM-SS")
# "w" suffices — the log is only ever written, never read back ("w+" was unnecessary)
log_io = open("antimicrobial-use-survey-analysis_$timestamp.log", "w")
logger = SimpleLogger(log_io)
global_logger(logger)
|
|
|
|
# Import the raw survey export (tab-separated).
# `normalizenames=true` rewrites the header row into valid Julia identifiers,
# which is what lets the `:Q8`-style column symbols below index the DataFrame.
survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true))
# Drop row 2 of the data — presumably a second non-response metadata/JSON row
# in the export (TODO confirm against data.tsv). Row 1 is kept deliberately:
# it holds the question text, and is used below both as the column
# "description" metadata and as the bolded lead-in of each compiled report.
deleteat!(survey_data, 2)
|
|
|
|
# Attach the question wording (stored in the first data row of each column)
# to that column as "description" metadata, so the question text travels
# with the DataFrame.
foreach(enumerate(eachcol(survey_data))) do (idx, column)
    colmetadata!(survey_data, idx, "description", first(column))
end #do
|
|
|
|
# Remove the messy JSON encoding
# TODO: For later graphs, move this step _before_ the import so that DataFrames can properly
# infer types
# deleteat!(survey_data, [1,2])

# Compile comments from all questions and analyze
# We will be offloading the analysis to Ollama running Llama3.1 locally

# Open-ended (free-response) question columns to analyze. These symbols come
# from the normalized header names (`normalizenames=true` at import time), so
# they index directly into `survey_data`.
questions = [:Q8, :Q16, :Q29, :Q30]
|
|
|
|
"""
    logged_prompt(prompt)

Send `prompt` to the locally-running Llama3.1 model via Ollama and return the
model's response text. Both the outgoing prompt and the incoming response are
recorded via `@info`, so they land in the run's log file.

# Arguments
- `prompt`: the full prompt string to send to the model.

# Returns
The model response content (the `.content` field of the `aigenerate` result).
"""
function logged_prompt(prompt)
    @info "Prompting Llama3.1 with \n```$prompt\n```\n"
    response = aigenerate(
        PromptingTools.OllamaSchema(),
        prompt;
        model="llama3.1",
        # Ask Ollama to offload as many layers as possible to the GPU
        api_kwargs=(; options=(; num_gpu=99))
    ).content
    # Fix: log message previously read "responsed" (typo)
    @info "Llama3.1 responded with \n```\n$response\n```\n"
    return response
end #function
|
|
|
|
# Nested results: question symbol => (answer => (theme => true/false/missing))
question_dict = Dict()

for q in questions
    # Summarize the major themes among all answers.
    # The first entry of each column is the question text itself, so it is
    # bolded to stand apart from the actual responses.
    analysis_prompt = """
        The following is a list of answers to a survey with one response per paragraph:

        # Antimicrobial usage survey open-ended question: $q

        $(
            join(
                [
                    i == 1 ? "**$a**\n" : "$a\n" for (i, a) in enumerate(skipmissing(survey_data[!, q]))
                ],
                '\n'
            )
        )

        ---

        Summarize the common themes between the survey responses.
        """

    analysis_response = logged_prompt(analysis_prompt)

    # Compile all themes that Llama3 identified.
    # Llama3 tends to summarize each theme as a numbered, bolded statement, e.g.
    #   1. **Some theme**: elaboration...
    # We extract the bolded statements to compile themes.
    themes = String[]
    for l in eachline(IOBuffer(analysis_response))
        # Fix: use \d+ instead of [1-9]+ so list items numbered 10 and above
        # (whose numbers contain the digit 0) are still recognized.
        m = match(r"^\d+\. \*\*(.+)\*\*:", l)
        # m[1] is the first capture group: the theme title between the asterisks.
        # (Clearer and more portable than `first(m)`, which needs Julia >= 1.7.)
        isnothing(m) || push!(themes, m[1])
    end #for
    @info "Found themes $themes"

    answer_dict = Dict()

    # Now go back through each answer and check whether it addresses each theme
    for (i, a) in enumerate(skipmissing(survey_data[!, q]))
        i == 1 && continue #first "answer" is the question

        theme_dict = Dict{String,Union{Bool,Missing}}()

        for t in themes
            theme_prompt = """
                The following was answered as a free-response answer on a survey:

                $a

                ---

                Does this answer deal with the theme of $t? Answer yes or no.
                """

            theme_response = logged_prompt(theme_prompt)

            # Normalize once; `missing` records a verdict we could not parse
            verdict = lowercase(theme_response)
            if startswith(verdict, "yes")
                theme_dict[t] = true
            elseif startswith(verdict, "no")
                theme_dict[t] = false
            else
                theme_dict[t] = missing
            end #if
        end #for

        answer_dict[a] = theme_dict
    end #for

    question_dict[q] = answer_dict
end #for
|
|
|
|
|
|
open("results.json", "w") do io
|
|
JSON3.write(io, question_dict)
|
|
end
|
|
|
|
|
|
# Compile comments from all requested questions into one markdown report per
# question, then convert each to a Word document with pandoc.
for q in questions
    md_path = "$q.md"
    open(md_path, "w") do f
        write(f, "# Antimicrobial usage survey open-ended question: $q\n\n")
        for (idx, answer) in enumerate(skipmissing(survey_data[!, q]))
            # The first entry is the question text itself — bold it as a lead-in
            write(f, idx == 1 ? "**$answer**\n\n" : "$answer\n\n")
        end #for
    end #do

    # Hand the markdown off to pandoc for .docx conversion
    run(`pandoc $md_path -o $q.docx`)
end #for
|
|
|
|
# Close the log file handle opened at the top of the script. The global
# SimpleLogger writes through this handle, so no further logging occurs after
# this point.
close(log_io)
|