antimicrobial-use-survey-an.../main.jl

83 lines
2.1 KiB
Julia
Raw Normal View History

2024-09-03 18:50:16 -04:00
#!/usr/bin/env julia
using CSV
using DataFrames
2024-09-04 16:21:32 -04:00
using Dates
using Logging
2024-09-04 16:46:28 -04:00
using PromptingTools
2024-09-04 16:21:32 -04:00
# Send all log records to a timestamped file instead of the default logger.
# `string(now())` looks like `2024-09-04T16:21:32.123`; the colons are swapped for dashes
# because `:` is not allowed in file names on Windows and is treated as a host separator
# by tools such as `scp` and `rsync`.
log_timestamp = replace(string(now()), ':' => '-')
log_path = "antimicrobial-use-survey-analysis_$(log_timestamp).log"
# "w+" creates the file (truncating an existing one) and opens it for reading and writing
log_io = open(log_path, "w+")
logger = SimpleLogger(log_io)
global_logger(logger)
2024-09-03 18:50:16 -04:00
# Load the tab-separated survey export into a DataFrame
survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true))
# Drop the second data row
# NOTE(review): presumably this is the row holding the messy JSON encoding — confirm
# against data.tsv
deleteat!(survey_data, 2)
# Store the first remaining value of every column as that column's "description"
# metadata, so each column carries the actual question that was asked
for name in names(survey_data)
    question_text = first(survey_data[!, name])
    colmetadata!(survey_data, name, "description", question_text)
end #for
# TODO: For later graphs, remove the messy JSON encoding _before_ the import so that
# DataFrames can properly infer types
# deleteat!(survey_data, [1,2])
# Compile comments from all questions and analyze
# We will be offloading the analysis to Ollama running Llama3.1 locally
2024-09-03 18:50:16 -04:00
# Open-ended questions whose free-text answers are summarized
questions = [:Q8, :Q16, :Q29, :Q30]
for q in questions
    # One paragraph per non-missing entry of the column. The first entry is set in bold
    # (normally the question text kept in the first row); the rest are the responses.
    answers = skipmissing(survey_data[!, q])
    paragraphs = join(
        (i == 1 ? "**$a**\n" : "$a\n" for (i, a) in enumerate(answers)), '\n'
    )
    analysis_prompt = """
    The following is a list of answers to a survey with one response per paragraph:
    # Antimicrobial usage survey open-ended question: $q
    $paragraphs
    ---
    Summarize the common themes between the survey responses.
    """
    @info "Prompting Llama3.1 with \n```\n$analysis_prompt\n```\n"
    # NOTE(review): `num_gpu=99` is passed straight through to Ollama; presumably it asks
    # for all model layers to be offloaded to the GPU — confirm against the Ollama docs
    analysis_response = aigenerate(
        PromptingTools.OllamaSchema(),
        analysis_prompt;
        model="llama3.1",
        api_kwargs=(; options=(; num_gpu=99)),
    ).content
    # The summary is only recorded in the log; it is not written anywhere else
    @info "Llama3.1 responded with \n```\n$analysis_response\n```\n"
end #for
# Compile comments from all requested questions
2024-09-03 18:50:16 -04:00
for q in questions
    md_path = "$q.md"
    docx_path = "$q.docx"
    # Write a Markdown document: a heading followed by one paragraph per non-missing entry
    open(md_path, "w") do io
        write(io, "# Antimicrobial usage survey open-ended question: $q\n\n")
        for (idx, answer) in enumerate(skipmissing(survey_data[!, q]))
            # The first entry is set in bold; every later one is a plain paragraph
            paragraph = idx == 1 ? "**$answer**\n\n" : "$answer\n\n"
            write(io, paragraph)
        end #for
    end #do
    # Convert the Markdown file to a Word document; requires `pandoc` on the PATH
    run(`pandoc $md_path -o $docx_path`)
end #for
2024-09-04 16:21:32 -04:00
# Close the log file opened at the top of the script; closing flushes any buffered log
# records to disk
close(log_io)