#!/usr/bin/env julia

# Analyze open-ended responses from an antimicrobial-use survey:
# 1. Load the survey TSV and attach each question's text as column metadata.
# 2. Send the free-text answers for selected questions to a local Llama3.1
#    model via Ollama and log its thematic summaries.
# 3. Export the raw answers per question to Markdown and convert to .docx.

using CSV
using DataFrames
using Dates
using Logging

# Send all logging to a timestamped file so each run keeps its own record.
# Format the timestamp explicitly: raw `now()` contains ':' characters,
# which are invalid in filenames on Windows.
log_io = open(
    "antimicrobial-use-survey-analysis_$(Dates.format(now(), "yyyy-mm-dd_HHMMSS")).log",
    "w+",
)
logger = SimpleLogger(log_io)
global_logger(logger)

# Import data
survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true))
deleteat!(survey_data, 2)

# Set descriptions of each column based on the actual question asked
# (the first data row holds the full question text for each column)
for (i, col) in enumerate(eachcol(survey_data))
    colmetadata!(survey_data, i, "description", first(col))
end #for

# Remove the messy JSON encoding
# TODO: For later graphs, move this step _before_ the import so that DataFrames can properly
# infer types
# deleteat!(survey_data, [1,2])

# Compile comments from all questions and analyze
# We will be offloading the analysis to Ollama running Llama3.1 locally
questions = [:Q8, :Q16, :Q29, :Q30]

# `ollama serve` is a long-running server: start it in the background
# (wait=false) — running it synchronously would block this script forever
# before any analysis could happen.
ollama_server = run(`ollama serve`; wait=false)

for q in questions
    analysis_prompt = """
        The following is a list of answers to a survey with one response per paragraph:

        # Antimicrobial usage survey open-ended question: $q

        $(
            join(
                [
                    i == 1 ? "**$a**\n" : "$a\n" for
                    (i, a) in enumerate(skipmissing(survey_data[!, q]))
                ],
                '\n',
            )
        )

        ---

        Summarize the common themes between the survey responses.
        """
    @info "Prompting Llama3.1 with \n```\n$analysis_prompt\n```\n"
    # Read the command's stdout directly: `read(run(cmd), String)` does NOT
    # capture output (the child's stdout goes straight to the terminal);
    # `read(cmd, String)` runs the command and returns its stdout.
    analysis_response = read(`ollama run llama3.1 $analysis_prompt`, String)
    @info "Llama3.1 responded with \n```\n$analysis_response\n```\n"
end #for

# Compile comments from all requested questions into Markdown, then convert
# each to a Word document with pandoc. The first (non-missing) answer is the
# question header row, so it is emphasized.
for q in questions
    open("$q.md", "w") do f
        write(f, "# Antimicrobial usage survey open-ended question: $q\n\n")
        for (i, a) in enumerate(skipmissing(survey_data[!, q]))
            if i == 1
                write(f, "**$a**\n\n")
            else
                write(f, "$a\n\n")
            end #if
        end #for
    end #do
    run(`pandoc $q.md -o $q.docx`)
end #for

# Shut down the background Ollama server and close the log file
kill(ollama_server)
close(log_io)