#!/usr/bin/env julia

# Analyze open-ended responses from an antimicrobial-use survey:
# 1. Load the survey TSV and attach each question's text as column metadata.
# 2. Send the free-text answers for selected questions to a local Llama3.1
#    model via Ollama and log its thematic summaries.
# 3. Export the raw answers per question to Markdown and convert to .docx.

using CSV
using DataFrames
using Dates
using Logging

# Send all logging to a timestamped file so each run keeps its own record.
# Format the timestamp explicitly: raw `now()` contains ':' characters,
# which are invalid in filenames on Windows.
log_io = open(
    "antimicrobial-use-survey-analysis_$(Dates.format(now(), "yyyy-mm-dd_HHMMSS")).log",
    "w+",
)
logger = SimpleLogger(log_io)
global_logger(logger)

# Import data
survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true))
deleteat!(survey_data, 2)

# Set descriptions of each column based on the actual question asked
# (the first data row holds the full question text for each column)
for (i, col) in enumerate(eachcol(survey_data))
    colmetadata!(survey_data, i, "description", first(col))
end #for

# Remove the messy JSON encoding
# TODO: For later graphs, move this step _before_ the import so that DataFrames can properly
# infer types
# deleteat!(survey_data, [1,2])

# Compile comments from all questions and analyze
# We will be offloading the analysis to Ollama running Llama3.1 locally
questions = [:Q8, :Q16, :Q29, :Q30]

# `ollama serve` is a long-running server: start it in the background
# (wait=false) — running it synchronously would block this script forever
# before any analysis could happen.
ollama_server = run(`ollama serve`; wait=false)

for q in questions
    analysis_prompt = """
        The following is a list of answers to a survey with one response per paragraph:

        # Antimicrobial usage survey open-ended question: $q

        $(
            join(
                [
                    i == 1 ? "**$a**\n" : "$a\n" for
                    (i, a) in enumerate(skipmissing(survey_data[!, q]))
                ],
                '\n',
            )
        )

        ---

        Summarize the common themes between the survey responses.
        """
    @info "Prompting Llama3.1 with \n```\n$analysis_prompt\n```\n"
    # Read the command's stdout directly: `read(run(cmd), String)` does NOT
    # capture output (the child's stdout goes straight to the terminal);
    # `read(cmd, String)` runs the command and returns its stdout.
    analysis_response = read(`ollama run llama3.1 $analysis_prompt`, String)
    @info "Llama3.1 responded with \n```\n$analysis_response\n```\n"
end #for

# Compile comments from all requested questions into Markdown, then convert
# each to a Word document with pandoc. The first (non-missing) answer is the
# question header row, so it is emphasized.
for q in questions
    open("$q.md", "w") do f
        write(f, "# Antimicrobial usage survey open-ended question: $q\n\n")
        for (i, a) in enumerate(skipmissing(survey_data[!, q]))
            if i == 1
                write(f, "**$a**\n\n")
            else
                write(f, "$a\n\n")
            end #if
        end #for
    end #do
    run(`pandoc $q.md -o $q.docx`)
end #for

# Shut down the background Ollama server and close the log file
kill(ollama_server)
close(log_io)