antimicrobial-use-survey-an.../main.jl

#!/usr/bin/env julia
using CSV
using DataFrames
using Dates
using JSON3
using Logging
using PromptingTools

# Set logging to file
# The timestamp is formatted explicitly instead of interpolating `now()` directly:
# the default rendering (e.g. 2024-09-04T16:21:32.123) contains `:` characters, which
# are not allowed in Windows file names. Milliseconds are kept so that two runs
# started within the same second do not truncate each other's log ("w+" truncates).
log_timestamp = Dates.format(now(), dateformat"yyyy-mm-dd_HH-MM-SS-sss")
log_path = "antimicrobial-use-survey-analysis_$(log_timestamp).log"
log_io = open(log_path, "w+")
# Route every subsequent @info/@warn/... message of the script into the log file
logger = SimpleLogger(log_io)
global_logger(logger)

# Load the tab-separated survey export into a DataFrame, normalizing the column names
survey_data = CSV.File("data.tsv"; delim='\t', normalizenames=true) |> DataFrame
# Drop the second row of the imported table
# NOTE(review): presumably a non-answer row of the export (see the JSON note further
# down) — confirm against data.tsv
deleteat!(survey_data, 2)

# Store, for every column, its first remaining value under the "description" metadata
# key; that value holds the actual question that was asked
foreach(propertynames(survey_data)) do colname
    colmetadata!(survey_data, colname, "description", first(survey_data[!, colname]))
end #do

# Remove the messy JSON encoding (currently disabled: the call below is commented out)
# TODO: For later graphs, move this step _before_ the import so that DataFrames can properly
# infer types
# deleteat!(survey_data, [1,2])

# Columns holding the open-ended questions whose comments are compiled and analyzed.
# The analysis is offloaded to Ollama running Llama3.1 locally.
questions = Symbol[:Q8, :Q16, :Q29, :Q30]

"""
    logged_prompt(prompt)

Send `prompt` to the `llama3.1` model through PromptingTools' Ollama schema and return
the `content` of the generated message.

Both the outgoing prompt and the returned content are recorded with `@info`, so they
end up wherever the active logger writes.

# Arguments
- `prompt`: the prompt, passed unchanged to `aigenerate`.

# Returns
The `content` field of the value returned by `aigenerate`.
"""
function logged_prompt(prompt)
    # The fence is opened on its own line so the logged prompt is delimited the same
    # way as the logged response below
    @info "Prompting Llama3.1 with \n```\n$prompt\n```\n"
    response = aigenerate(
        PromptingTools.OllamaSchema(),
        prompt;
        model="llama3.1",
        # NOTE(review): `num_gpu=99` is forwarded as an Ollama request option;
        # presumably it asks for up to 99 layers on the GPU — confirm in the Ollama docs
        api_kwargs=(; options=(; num_gpu=99))
    ).content
    @info "Llama3.1 responded with \n```\n$response\n```\n"
    return response
end #function

for q in questions
    # One paragraph per non-missing entry of the column; the first entry is set in bold
    paragraphs = map(enumerate(skipmissing(survey_data[!, q]))) do (idx, answer)
        idx == 1 ? "**$answer**\n" : "$answer\n"
    end #do
    answers_block = join(paragraphs, '\n')

    analysis_prompt = """
    The following is a list of answers to a survey with one response per paragraph:

    # Antimicrobial usage survey open-ended question: $q

    $answers_block

    ---

    Summarize the common themes between the survey responses.
    """

    # The reply is not used any further here; `logged_prompt` is called for the request
    # it sends and the log entries it writes
    logged_prompt(analysis_prompt)

end #for


# For each requested question, write its comments to a Markdown file and convert that
# file to a Word document with pandoc
for q in questions
    markdown_file = "$q.md"
    open(markdown_file, "w") do io
        write(io, "# Antimicrobial usage survey open-ended question: $q\n\n")
        for (n, answer) in enumerate(skipmissing(survey_data[!, q]))
            # Only the first non-missing entry is emphasized in bold
            paragraph = n == 1 ? "**$answer**\n\n" : "$answer\n\n"
            write(io, paragraph)
        end #for
    end #do

    run(`pandoc $markdown_file -o $q.docx`)

end #for

# Close the log file that was opened at the top of the script
close(log_io)
feat: Add response sorting script 2024-09-03 18:50:16 -04:00			`#!/usr/bin/env julia`
			`using CSV`
			`using DataFrames`
feat: Add logging to script 2024-09-04 16:21:32 -04:00			`using Dates`
chore: Add JSON3 2024-09-04 17:49:17 -04:00			`using JSON3`
feat: Add logging to script 2024-09-04 16:21:32 -04:00			`using Logging`
chore: Add PromptingTools 2024-09-04 16:46:28 -04:00			`using PromptingTools`
feat: Add logging to script 2024-09-04 16:21:32 -04:00
			`# Set logging to file`
			`log_io = open("antimicrobial-use-survey-analysis_$(now()).log", "w+")`
			`logger = SimpleLogger(log_io)`
			`global_logger(logger)`
feat: Add response sorting script 2024-09-03 18:50:16 -04:00
			`# Import data`
			`survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true))`
			`deleteat!(survey_data, 2)`

			`# Set descriptions of each column based on the actual question asked`
			`for (i, col) in enumerate(eachcol(survey_data))`
			`colmetadata!(survey_data, i, "description", first(col))`
			`end #for`

			`# Remove the messy JSON encoding`
			`# TODO: For later graphs, move this step _before_ the import so that DataFrames can properly`
			`# infer types`
			`# deleteat!(survey_data, [1,2])`

wip: Make Ollama offloading step (broken) 2024-09-04 16:45:48 -04:00			`# Compile comments from all questions and analyze`
			`# We will be offloading the analysis to Ollama running Llama3.1 locally`
feat: Add response sorting script 2024-09-03 18:50:16 -04:00			`questions = [:Q8, :Q16, :Q29, :Q30]`
wip: Make Ollama offloading step (broken) 2024-09-04 16:45:48 -04:00
refactor: Add prompt-specific logic to function 2024-09-04 17:02:16 -04:00			`function logged_prompt(prompt)`
			@info "Prompting Llama3.1 with \n```$prompt\n```\n"
			`response = aigenerate(`
			`PromptingTools.OllamaSchema(),`
			`prompt;`
			`model="llama3.1",`
			`api_kwargs=(; options=(; num_gpu=99))`
			`).content`
			@info "Llama3.1 responsed with \n```\n$response\n```\n"
			`return response`
			`end #function`

wip: Make Ollama offloading step (broken) 2024-09-04 16:45:48 -04:00			`for q in questions`
			`analysis_prompt = """`
			`The following is a list of answers to a survey with one response per paragraph:`

			`# Antimicrobial usage survey open-ended question: $q`

			`$(`
			`join(`
			`[`
			`i == 1 ? "$a\n" : "$a\n" for (i, a) in enumerate(skipmissing(survey_data[!, q]))`
			`],`
			`'\n'`
			`)`
			`)`

			`---`

			`Summarize the common themes between the survey responses.`
			`"""`

refactor: Add prompt-specific logic to function 2024-09-04 17:02:16 -04:00			`analysis_response = logged_prompt(analysis_prompt)`
wip: Make Ollama offloading step (broken) 2024-09-04 16:45:48 -04:00
			`end #for`


			`# Compile comments from all requested questions`
feat: Add response sorting script 2024-09-03 18:50:16 -04:00			`for q in questions`
			`open("$q.md", "w") do f`
			`write(f, "# Antimicrobial usage survey open-ended question: $q\n\n")`
			`for (i, a) in enumerate(skipmissing(survey_data[!, q]))`
			`if i == 1`
			`write(f, "$a\n\n")`
			`else`
			`write(f, "$a\n\n")`
			`end #if`
			`end #for`
			`end #do`

			run(`pandoc $q.md -o $q.docx`)

			`end #for`
feat: Add logging to script 2024-09-04 16:21:32 -04:00
			`# Close log file`
			`close(log_io)`