diff --git a/.gitignore b/.gitignore index 0e831c1..f4e4c53 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,3 @@ docs/site/ data.tsv Q*.docx Q*.md -*.log diff --git a/Manifest.toml b/Manifest.toml index 2f98f2a..f70d59d 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,16 +2,7 @@ julia_version = "1.10.5" manifest_format = "2.0" -project_hash = "485bf8850ea9ae7d6a84e8f24d8d02ed87a19c2c" - -[[deps.AbstractTrees]] -git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" -uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.4.5" - -[[deps.ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" +project_hash = "7e38425d15a28e7abd87534dcfc793c08d63a4f4" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" @@ -19,11 +10,6 @@ uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" -[[deps.BitFlags]] -git-tree-sha1 = "0691e34b3bb8be9307330f88d1a3c3f25466c24d" -uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" -version = "0.1.9" - [[deps.CSV]] deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] git-tree-sha1 = "6c834533dc1fabd820c1db03c839bf97e45a3fab" @@ -51,12 +37,6 @@ deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" version = "1.1.1+0" -[[deps.ConcurrentUtilities]] -deps = ["Serialization", "Sockets"] -git-tree-sha1 = "ea32b83ca4fefa1768dc84e504cc0a94fb1ab8d1" -uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" -version = "2.4.2" - [[deps.Crayons]] git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" @@ -88,41 +68,24 @@ version = "1.0.0" deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" -[[deps.Downloads]] -deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" -version = "1.6.0" - -[[deps.ExceptionUnwrapping]] -deps = ["Test"] -git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" -uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" -version = "0.1.10" - [[deps.FilePathsBase]] deps = ["Compat", "Dates"] git-tree-sha1 = "7878ff7172a8e6beedd1dea14bd27c3c6340d361" uuid = "48062228-2e41-5def-b9a4-89aafe57970f" version = "0.9.22" -weakdeps = ["Mmap", "Test"] [deps.FilePathsBase.extensions] FilePathsBaseMmapExt = "Mmap" FilePathsBaseTestExt = "Test" -[[deps.FileWatching]] -uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + [deps.FilePathsBase.weakdeps] + Mmap = "a63ad114-7e13-5084-954f-fe012c677804" + Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.Future]] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" -[[deps.HTTP]] -deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] -git-tree-sha1 = "d1d712be3164d61d1fb98e7ce9bcbc6cc06b45ed" -uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" -version = "1.10.8" - [[deps.InlineStrings]] git-tree-sha1 = "45521d31238e87ee9f9732561bfee12d4eebd52d" uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" @@ -150,53 +113,11 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" -[[deps.JLLWrappers]] -deps = ["Artifacts", "Preferences"] -git-tree-sha1 = "f389674c99bfcde17dc57454011aa44d5a260a40" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.6.0" - -[[deps.JSON3]] -deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] -git-tree-sha1 = "eb3edce0ed4fa32f75a0a11217433c31d56bd48b" -uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" -version = "1.14.0" - - [deps.JSON3.extensions] - JSON3ArrowExt = ["ArrowTypes"] - - [deps.JSON3.weakdeps] - ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" - [[deps.LaTeXStrings]] git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.3.1" -[[deps.LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" -version = "0.6.4" - -[[deps.LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "8.4.0+0" - -[[deps.LibGit2]] -deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[deps.LibGit2_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] -uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" -version = "1.6.4+0" - -[[deps.LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.11.0+1" - [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -204,30 +125,10 @@ uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -[[deps.Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[deps.LoggingExtras]] -deps = ["Dates", "Logging"] -git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" -uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" -version = "1.0.3" - [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" -[[deps.MbedTLS]] -deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] -git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" -uuid = "739be429-bea8-5141-9913-cc70e7f3736d" -version = "1.1.9" - -[[deps.MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+1" - [[deps.Missings]] deps = ["DataAPI"] git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" @@ -237,37 +138,11 @@ version = "1.2.0" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" -[[deps.MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2023.1.10" - -[[deps.NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" -version = "1.2.0" - -[[deps.OpenAI]] -deps = ["Dates", "HTTP", "JSON3"] -git-tree-sha1 = "c66f597044ac6cd41cbf4b191d59abbaf2003d9f" -uuid = "e9f21f70-7185-4079-aca2-91159181367c" -version = "0.9.0" - [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.23+4" -[[deps.OpenSSL]] -deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] -git-tree-sha1 = "38cb508d080d21dc1128f7fb04f20387ed4c0af4" -uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" -version = "1.4.3" - -[[deps.OpenSSL_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl"] -git-tree-sha1 = "a028ee3cb5641cccc4c24e90c36b0a4f7707bdf5" -uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" -version = "3.0.14+0" - [[deps.OrderedCollections]] git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" @@ -279,11 +154,6 @@ git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.8.1" -[[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.10.0" - [[deps.PooledArrays]] deps = ["DataAPI", "Future"] git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" @@ -312,28 +182,6 @@ version = "2.3.2" deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" -[[deps.PromptingTools]] -deps = ["AbstractTrees", "Base64", "Dates", "HTTP", "JSON3", "Logging", "OpenAI", "Pkg", "PrecompileTools", "Preferences", "REPL", "Random", "Test"] -git-tree-sha1 = "701d4c30e0af85f7eb685e5930ec85e8810dee37" -uuid = "670122d1-24a8-4d70-bfce-740807c42192" -version = "0.51.0" - - [deps.PromptingTools.extensions] - FlashRankPromptingToolsExt = ["FlashRank"] - GoogleGenAIPromptingToolsExt = ["GoogleGenAI"] - MarkdownPromptingToolsExt = ["Markdown"] - RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra", "Unicode"] - SnowballPromptingToolsExt = ["Snowball"] - - [deps.PromptingTools.weakdeps] - FlashRank = "22cc3f58-1757-4700-bb45-2032706e5a8d" - GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" - LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" - Snowball = "fb8f903a-0164-4e73-9ffe-431110250c3b" - SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -360,11 +208,6 @@ version = "1.4.5" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -[[deps.SimpleBufferStream]] -git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" -uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" -version = "1.1.0" - [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -390,12 +233,6 @@ git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" version = "0.3.4" -[[deps.StructTypes]] -deps = ["Dates", "UUIDs"] -git-tree-sha1 = "159331b30e94d7b11379037feeb9b690950cace8" -uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" -version = "1.11.0" - [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" @@ -418,25 +255,11 @@ git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.12.0" -[[deps.Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" -version = "1.10.0" - -[[deps.Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - [[deps.TranscodingStreams]] git-tree-sha1 = "e84b3a11b9bece70d14cce63406bbc79ed3464d2" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" version = "0.11.2" -[[deps.URIs]] -git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" -uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" -version = "1.5.1" - [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" @@ -464,13 +287,3 @@ version = "1.2.13+1" deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" version = "5.11.0+0" - -[[deps.nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.52.0+1" - -[[deps.p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" -version = "17.4.0+2" diff --git a/Project.toml b/Project.toml index d00e522..bbdd196 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,3 @@ [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" -PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192" diff --git a/main.jl b/main.jl index 46eceec..6bd0990 100755 --- a/main.jl +++ b/main.jl @@ -1,15 +1,6 @@ #!/usr/bin/env julia using CSV using DataFrames -using Dates -using JSON3 -using Logging -using PromptingTools - -# Set logging to file -log_io = open("antimicrobial-use-survey-analysis_$(now()).log", "w+") -logger = SimpleLogger(log_io) -global_logger(logger) # Import data survey_data = DataFrame(CSV.File("data.tsv"; delim='\t', normalizenames=true)) @@ -25,103 +16,8 @@ end #for # infer types # deleteat!(survey_data, [1,2]) -# Compile comments from all questions and analyze -# We will be offloading the analysis to Ollama running Llama3.1 locally -questions = [:Q8, :Q16, :Q29, :Q30] - -function logged_prompt(prompt) - @info "Prompting Llama3.1 with \n```$prompt\n```\n" - response = aigenerate( - PromptingTools.OllamaSchema(), - prompt; - model="llama3.1", - api_kwargs=(; options=(; num_gpu=99)) - ).content - @info "Llama3.1 responsed with \n```\n$response\n```\n" - return response -end #function - -question_dict = Dict() - -for q in questions - # Summarize the major themes among all answers - analysis_prompt = """ - The following is a list of answers to a survey with one response per paragraph: - - # Antimicrobial usage survey open-ended question: $q - - $( - join( - [ - i == 1 ? "**$a**\n" : "$a\n" for (i, a) in enumerate(skipmissing(survey_data[!, q])) - ], - '\n' - ) - ) - - --- - - Summarize the common themes between the survey responses. - """ - - analysis_response = logged_prompt(analysis_prompt) - - - # Compile all themes that Llama3 identified. - # Llama3 tends to summarize each theme with a bolded statement. - # We will extract the bolded statements to compile themes - themes = String[] - for l in eachline(IOBuffer(analysis_response)) - m = match(r"^[1-9]+\. \*\*(.+)\*\*:", l) - isnothing(m) || push!(themes, first(m)) - end #for - @info "Found themes $themes" - - answer_dict = Dict() - - # Now go back through each answer and check if it is addressing the theme noted - for (i, a) in enumerate(skipmissing(survey_data[!, q])) - i == 1 && continue #first "answer" is the question - - theme_dict = Dict{String,Union{Bool,Missing}}() - - for t in themes - theme_prompt = """ - The following was answered as a free-response answer on a survey: - - $a - - --- - - Does this answer deal with the theme of $t? Answer yes or no. - """ - - theme_response = logged_prompt(theme_prompt) - - if startswith(lowercase(theme_response), "yes") - theme_dict[t] = true - elseif startswith(lowercase(theme_response), "no") - theme_dict[t] = false - else - theme_dict[t] = missing - end #if - end #for - - answer_dict[a] = theme_dict - - end #for - - question_dict[q] = answer_dict - -end #for - - -open("results.json", "w") do io - JSON3.write(io, question_dict) -end - - # Compile comments from all requested questions +questions = [:Q8, :Q16, :Q29, :Q30] for q in questions open("$q.md", "w") do f write(f, "# Antimicrobial usage survey open-ended question: $q\n\n") @@ -137,6 +33,3 @@ for q in questions run(`pandoc $q.md -o $q.docx`) end #for - -# Close log file -close(log_io)