cboettig committed on
Commit
c2fd183
·
1 Parent(s): d2d3b8d

:construction: :construction:

Browse files
Files changed (5) hide show
  1. app.R +163 -201
  2. footer.md +0 -16
  3. system-prompt.md +0 -34
  4. test-app.R +44 -63
  5. test-data.R +13 -29
app.R CHANGED
@@ -3,16 +3,18 @@ library(bslib)
3
  library(htmltools)
4
  library(fontawesome)
5
  library(bsicons)
6
- library(gt)
7
- library(colourpicker)
8
  library(glue)
9
-
10
- library(ggplot2)
11
- library(readr)
12
  library(dplyr)
 
13
  library(mapgl)
14
- library(duckdbfs)
15
- duckdbfs::load_spatial()
 
 
 
16
 
17
  css <-
18
  HTML(paste0("<link rel='stylesheet' type='text/css' ",
@@ -26,63 +28,51 @@ ui <- page_sidebar(
26
  fillable = FALSE, # do not squeeze to vertical screen space
27
  tags$head(css),
28
  titlePanel("Demo App"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- "
31
- This is a proof-of-principle for a simple chat-driven interface
32
- to dynamically explore geospatial data.
33
- ",
34
 
35
  card(
36
  layout_columns(
37
  textInput("chat",
38
  label = NULL,
39
- "Which four counties in California have the highest average social vulnerability?",
40
  width = "100%"),
 
41
  div(
42
  actionButton("user_msg", "", icon = icon("paper-plane"),
43
  class = "btn-primary btn-sm align-bottom"),
44
  class = "align-text-bottom"),
45
- col_widths = c(11, 1)),
46
  fill = FALSE
 
47
  ),
48
 
49
  textOutput("agent"),
50
 
51
-
52
- layout_columns(
53
- card(maplibreOutput("map")),
54
- card(includeMarkdown("## Plot"),
55
- plotOutput("chart1"),
56
- plotOutput("chart2"),
57
- ),
58
- col_widths = c(8, 4),
59
- row_heights = c("500px"),
60
- max_height = "600px"
61
- ),
62
-
63
- gt_output("table"),
64
-
65
- card(fill = TRUE,
66
- card_header(fa("robot"), textOutput("model", inline = TRUE)),
67
- accordion(
68
- open = FALSE,
69
- accordion_panel(
70
- title = "show sql",
71
- icon = fa("terminal"),
72
- verbatimTextOutput("sql_code"),
73
- ),
74
- accordion_panel(
75
- title = "explain",
76
- icon = fa("user", prefer_type="solid"),
77
- textOutput("explanation"),
78
- )
79
- ),
80
- ),
81
- card(
82
- card_header("Errata"),
83
- shiny::markdown(readr::read_file("footer.md")),
84
- ),
85
  sidebar = sidebar(
 
 
 
 
 
86
  selectInput(
87
  "select",
88
  "Select an LLM:",
@@ -91,192 +81,164 @@ ui <- page_sidebar(
91
  "Gorilla (UC Berkeley)" = "gorilla"
92
  )
93
  ),
94
-
95
- input_switch("redlines", "Redlined Areas", value = FALSE),
96
- input_switch("svi", "Social Vulnerability", value = TRUE),
97
- input_switch("richness", "Biodiversity Richness", value = FALSE),
98
- input_switch("rsr", "Biodiversity Range Size Rarity", value = FALSE),
99
-
100
- colourInput("color", "Select a color",
101
- value = "blue"),
102
- sliderInput("slider", "Show BIR74 values above:",
103
- value = 248, min = 248, max = 21588),
104
-
 
 
 
 
 
 
 
105
  card(
106
  card_header(bs_icon("github"), "Source code:"),
107
- a(href = "https://github.com/boettiger-lab/geo-llm-r",
108
- "https://github.com/boettiger-lab/geo-llm-r"))
109
  ),
110
 
111
  theme = bs_theme(version = "5")
112
  )
113
 
114
 
115
- repo <- "https://data.source.coop/cboettig/social-vulnerability"
116
- pmtiles <- glue("{repo}/2022/SVI2022_US_tract.pmtiles")
117
- parquet <- glue("{repo}/2022/SVI2022_US_tract.parquet")
118
- con <- duckdbfs::cached_connection()
119
- svi <- open_dataset(parquet, tblname = "svi") |> filter(RPL_THEMES > 0)
120
-
121
- safe_parse <- function(txt) {
122
- gsub("[\r\n]", " ", txt) |> gsub("\\s+", " ", x = _)
123
- }
124
-
125
-
126
- # helper utilities
127
- # faster/more scalable to pass maplibre the ids to refilter pmtiles,
128
- # than to pass it the full geospatial/sf object
129
- filter_column <- function(full_data, filtered_data, id_col = "FIPS") {
130
- if (nrow(filtered_data) < 1) return(NULL)
131
- values <- full_data |>
132
- inner_join(filtered_data, copy = TRUE) |>
133
- pull(id_col)
134
- # maplibre syntax for the filter of PMTiles
135
- list("in", list("get", id_col), list("literal", values))
136
- }
137
-
138
 
 
 
139
 
140
  # Define the server
141
  server <- function(input, output, session) {
 
 
 
 
 
 
 
 
 
142
 
143
- chart1_data <- svi |>
144
- group_by(COUNTY) |>
145
- summarise(mean_svi = mean(RPL_THEMES)) |>
146
- collect()
147
-
148
- chart1 <- chart1_data |>
149
- ggplot(aes(mean_svi)) + geom_density(fill="darkred") +
150
- ggtitle("County-level vulnerability nation-wide")
151
-
152
- data <- reactiveValues(df = tibble())
153
- output$chart1 <- renderPlot(chart1)
154
-
155
- model <- reactive(input$select)
156
- output$model <- renderText(input$select)
157
- observe({
158
- schema <- read_file("schema.yml")
159
- system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
160
- .open = "<", .close = ">")
161
- chat <- ellmer::chat_vllm(
162
- base_url = "https://llm.nrp-nautilus.io/",
163
- model = model(),
164
- api_key = Sys.getenv("NRP_API_KEY"),
165
- system_prompt = system_prompt,
166
- api_args = list(temperature = 0)
167
- )
168
-
169
- observeEvent(input$user_msg, {
170
- stream <- chat$chat(input$chat)
171
 
172
- # Parse response
173
- response <- jsonlite::fromJSON(safe_parse(stream))
174
- #response <- jsonlite::fromJSON(stream)
 
175
 
176
- if ("query" %in% names(response)) {
177
- output$sql_code <- renderText(stringr::str_wrap(response$query, width = 60))
178
- output$explanation <- renderText(response$explanation)
179
 
180
- # Actually execute the SQL query generated:
181
- df <- DBI::dbGetQuery(con, response$query)
 
 
 
182
 
183
- # don't display shape column in render
184
- df <- df |> select(-any_of("Shape"))
185
- output$table <- render_gt(df, height = 300)
186
 
 
187
 
188
- y_axis <- colnames(df)[!colnames(df) %in% colnames(svi)]
189
- chart2 <- df |>
190
- rename(social_vulnerability = y_axis) |>
191
- ggplot(aes(social_vulnerability)) +
192
- geom_density(fill = "darkred") +
193
- xlim(c(0, 1)) +
194
- ggtitle("Vulnerability of selected areas")
195
 
196
- output$chart2 <- renderPlot(chart2)
197
 
198
- # We need to somehow trigger this df to update the map.
199
- data$df <- df
 
 
 
 
 
 
 
200
 
201
- # Note: ellmer will preserve full chat history automatically.
202
- # this can confuse the agent and mess up behavior, so we reset:
203
- chat$set_turns(NULL)
204
 
205
- } else {
206
- output$agent <- renderText(response$agent)
207
-
208
- }
209
 
210
- })
211
- })
 
 
 
 
 
212
 
213
 
214
- output$map <- renderMaplibre({
 
215
 
216
- m <- maplibre(center = c(-104.9, 40.3), zoom = 3, height = "400")
217
- if (input$redlines) {
218
- m <- m |>
219
- add_fill_layer(
220
- id = "redlines",
221
- source = list(type = "vector",
222
- url = paste0("pmtiles://", "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles")),
223
- source_layer = "mappinginequality",
224
- fill_color = list("get", "fill")
225
- )
226
- }
227
- if (input$richness) {
228
- m <- m |>
229
- add_raster_source(id = "richness",
230
- tiles = "https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
231
- maxzoom = 11
232
- ) |>
233
- add_raster_layer(id = "richness-layer",
234
- source = "richness")
235
 
236
- }
 
 
237
 
238
- if (input$rsr) {
239
- m <- m |>
240
- add_raster_source(id = "rsr",
241
- tiles = "https://data.source.coop/cboettig/mobi/tiles/green/range-size-rarity-all/{z}/{x}/{y}.png",
242
- maxzoom = 11
243
- ) |>
244
- add_raster_layer(id = "richness-layer",
245
- source = "rsr")
246
 
 
 
247
  }
248
- if (input$svi) {
249
- m <- m |>
250
- add_fill_layer(
251
- id = "svi_layer",
252
- source = list(type = "vector",
253
- url = paste0("pmtiles://", pmtiles)),
254
- source_layer = "svi",
255
- filter = filter_column(svi, data$df, "FIPS"),
256
- fill_opacity = 0.5,
257
- fill_color = interpolate(column = "RPL_THEMES",
258
- values = c(0, 1),
259
- stops = c("lightpink", "darkred"),
260
- na_color = "lightgrey")
261
- )
262
- }
263
- m |>
264
- add_draw_control() |>
265
- add_geocoder_control()
266
-
267
- })
268
-
269
- observeEvent(input$color, {
270
- maplibre_proxy("map") |>
271
- set_paint_property("svi_layer", "fill-color", input$color)
272
- })
273
-
274
- observeEvent(input$slider, {
275
- maplibre_proxy("map") |>
276
- set_filter("svi_layer",
277
- list(">=", get_column("BIR74"), input$slider))
278
- })
279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
 
282
  # Run the app
 
3
  library(htmltools)
4
  library(fontawesome)
5
  library(bsicons)
 
 
6
  library(glue)
7
+ library(sf)
8
+ library(duckdb.agent)
9
+ library(duckdbfs)
10
  library(dplyr)
11
+ library(ellmer)
12
  library(mapgl)
13
+ library(digest)
14
+ library(stringr)
15
+ library(shinybusy)
16
+
17
+ duckdbfs::close_connection()
18
 
19
  css <-
20
  HTML(paste0("<link rel='stylesheet' type='text/css' ",
 
28
  fillable = FALSE, # do not squeeze to vertical screen space
29
  tags$head(css),
30
  titlePanel("Demo App"),
31
+ shinybusy::add_busy_spinner(),
32
+
33
+ p("
34
+ Select a desired area with the draw tools on the map, then hit 'Set Area of Interest' to select.
35
+ Then, enter your query in the text box below the map to count occurrences of your specified taxonomic group.
36
+ Use the airplane button to sned your query. The computation may take a few minutes depending on the size and scale of
37
+ the search.
38
+ "),
39
+
40
+ p("
41
+ Scroll to zoom, ctrl+click to pitch and rotate. Hitting the area button with no selection to include the entire map.
42
+ "),
43
+
44
+ layout_columns(
45
+ card(maplibreOutput("map")),
46
+ div(actionButton("get_features", "Set Area Of Interest", icon = icon("object-group"),
47
+ class = "btn-primary align-bottom")),
48
+ col_widths = c(11,1)
49
+ ),
50
 
 
 
 
 
51
 
52
  card(
53
  layout_columns(
54
  textInput("chat",
55
  label = NULL,
56
+ "show all bird occurrences at zoom level 6",
57
  width = "100%"),
58
+
59
  div(
60
  actionButton("user_msg", "", icon = icon("paper-plane"),
61
  class = "btn-primary btn-sm align-bottom"),
62
  class = "align-text-bottom"),
63
+ col_widths = c(11, 1),
64
  fill = FALSE
65
+ ),
66
  ),
67
 
68
  textOutput("agent"),
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  sidebar = sidebar(
71
+ card(fill = TRUE,
72
+ card_header("Selected area:"),
73
+ verbatimTextOutput("feature_output")
74
+ ),
75
+
76
  selectInput(
77
  "select",
78
  "Select an LLM:",
 
81
  "Gorilla (UC Berkeley)" = "gorilla"
82
  )
83
  ),
84
+ card(fill = TRUE,
85
+ card_header(fa("robot"), textOutput("model", inline = TRUE)),
86
+ accordion(
87
+ open = TRUE,
88
+ accordion_panel(
89
+ HTML("<span, class='text-info'>Show SQL query</span>"),
90
+ icon = fa("terminal"),
91
+ verbatimTextOutput("sql_code")
92
+ ),
93
+ accordion_panel(
94
+ title = "Explain query",
95
+ icon = fa("user", prefer_type = "solid"),
96
+ textOutput("explanation")
97
+ )
98
+ )
99
+ ),
100
+
101
+
102
  card(
103
  card_header(bs_icon("github"), "Source code:"),
104
+ a(href = "https://github.com/boettiger-lab/biodiversity-justice",
105
+ "https://github.com/boettiger-lab/biodiversity-justice"))
106
  ),
107
 
108
  theme = bs_theme(version = "5")
109
  )
110
 
111
 
112
+ duckdb_secrets(Sys.getenv("MINIO_KEY"),
113
+ Sys.getenv("MINIO_SECRET"),
114
+ "minio.carlboettiger.info")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ gbif <- open_dataset("s3://public-gbif/2024-10-01", tblname = "gbif")
117
+ bounds <- ""
118
 
119
  # Define the server
120
  server <- function(input, output, session) {
121
+ output$map <- renderMaplibre({
122
+ m <- maplibre(center=c(-110, 38), zoom = 3, pitch = 0) |>
123
+ add_draw_control() |>
124
+ add_geocoder_control() #|>
125
+ # set_projection("globe")
126
+
127
+ m
128
+ })
129
+ observeEvent(input$get_features, {
130
 
131
+ drawn_features <- get_drawn_features(mapboxgl_proxy("map"))
132
+ if(nrow(drawn_features) > 0) {
133
+ bounds <- st_bbox(drawn_features)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ # print(bounds)
136
+ output$feature_output <- renderPrint({
137
+ print(bounds)
138
+ })
139
 
 
 
 
140
 
141
+ attach(as.list(bounds))
142
+ gbif_aoi <- gbif |>
143
+ dplyr::filter(between(decimallatitude, ymin, ymax),
144
+ between(decimallongitude, xmin, xmax)) |>
145
+ as_view("gbif_aoi")
146
 
147
+ }
 
 
148
 
149
+ observeEvent(input$user_msg, {
150
 
 
 
 
 
 
 
 
151
 
 
152
 
153
+ system_prompt = create_prompt(additional_instructions =
154
+ "Note that the columns h1, h2, h3, through h11 contains a geohash representing a H3 hexagon index.
155
+ Higher numbers indicate higher zoom resolution (smaller hexes)
156
+ Always aggregate results to count the number of rows matching
157
+ the query to the desired hexagon. Always name the count column 'count'.
158
+ Remember to group by hexagon level to aggregate!
159
+ Always rename the chosen hexagon column as 'h3id' in your final answer.
160
+ Always use table notation like 'gbif.order' to specify column names.
161
+ Be sure to generate fully valid SQL. Check your SQL for possible errors.
162
 
163
+ Always use the table 'gbif_aoi' rather than 'gbif' table if both are present.
 
 
164
 
165
+ IMPORTANT: return raw JSON only, do not decorate your reply with markdown code syntax.
166
+ ")
 
 
167
 
168
+ agent <- ellmer::chat_vllm(
169
+ base_url = "https://llm.cirrus.carlboettiger.info/v1/",
170
+ model = "kosbu/Llama-3.3-70B-Instruct-AWQ",
171
+ api_key = Sys.getenv("CIRRUS_LLM_KEY"),
172
+ system_prompt = system_prompt,
173
+ api_args = list(temperature = 0)
174
+ )
175
 
176
 
177
+ print("Agent thinking...")
178
+ stream <- agent$chat(input$chat)
179
 
180
+ # Parse response
181
+ response <- jsonlite::fromJSON(stream)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
+ if ("query" %in% names(response)) {
184
+ output$sql_code <- renderText(str_wrap(response$query, width = 60))
185
+ output$explanation <- renderText(response$explanation)
186
 
187
+ # clear agent memory
188
+ agent$set_turns(NULL)
 
 
 
 
 
 
189
 
190
+ } else {
191
+ output$agent <- renderText(response$agent)
192
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+
195
+ # cache the query
196
+ query_id <- digest::digest(paste(response$query, bounds, collapse=""))
197
+ data_url <- glue::glue("https://minio.carlboettiger.info/public-data/cache/{query_id}.h3j")
198
+ output$url <- renderText(data_url)
199
+
200
+ cache_parquet <- glue("{query_id}.parquet")
201
+
202
+ # compute if not yet in cache
203
+ status <- httr::status_code(httr::HEAD(data_url))
204
+ if(status == 404) {
205
+ print("Computing...")
206
+ time <- bench::bench_time({
207
+ agent_query(stream) |> write_dataset(cache_parquet)
208
+ })
209
+ print(time)
210
+ }
211
+
212
+ # draw on map
213
+ h3j <- glue("s3://public-data/cache/{query_id}.h3j")
214
+ open_dataset(cache_parquet) |> to_h3j(h3j)
215
+
216
+ # override previous map with drawn map
217
+ output$map <- renderMaplibre({
218
+ m <- maplibre(center=c(-110, 38), zoom = 3, pitch = 0, maxZoom = 9) |>
219
+ add_h3j_source("h3j_source",
220
+ url = data_url) |>
221
+ add_fill_extrusion_layer(
222
+ id = "h3j_layer",
223
+ source = "h3j_source",
224
+ tooltip = "count",
225
+ fill_extrusion_color = interpolate(
226
+ column = "count",
227
+ values = c(0, 1000),
228
+ stops = c("#430254", "#f83c70")
229
+ ),
230
+ fill_extrusion_height = list(
231
+ "interpolate",
232
+ list("linear"),
233
+ list("zoom"),
234
+ 0, 0, 1000,
235
+ list("*", 10, list("get", "count"))
236
+ ),
237
+ fill_extrusion_opacity = 0.7
238
+ )
239
+ }) # close renderMaplibre
240
+ }) # close observeEvent->user_msg
241
+ }) # close observeEvent->get_features
242
  }
243
 
244
  # Run the app
footer.md DELETED
@@ -1,16 +0,0 @@
1
- #### Credits
2
-
3
- Developed by Carl Boettiger, UC Berkeley, 2025. BSD License.
4
-
5
- Data from the US Census and CDC's [Social Vulnerability Index](https://www.atsdr.cdc.gov/place-health/php/svi/index.html)
6
-
7
- #### Technical details
8
-
9
- The app is written entirely in R using shiny. The app will translate natural language queries into SQL code using
10
- a small open-weights language model. The SQL code is executed using the duckdb backend against a cloud-native
11
- geoparquet snapshot of the Social Vulnerability Index hosted on Source Cooperative. Summary chart data are also
12
- computed in duckdb by streaming, providing responsive updates while needing minimal RAM or disk storage despite
13
- the large size of the data sources.
14
-
15
- The map is rendered and updated using MapLibre with PMTiles, which provides responsive rendering for large feature sets.
16
- The PMTiles layer is also hosted on Source Cooperative where it can be streamed efficiently.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
system-prompt.md DELETED
@@ -1,34 +0,0 @@
1
-
2
- You are a helpful agent who always replies strictly in JSON-formatted text.
3
- Your task is to translate the user's questions about the data into a SQL query
4
- that will be run against the "biodiversity_occurrences" table in a duckdb database.
5
- The duckdb database has a spatial extension which understands PostGIS operations as well.
6
-
7
- If your answer involves the construction of a SQL query, you must format your answer as follows:
8
-
9
- {
10
- "query": "your raw SQL response goes here",
11
- "explanation": "your explanation of the query"
12
- }
13
-
14
- If your answer does not involve a SQL query, please reply with the following format instead:
15
-
16
- {
17
- "user": "user question goes here",
18
- "agent": "your response goes here"
19
- }
20
-
21
- If you are asked to describe the data or for information about the data schema, give only a human-readable response with SQL.
22
-
23
- In the data, each row represents an individual occurrence of a species. The occurrences
24
- are geocoded to US Census counties, with the STATE, COUNTY, and FIPS columns indicating
25
- the corresponding state name, county name, and FIPS identifier for the specific County.
26
- The FIPS column is a 5-digit number that uniquely identifies a county in a state.
27
- Taxonomic classification of the species is given in the corresponding columns, kingdom,
28
- phylum, class, order, family, genus, and species.
29
-
30
- The data also includes information about various measures of social vulnerability (RPL_THEMES).
31
- Pay attention to the DESCRIPTION of each of the columns (VARIABLE_NAME) from the metadata table:
32
- <schema>
33
-
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test-app.R CHANGED
@@ -1,26 +1,24 @@
1
- ## Illustrate/test core app functionality without shiny
2
-
3
- library(tidyverse)
4
  library(duckdbfs)
5
- library(mapgl)
6
  library(ellmer)
7
- library(glue)
8
 
9
- repo <- "https://data.source.coop/cboettig/social-vulnerability"
10
- pmtiles <- glue("{repo}/svi2020_us_tract.pmtiles")
11
- duckdb_s3_config(s3_endpoint = "minio.carlboettiger.info")
12
- svi <-
13
- open_dataset("s3://public-gbif/svi", tblname = "biodiversity_occurrences") |>
14
- filter(RPL_THEMES > 0)
15
- schema <- read_file("schema.yml")
16
- system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
17
- .open = "<", .close = ">")
18
 
 
 
 
 
 
 
19
 
20
 
 
 
 
 
 
21
 
22
- # Or optionally test with cirrus
23
- chat <- ellmer::chat_vllm(
24
  base_url = "https://llm.cirrus.carlboettiger.info/v1/",
25
  model = "kosbu/Llama-3.3-70B-Instruct-AWQ",
26
  api_key = Sys.getenv("CIRRUS_LLM_KEY"),
@@ -28,55 +26,38 @@ chat <- ellmer::chat_vllm(
28
  api_args = list(temperature = 0)
29
  )
30
 
31
- # or use the NRP model
32
- chat <- ellmer::chat_vllm(
33
- base_url = "https://llm.nrp-nautilus.io/",
34
- model = "olmo",
35
- api_key = Sys.getenv("NRP_API_KEY"),
36
- system_prompt = system_prompt,
37
- api_args = list(temperature = 0)
38
- )
39
-
40
- cols <- colnames(svi)
41
- rpls <- grep("RPL_THEME.+", cols)
42
- keep <- cols[c(1:9, rpls, 161:226)]
43
- biodiversity <- svi |> select(all_of(keep))
44
-
45
- # Test a chat-based response
46
- chat$chat("Which columns describes racial components of social vulnerability?")
47
- chat$set_turns(NULL)
48
- ## A query-based response
49
- stream <- chat$chat("Which counties have the most bird observations?")
50
- stream <- chat$chat("Give me the number bird observations per county vs county social vulnerability")
51
- response <- jsonlite::fromJSON(stream)
52
-
53
 
54
- stream2 <- chat$chat("Great, now give me the ggplot2 code to plot the data.frame you returned as those counts vs social vulnerability as points. Be sure to place the R code for your reply by itself in a 'code' element of the JSON")
 
 
55
 
56
- response <- jsonlite::fromJSON(stream2)
57
 
58
- con <- duckdbfs::cached_connection()
59
- filtered_data <- DBI::dbGetQuery(con, response$query)
60
 
61
- filter_column <- function(full_data, filtered_data, id_col) {
62
- if (nrow(filtered_data) < 1) return(NULL)
63
- values <- full_data |>
64
- inner_join(filtered_data, copy = TRUE) |>
65
- pull(id_col)
66
- # maplibre syntax for the filter of PMTiles
67
- list("in", list("get", id_col), list("literal", values))
68
- }
69
-
70
- maplibre(center = c(-102.9, 41.3), zoom = 3) |>
71
- add_fill_layer(
72
- id = "svi_layer",
73
- source = list(type = "vector", url = paste0("pmtiles://", pmtiles)),
74
- source_layer = "SVI2000_US_tract",
75
- filter = filter_column(full_data, filtered_data, "FIPS"),
76
- fill_opacity = 0.5,
77
- fill_color = interpolate(column = "RPL_THEMES",
78
- values = c(0, 1),
79
- stops = c("#e19292c0", "darkblue"),
80
- na_color = "lightgrey")
81
- )
 
 
 
 
82
 
 
1
+ library(duckdb.agent)
 
 
2
  library(duckdbfs)
3
+ library(dplyr)
4
  library(ellmer)
 
5
 
 
 
 
 
 
 
 
 
 
6
 
7
+ duckdb_secrets(Sys.getenv("MINIO_KEY"),
8
+ Sys.getenv("MINIO_SECRET"),
9
+ "minio.carlboettiger.info")
10
+ gbif <- open_dataset("s3://public-gbif/2024-10-01", tblname = "gbif")
11
+ tracts_url <- "https://minio.carlboettiger.info/public-social-vulnerability/2022-tracts-h3-z8.parquet"
12
+ tracts_h3 <- open_dataset(tracts_url, tblname = "censustracts")
13
 
14
 
15
+ system_prompt = create_prompt(additional_instructions =
16
+ "Note that the column h8 contains a geohash representing a H3 hexagon index.
17
+ If asked for data that requires both tables, you should always seek to join on
18
+ the h8 column. Always aggregate results to count the number of rows matching
19
+ the query in each h8 hexagon")
20
 
21
+ agent <- ellmer::chat_vllm(
 
22
  base_url = "https://llm.cirrus.carlboettiger.info/v1/",
23
  model = "kosbu/Llama-3.3-70B-Instruct-AWQ",
24
  api_key = Sys.getenv("CIRRUS_LLM_KEY"),
 
26
  api_args = list(temperature = 0)
27
  )
28
 
29
+ resp <- agent$chat("Birds in Yolo County")
30
+ out <- agent_query(resp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ bench::bench_time({
33
+ out |> rename(h3id = h6) |> to_h3j("s3://public-data/test5.h3j")
34
+ })
35
 
 
36
 
 
 
37
 
38
+ library(mapgl)
39
+ url = "https://minio.carlboettiger.info/public-data/test5.h3j"
40
+ maplibre(center=c(-110, 38), zoom = 3, pitch = 30) |>
41
+ add_h3j_source("h3j_source",
42
+ url = url
43
+ ) |>
44
+ add_fill_extrusion_layer(
45
+ id = "h3j_layer",
46
+ source = "h3j_source",
47
+ fill_extrusion_color = interpolate(
48
+ column = "count",
49
+ values = c(0, 1000),
50
+ stops = c("#430254", "#f83c70")
51
+ ),
52
+ fill_extrusion_height = list(
53
+ "interpolate",
54
+ list("linear"),
55
+ list("zoom"),
56
+ 0,
57
+ 0,
58
+ 100,
59
+ list("*", 2, list("get", "count"))
60
+ ),
61
+ fill_extrusion_opacity = 0.7
62
+ )
63
 
test-data.R CHANGED
@@ -1,39 +1,23 @@
1
- library(dplyr)
2
- library(duckdbfs)
3
- library(mapgl)
4
 
5
- pad_tract = open_dataset("https://minio.carlboettiger.info/public-biodiversity/pad-us-4/pad-by-tract.parquet")
6
- pad = open_dataset("https://minio.carlboettiger.info/public-biodiversity/pad-us-4/pad-us-4.parquet")
7
- pmtiles = "https://minio.carlboettiger.info/public-biodiversity/pad-us-4/pad-us-4.pmtiles"
8
 
9
- #svi = open_dataset("https://data.source.coop/cboettig/social-vulnerability/2022/SVI2022_US_tract.parquet")
 
 
 
10
 
11
 
12
- State = "Colorado"
13
- filtered_data = pad_tract |> filter(STATE == State) |> select(Unit_Nm)
14
- full_data = pad |> filter(GAP_Sts %in% c("1", "2"))
15
 
 
 
 
16
 
17
 
18
 
19
- filter_column <- function(full_data, filtered_data, id_col) {
20
- #if (nrow(filtered_data) < 1) return(NULL)
21
- values <- full_data |>
22
- inner_join(filtered_data, copy = TRUE) |>
23
- pull(id_col)
24
- # maplibre syntax for the filter of PMTiles
25
- list("in", list("get", id_col), list("literal", values))
26
- }
27
 
28
- maplibre(center = c(-102.9, 41.3), zoom = 3) |>
29
- add_fill_layer(
30
- id = "pad",
31
- source = list(type = "vector", url = paste0("pmtiles://", pmtiles)),
32
- source_layer = "padus4",
33
- tooltip = c("Unit_Nm"),
34
- # filter = list("in", list("get", "GAP_Sts"), list("literal", values)),
35
- filter = filter_column(full_data, filtered_data, "row_n"),
36
- fill_opacity = 0.5,
37
- fill_color = "darkgreen"
38
- )
39
 
 
 
 
 
 
1
 
2
+ library(duckdbfs)
3
+ library(dplyr)
 
4
 
5
+ library(sf)
6
+ library(spData)
7
+ bounds <- spData::us_states |> dplyr::filter(NAME == "Arizona") |> sf::st_bbox()
8
+ attach(as.list(bounds))
9
 
10
 
11
+ duckdb_secrets(Sys.getenv("MINIO_KEY"),
12
+ Sys.getenv("MINIO_SECRET"),
13
+ "minio.carlboettiger.info")
14
 
15
+ gbif <- open_dataset("s3://public-gbif/2024-10-01", tblname = "gbif")
16
+ gbif_aoi <-
17
+ gbif |> dplyr::filter(decimallatitude > 0) |> show_query()
18
 
19
 
20
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ gbif_aoi |> show_query()