cboettig committed on
Commit
3cf94ac
·
1 Parent(s): 639cb71
Files changed (9) hide show
  1. .github/workflows/deploy.yml +1 -1
  2. .gitignore +2 -0
  3. app.R +36 -250
  4. inat-ranges.R +50 -0
  5. geo-llm-r.Rproj → inat-ranges.Rproj +1 -4
  6. preprocess.R +41 -0
  7. test-app.R +58 -0
  8. test.R +97 -63
  9. utils.R +127 -1
.github/workflows/deploy.yml CHANGED
@@ -17,4 +17,4 @@ jobs:
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: git push -f https://cboettig:[email protected]/spaces/boettiger-lab/geo-llm-r main
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push -f https://cboettig:[email protected]/spaces/boettiger-lab/inat-ranges
.gitignore CHANGED
@@ -47,3 +47,5 @@ po/*~
47
 
48
  # RStudio Connect folder
49
  rsconnect/
 
 
 
47
 
48
  # RStudio Connect folder
49
  rsconnect/
50
+
51
+ .tmp/*
app.R CHANGED
@@ -1,272 +1,58 @@
1
  library(shiny)
2
- library(bslib)
3
- library(htmltools)
4
- library(fontawesome)
5
- library(bsicons)
6
- library(gt)
7
- library(colourpicker)
8
- library(glue)
9
-
10
- library(ggplot2)
11
- library(readr)
12
- library(dplyr)
13
  library(mapgl)
14
- library(duckdbfs)
15
- duckdbfs::load_spatial()
16
-
17
- css <-
18
- HTML(paste0("<link rel='stylesheet' type='text/css' ",
19
- "href='https://demos.creative-tim.com/",
20
- "material-dashboard/assets/css/",
21
- "material-dashboard.min.css?v=3.2.0'>"))
22
-
23
-
24
- # Define the UI
25
- ui <- page_sidebar(
26
- fillable = FALSE, # do not squeeze to vertical screen space
27
- tags$head(css),
28
- titlePanel("Demo App"),
29
-
30
- "
31
- This is a proof-of-principle for a simple chat-driven interface
32
- to dynamically explore geospatial data.
33
- ",
34
 
35
- card(
36
- layout_columns(
37
- textInput("chat",
38
- label = NULL,
39
- "Which four counties in California have the highest average social vulnerability?",
40
- width = "100%"),
41
- div(
42
- actionButton("user_msg", "", icon = icon("paper-plane"),
43
- class = "btn-primary btn-sm align-bottom"),
44
- class = "align-text-bottom"),
45
- col_widths = c(11, 1)),
46
- fill = FALSE
47
- ),
48
-
49
- textOutput("agent"),
50
 
 
 
51
 
52
- layout_columns(
53
- card(maplibreOutput("map")),
54
- card(includeMarkdown("## Plot"),
55
- plotOutput("chart1"),
56
- plotOutput("chart2"),
57
- ),
58
- col_widths = c(8, 4),
59
- row_heights = c("500px"),
60
- max_height = "600px"
61
- ),
62
 
63
- gt_output("table"),
 
64
 
65
- card(fill = TRUE,
66
- card_header(fa("robot"), textOutput("model", inline = TRUE)),
67
- accordion(
68
- open = FALSE,
69
- accordion_panel(
70
- title = "show sql",
71
- icon = fa("terminal"),
72
- verbatimTextOutput("sql_code"),
73
- ),
74
- accordion_panel(
75
- title = "explain",
76
- icon = fa("user", prefer_type="solid"),
77
- textOutput("explanation"),
78
- )
79
- ),
80
- ),
81
- card(
82
- card_header("Errata"),
83
- shiny::markdown(readr::read_file("footer.md")),
84
- ),
85
- sidebar = sidebar(
86
- selectInput(
87
- "select",
88
- "Select an LLM:",
89
- list("LLama3" = "llama3",
90
- #"OLMO2 (AllenAI)" = "olmo",
91
- "Gorilla (UC Berkeley)" = "gorilla"
92
- )
93
- ),
94
 
95
- input_switch("redlines", "Redlined Areas", value = FALSE),
96
- input_switch("svi", "Social Vulnerability", value = TRUE),
97
- input_switch("richness", "Biodiversity Richness", value = FALSE),
98
- input_switch("rsr", "Biodiversity Range Size Rarity", value = FALSE),
99
 
 
 
100
 
101
- card(
102
- card_header(bs_icon("github"), "Source code:"),
103
- a(href = "https://github.com/boettiger-lab/geo-llm-r",
104
- "https://github.com/boettiger-lab/geo-llm-r"))
105
- ),
106
 
107
- theme = bs_theme(version = "5")
 
 
 
108
  )
109
 
110
-
111
- repo <- "https://data.source.coop/cboettig/social-vulnerability"
112
- pmtiles <- glue("{repo}/2022/SVI2022_US_tract.pmtiles")
113
- parquet <- glue("{repo}/2022/SVI2022_US_tract.parquet")
114
- con <- duckdbfs::cached_connection()
115
- svi <- open_dataset(parquet, tblname = "svi") |> filter(RPL_THEMES > 0)
116
-
117
- safe_parse <- function(txt) {
118
- gsub("[\r\n]", " ", txt) |> gsub("\\s+", " ", x = _)
119
- }
120
-
121
-
122
- # helper utilities
123
- # faster/more scalable to pass maplibre the ids to refilter pmtiles,
124
- # than to pass it the full geospatial/sf object
125
- filter_column <- function(full_data, filtered_data, id_col = "FIPS") {
126
- if (nrow(filtered_data) < 1) return(NULL)
127
- values <- full_data |>
128
- inner_join(filtered_data, copy = TRUE) |>
129
- pull(id_col)
130
- # maplibre syntax for the filter of PMTiles
131
- list("in", list("get", id_col), list("literal", values))
132
- }
133
-
134
-
135
-
136
- # Define the server
137
  server <- function(input, output, session) {
 
 
 
138
 
139
- chart1_data <- svi |>
140
- group_by(COUNTY) |>
141
- summarise(mean_svi = mean(RPL_THEMES)) |>
142
- collect()
143
-
144
- chart1 <- chart1_data |>
145
- ggplot(aes(mean_svi)) + geom_density(fill="darkred") +
146
- ggtitle("County-level vulnerability nation-wide")
147
-
148
- data <- reactiveValues(df = tibble())
149
- output$chart1 <- renderPlot(chart1)
150
-
151
- model <- reactive(input$select)
152
- output$model <- renderText(input$select)
153
- observe({
154
- schema <- read_file("schema.yml")
155
- system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
156
- .open = "<", .close = ">")
157
- chat <- ellmer::chat_vllm(
158
- base_url = "https://llm.nrp-nautilus.io/",
159
- model = model(),
160
- api_key = Sys.getenv("NRP_API_KEY"),
161
- system_prompt = system_prompt,
162
- api_args = list(temperature = 0)
163
- )
164
-
165
- observeEvent(input$user_msg, {
166
- stream <- chat$chat(input$chat)
167
-
168
- # Parse response
169
- response <- jsonlite::fromJSON(safe_parse(stream))
170
- #response <- jsonlite::fromJSON(stream)
171
-
172
- if ("query" %in% names(response)) {
173
- output$sql_code <- renderText(stringr::str_wrap(response$query, width = 60))
174
- output$explanation <- renderText(response$explanation)
175
-
176
- # Actually execute the SQL query generated:
177
- df <- DBI::dbGetQuery(con, response$query)
178
-
179
- # don't display shape column in render
180
- df <- df |> select(-any_of("Shape"))
181
- output$table <- render_gt(df, height = 300)
182
-
183
-
184
- y_axis <- colnames(df)[!colnames(df) %in% colnames(svi)]
185
- chart2 <- df |>
186
- rename(social_vulnerability = y_axis) |>
187
- ggplot(aes(social_vulnerability)) +
188
- geom_density(fill = "darkred") +
189
- xlim(c(0, 1)) +
190
- ggtitle("Vulnerability of selected areas")
191
-
192
- output$chart2 <- renderPlot(chart2)
193
-
194
- # We need to somehow trigger this df to update the map.
195
- data$df <- df
196
-
197
- # Note: ellmer will preserve full chat history automatically.
198
- # this can confuse the agent and mess up behavior, so we reset:
199
- chat$set_turns(NULL)
200
-
201
- } else {
202
- output$agent <- renderText(response$agent)
203
-
204
- }
205
-
206
- })
207
- })
208
-
209
-
210
- output$map <- renderMaplibre({
211
-
212
- m <- maplibre(center = c(-104.9, 40.3), zoom = 3, height = "400")
213
- if (input$redlines) {
214
- m <- m |>
215
- add_fill_layer(
216
- id = "redlines",
217
- source = list(type = "vector",
218
- url = paste0("pmtiles://", "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles")),
219
- source_layer = "mappinginequality",
220
- fill_color = list("get", "fill")
221
- )
222
- }
223
- if (input$richness) {
224
- m <- m |>
225
- add_raster_source(id = "richness",
226
- tiles = "https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
227
- maxzoom = 11
228
- ) |>
229
- add_raster_layer(id = "richness-layer",
230
- source = "richness")
231
-
232
- }
233
-
234
- if (input$rsr) {
235
- m <- m |>
236
- add_raster_source(id = "rsr",
237
- tiles = "https://data.source.coop/cboettig/mobi/tiles/green/range-size-rarity-all/{z}/{x}/{y}.png",
238
- maxzoom = 11
239
- ) |>
240
- add_raster_layer(id = "richness-layer",
241
- source = "rsr")
242
-
243
- }
244
- if (input$svi) {
245
- m <- m |>
246
- add_fill_layer(
247
- id = "svi_layer",
248
- source = list(type = "vector",
249
- url = paste0("pmtiles://", pmtiles)),
250
- source_layer = "svi",
251
- tooltip = "RPL_THEMES",
252
- filter = filter_column(svi, data$df, "FIPS"),
253
- fill_opacity = 0.5,
254
- fill_color = interpolate(column = "RPL_THEMES",
255
- values = c(0, 1),
256
- stops = c("lightpink", "darkred"),
257
- na_color = "lightgrey")
258
- )
259
- }
260
- m |>
261
- add_draw_control() |>
262
- add_geocoder_control()
263
 
264
- })
265
 
 
266
 
 
 
267
 
 
 
 
 
 
268
 
 
 
 
 
 
 
269
  }
270
 
271
- # Run the app
272
- shinyApp(ui = ui, server = server)
 
1
  library(shiny)
 
 
 
 
 
 
 
 
 
 
 
2
  library(mapgl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ source("utils.R")
5
+ source("inat-ranges.R")
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ load_h3()
8
+ load_spatial()
9
 
10
+ duckdbfs::duckdb_secrets()
11
+ inat <- open_dataset("s3://public-inat/hex")
 
 
 
 
 
 
 
 
12
 
13
+ # dummy case
14
+ aoi <- spData::us_states
15
 
16
+ taxa <- open_dataset("https://minio.carlboettiger.info/public-inat/taxonomy/taxa.csv", format = "csv", recursive = FALSE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
 
 
 
18
 
19
+ # publish richness at the aoi (bbox or poly)
20
+ m = maplibre(center = c(-110.5, 37), zoom = 3)
21
 
 
 
 
 
 
22
 
23
+ ui <- fluidPage(
24
+ maplibreOutput("map"),
25
+ actionButton("get_features", "Get Drawn Features"),
26
+ verbatimTextOutput("feature_output")
27
  )
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  server <- function(input, output, session) {
30
+ output$map <- renderMaplibre({
31
+ m
32
+ })
33
 
34
+ output$feature_output <- renderPrint({
35
+ print(input$map_bbox)
36
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
 
38
 
39
+ observeEvent(input$map_bbox, {
40
 
41
+ bbox =sf::st_bbox(unlist(input$map_bbox), crs = 4326)
42
+ print(bbox)
43
 
44
+ output$map <- renderMaplibre({
45
+ richness(inat, bbox)
46
+ richness_map(m)
47
+ })
48
+ })
49
 
50
+ # observeEvent(input$get_features, {
51
+ # drawn_features <- get_drawn_features(mapboxgl_proxy("map"))
52
+ # output$feature_output <- renderPrint({
53
+ # print(drawn_features)
54
+ # })
55
+ # })
56
  }
57
 
58
+ shinyApp(ui, server)
 
inat-ranges.R ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(dplyr)
2
+ library(duckdbfs)
3
+ library(mapgl)
4
+
5
+ # Also requires get_h3_aoi() from utils.R
6
+
7
# Compute per-hexagon species richness inside an area of interest and
# publish it as an h3j file.
#
# `aoi` is converted to resolution-4 H3 cells with get_h3_aoi() (utils.R).
# Rows of `inat` falling in those cells are reduced to distinct
# (taxon_id, cell) pairs and counted per cell. `height` is that count scaled
# by the largest count in the result.
#
# Arguments:
#   inat  table with at least the columns `h4` and `taxon_id`
#   aoi   area of interest in any form get_h3_aoi() accepts
#
# Returns the bench::bench_time() timing of the query-and-write step; the
# data itself is written to "s3://public-data/inat-tmp-ranges.h3j".
richness <- function(inat, aoi) {
  h3_aoi <- get_h3_aoi(aoi, precision = 4) |> select(h3id)

  bench::bench_time({
    inat |>
      rename(h3id = h4) |>
      # h3_aoi carries only h3id, so name the key explicitly instead of
      # relying on (and being messaged about) a natural join.
      inner_join(h3_aoi, by = "h3id") |>
      distinct(taxon_id, h3id) |>
      group_by(h3id) |>
      summarise(n = n()) |>
      # SQL aggregates always drop NULLs; na.rm = TRUE states that and
      # silences the dbplyr warning.
      mutate(height = n / max(n, na.rm = TRUE)) |>
      duckdbfs::to_h3j("s3://public-data/inat-tmp-ranges.h3j")
    # write_dataset("s3://public-data/inat-tmp-ranges.parquet")
  })
}
23
+
24
# Add the published richness hexagons to a maplibre map as an extruded layer.
#
# Arguments:
#   m    a maplibre map to draw on (defaults to a fresh map)
#   url  location of the h3j file to display
#
# Returns the map with an "h3j_source" source and an "h3j_layer" layer added.
richness_map <- function(
  m = maplibre(center = c(-110.5, 37), zoom = 3),
  url = "https://minio.carlboettiger.info/public-data/inat-tmp-ranges.h3j"
) {
  # Zoom-interpolated extrusion: stop 0 -> 0, stop 1 -> 100000 * height.
  extrusion_height <- list(
    "interpolate",
    list("linear"),
    list("zoom"),
    0,
    0,
    1,
    list("*", 100000, list("get", "height"))
  )

  with_source <- add_h3j_source(m, "h3j_source", url = url)

  add_fill_extrusion_layer(
    with_source,
    id = "h3j_layer",
    source = "h3j_source",
    tooltip = "n",
    fill_extrusion_color = viridis_pal("height"),
    fill_extrusion_height = extrusion_height,
    fill_extrusion_opacity = 0.7
  )
}
geo-llm-r.Rproj → inat-ranges.Rproj RENAMED
@@ -1,5 +1,5 @@
1
  Version: 1.0
2
- ProjectId: 337131c5-7aa5-4963-bc4e-d8a156e206a0
3
 
4
  RestoreWorkspace: Default
5
  SaveWorkspace: Default
@@ -12,6 +12,3 @@ Encoding: UTF-8
12
 
13
  RnwWeave: Sweave
14
  LaTeX: pdfLaTeX
15
-
16
- AutoAppendNewline: Yes
17
- StripTrailingWhitespace: Yes
 
1
  Version: 1.0
2
+ ProjectId: 6e13bbf8-d79c-409c-a98f-e51f6bdd7e7a
3
 
4
  RestoreWorkspace: Default
5
  SaveWorkspace: Default
 
12
 
13
  RnwWeave: Sweave
14
  LaTeX: pdfLaTeX
 
 
 
preprocess.R ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(dplyr)
2
+ library(duckdbfs)
3
+ library(ggplot2)
4
+ library(mapgl)
5
+ library(glue)
6
+ load_h3()
7
+ load_spatial()
8
+
9
+ duckdbfs::duckdb_secrets()
10
+ inat = open_dataset("s3://public-inat/rangemaps")
11
+
12
+ # Access SVI
13
+ #svi = open_dataset("https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract.parquet")
14
+ tracts = open_dataset("https://minio.carlboettiger.info/public-social-vulnerability/2022-tracts-h3-z8.parquet") # Access CalEnviroScreen
15
+ # ces = open_dataset("https://minio.carlboettiger.info/public-calenviroscreen/ces_2021.parquet", format="parquet")
16
+
17
+ # Filter GBIF to our area-of-interest (h-index) and species of interest
18
+
19
+ ca = tracts |>
20
+ filter(STATE == "California") |>
21
+ mutate(h3 = h3_cell_to_parent(h8, 3L)) |>
22
+ mutate(h3 = tolower(as.character(h3)))
23
+
24
+ ca |> inner_join(inat, by = "h3") |> count(STATE, COUNTY, FIPS, h8)
25
+
26
+
27
+ bird_counts = sf_birds |>
28
+ count(FIPS, geom) |>
29
+ mutate(richness = n / {total})
30
+
31
+ ces_poverty = ces |> select("Poverty", "FIPS")
32
+ combined <- svi |>
33
+ select("RPL_THEMES", "FIPS") |> filter(RPL_THEMES > 0) |>
34
+ inner_join(bird_counts, "FIPS") |>
35
+ inner_join(ces_poverty, "FIPS") |>
36
+ mutate(svi_bin = cut(RPL_THEMES, breaks = c(0, .25, .50, .75, 1),
37
+ labels = c("Q1", "Q2", "Q3", "Q4"))) |>
38
+ mutate(poverty_bin = cut(Poverty, breaks = c(0, 25, 50, 75, 100),
39
+ labels = c("0-25", "25-50", "50-75", "75-100")))
40
+
41
+
test-app.R ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(shiny)
2
+ library(mapgl)
3
+
4
+ source("utils.R")
5
+ source("inat-ranges.R")
6
+
7
+ load_h3()
8
+ load_spatial()
9
+
10
+ duckdbfs::duckdb_secrets()
11
+ inat <- open_dataset("s3://public-inat/hex")
12
+
13
+ # dummy case
14
+ aoi <- spData::us_states
15
+
16
+ taxa <- open_dataset("https://minio.carlboettiger.info/public-inat/taxonomy/taxa.csv", format = "csv", recursive = FALSE)
17
+
18
+
19
+ # publish richness at the aoi (bbox or poly)
20
+ m = maplibre(center = c(-110.5, 37), zoom = 3)
21
+
22
+
23
+ ui <- fluidPage(
24
+ maplibreOutput("map"),
25
+ actionButton("get_features", "Get Drawn Features"),
26
+ verbatimTextOutput("feature_output")
27
+ )
28
+
29
# Shiny server: shows the base map, echoes the map's bounding box, and
# re-renders the map with an iNaturalist richness layer whenever the
# bounding box reported by the map widget changes.
#
# Relies on the top-level objects `m` (base maplibre map) and `inat`
# (dataset handle), and on richness() / richness_map() from inat-ranges.R.
server <- function(input, output, session) {
  # Initial render: the bare base map.
  output$map <- renderMaplibre(m)

  # Print the current bounding box into the verbatim text output.
  output$feature_output <- renderPrint(print(input$map_bbox))

  observeEvent(input$map_bbox, {
    # Promote the reported bounds to an sf bbox in lon/lat (EPSG:4326).
    view_bbox <- sf::st_bbox(unlist(input$map_bbox), crs = 4326)
    print(view_bbox)

    # NOTE(review): replacing output$map from inside the observer re-creates
    # the widget on every bbox change; confirm this does not re-trigger
    # input$map_bbox in a loop.
    output$map <- renderMaplibre({
      richness(inat, view_bbox)
      richness_map(m)
    })
  })

  # observeEvent(input$get_features, {
  #   drawn_features <- get_drawn_features(mapboxgl_proxy("map"))
  #   output$feature_output <- renderPrint({
  #     print(drawn_features)
  #   })
  # })
}
57
+
58
+ shinyApp(ui, server)
test.R CHANGED
@@ -1,69 +1,103 @@
1
  ## Illustrate/test core app functionality without shiny
2
-
3
- library(tidyverse)
4
  library(duckdbfs)
 
5
  library(mapgl)
6
- library(ellmer)
7
  library(glue)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- repo <- "https://data.source.coop/cboettig/social-vulnerability"
10
- pmtiles <- glue("{repo}/svi2020_us_tract.pmtiles")
11
- parquet <- glue("{repo}/svi2020_us_tract.parquet")
12
- svi <- open_dataset(parquet, tblname = "svi") |> filter(RPL_THEMES > 0)
13
-
14
- schema <- read_file("schema.yml")
15
- system_prompt <- glue::glue(readr::read_file("system-prompt.md"),
16
- .open = "<", .close = ">")
17
-
18
-
19
-
20
- # Or optionally test with cirrus
21
- chat <- ellmer::chat_vllm(
22
- base_url = "https://llm.cirrus.carlboettiger.info/v1/",
23
- model = "kosbu/Llama-3.3-70B-Instruct-AWQ",
24
- api_key = Sys.getenv("CIRRUS_LLM_KEY"),
25
- system_prompt = system_prompt,
26
- api_args = list(temperature = 0)
27
- )
28
-
29
- # or use the NRP model
30
- chat <- ellmer::chat_vllm(
31
- base_url = "https://llm.nrp-nautilus.io/",
32
- model = "llama3",
33
- api_key = Sys.getenv("NRP_API_KEY"),
34
- system_prompt = system_prompt,
35
- api_args = list(temperature = 0)
36
- )
37
-
38
-
39
- # Test a chat-based response
40
- chat$chat("Which columns describes racial components of social vulnerability?")
41
- ## A query-based response
42
- stream <- chat$chat("Which counties in California have the highest average social vulnerability?")
43
- response <- jsonlite::fromJSON(stream)
44
-
45
- con <- duckdbfs::cached_connection()
46
- filtered_data <- DBI::dbGetQuery(con, response$query)
47
-
48
- filter_column <- function(full_data, filtered_data, id_col) {
49
- if (nrow(filtered_data) < 1) return(NULL)
50
- values <- full_data |>
51
- inner_join(filtered_data, copy = TRUE) |>
52
- pull(id_col)
53
- # maplibre syntax for the filter of PMTiles
54
- list("in", list("get", id_col), list("literal", values))
55
- }
56
-
57
- maplibre(center = c(-102.9, 41.3), zoom = 3) |>
58
- add_fill_layer(
59
- id = "svi_layer",
60
- source = list(type = "vector", url = paste0("pmtiles://", pmtiles)),
61
- source_layer = "SVI2000_US_tract",
62
- filter = filter_column(full_data, filtered_data, "FIPS"),
63
- fill_opacity = 0.5,
64
- fill_color = interpolate(column = "RPL_THEMES",
65
- values = c(0, 1),
66
- stops = c("#e19292c0", "darkblue"),
67
- na_color = "lightgrey")
68
- )
69
 
 
1
  ## Illustrate/test core app functionality without shiny
2
+ library(dplyr)
 
3
  library(duckdbfs)
4
+ library(ggplot2)
5
  library(mapgl)
 
6
  library(glue)
7
+ load_h3()
8
+ load_spatial()
9
+
10
+ source("utils.R")
11
+ source("inat-ranges.R")
12
+
13
+
14
+ duckdbfs::duckdb_secrets()
15
+ inat <- open_dataset("s3://public-inat/hex")
16
+
17
+ aoi <- spData::us_states
18
+
19
+ taxa <- open_dataset("https://minio.carlboettiger.info/public-inat/taxonomy/taxa.csv", format = "csv", recursive = FALSE)
20
+
21
+ # publish richness at the aoi (bbox or poly)
22
+ richness(inat, aoi)
23
+ richness_map()
24
+
25
+
26
+
27
+ ## UGH can't deal with antimeridian
28
+ # dropme <- antimeridian_hexes(3)
29
+ # dropme <- antimeridian_hexes(4) |> rename(h4 = h3id)
30
+ # inat |> anti_join(dropme) |> write_dataset("s3://public-inat/ranges.parquet")
31
+ # inat <- open_dataset("s3://public-inat/ranges.parquet", recursive = FALSE)
32
+
33
+ # mutate(h3 = h3_cell_to_parent(h4, 3L))
34
+
35
+
36
+
37
+ m <- maplibre(center = c(-110.5, 34.8), zoom = 4) |> add_draw_control()
38
+ richness_map(m, "https://minio.carlboettiger.info/public-data/inat-tmp-ranges.h3j")
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+ library(htmlwidgets)
51
+ htmlwidgets::saveWidget(m, "example.html")
52
+
53
+
54
+
55
+
56
+
57
+
58
+
59
+
60
+
61
+
62
+
63
+
64
+ amphib = open_dataset("s3://public-inat/polygon/Amphibia.parquet", recursive = FALSE)
65
+
66
+ gdf <- amphib |>
67
+ filter(name == "Ambystoma californiense") |>
68
+ to_sf(crs=4326)
69
+
70
+ maplibre(center = c(-122.5, 37.8), zoom = 4) |>
71
+ add_source(id = "gdf", gdf) |>
72
+ add_layer("gdf-layer",
73
+ type = "fill",
74
+ source = "gdf",
75
+ paint = list(
76
+ "fill-color" = "darkgreen",
77
+ "fill-opacity" = .9
78
+ )
79
+ )
80
+
81
+
82
+
83
+
84
+
85
+ # Access SVI
86
+ #svi = open_dataset("https://minio.carlboettiger.info/public-social-vulnerability/2022/SVI2022_US_tract.parquet")
87
+ #tracts = open_dataset("https://minio.carlboettiger.info/public-social-vulnerability/2022-tracts-h3-z5.parquet") # Access CalEnviroScreen
88
+ # ces = open_dataset("https://minio.carlboettiger.info/public-calenviroscreen/ces_2021.parquet", format="parquet")
89
+
90
+ # Filter GBIF to our area-of-interest (h-index) and species of interest
91
+
92
+ ca <- tracts |>
93
+ filter(STATE == "California") |>
94
+ mutate(h4 = h3_cell_to_parent(h5, 4L)) |>
95
+ mutate(h4 = tolower(as.character(h4)))
96
+
97
+ out <- ca |>
98
+ inner_join(inat, by = "h4") |>
99
+ count(STATE, COUNTY, FIPS, h5)
100
+
101
 
102
+ # mutate(height = n / max(n)) |>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
utils.R CHANGED
@@ -1,4 +1,130 @@
1
 
2
- library(tidyverse)
3
  library(duckdbfs)
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ library(dplyr)
3
  library(duckdbfs)
4
+ library(sf)
5
 
6
# Load an sf object into duckdb as a spatial dataset.
#
# Kludgy but simple: the object is reprojected to EPSG:4326, written to a
# temporary .fgb file, and that file is opened with duckdbfs::open_dataset().
#
# Arguments:
#   sf   the sf object to load
#   ...  forwarded to duckdbfs::open_dataset()
#
# Returns whatever duckdbfs::open_dataset() returns for the temporary file.
as_dataset.sf <- function(sf, ...) {
  fgb_path <- tempfile(fileext = ".fgb")
  lonlat <- sf::st_transform(sf, 4326)
  sf::write_sf(lonlat, fgb_path, append = FALSE, quiet = TRUE)

  duckdbfs::open_dataset(fgb_path, ...)
}
14
+
15
+ # promote bbox to polygon
16
# Normalise an area of interest to lon/lat (EPSG:4326), promoting a bbox to
# a one-row sf polygon whose geometry column is named `geom`.
#
# Arguments:
#   aoi  an sf object or an sf bbox
#
# Returns `aoi` in EPSG:4326; a bbox input comes back as an sf data frame.
# Stops with an explicit error when `aoi` has no CRS, because it can then be
# neither compared with nor transformed to EPSG:4326.
as_poly <- function(aoi) {
  crs <- sf::st_crs(aoi)

  # A missing CRS used to surface as "missing value where TRUE/FALSE needed".
  if (is.na(crs)) {
    stop(
      "`aoi` has no coordinate reference system; set one with sf::st_crs().",
      call. = FALSE
    )
  }

  # Compare CRS objects rather than the free-form `input` label, which differs
  # between equivalent spellings of the same CRS.
  if (crs != sf::st_crs(4326)) {
    aoi <- sf::st_transform(aoi, 4326)
  }

  if (inherits(aoi, "bbox")) {
    aoi <- aoi |>
      sf::st_as_sfc() |>
      sf::st_as_sf() |>
      dplyr::rename(geom = x)
  }

  aoi
}
31
+
32
+
33
+
34
# List the H3 cells covering an area of interest.
#
# Arguments:
#   aoi        an sf object or bbox (converted with as_poly() and
#              as_dataset.sf()), or a table that already has a `geom` column
#   zoom       H3 resolution of the parent cell reported in column `h0`
#   precision  H3 resolution of the cells reported in column `h3id`
#   upper      if TRUE, upper-case both id columns
#
# Returns the view "h3_aoi" with two string columns: `h0` (parent cell at
# resolution `zoom`) and `h3id` (cell at resolution `precision`).
get_h3_aoi <- function(aoi, zoom = 0L, precision = 6L, upper = FALSE) {
  duckdbfs::load_h3()

  zoom <- as.integer(zoom)
  # consider auto-retry at higher precision if subset is empty.
  precision <- as.integer(precision)

  if (inherits(aoi, "sf") || inherits(aoi, "bbox")) {
    aoi <- as_poly(aoi)
    aoi <- as_dataset.sf(aoi)
  }

  # multipolygon dump may not be needed for draw tools.
  h3_aoi <- aoi |>
    dplyr::mutate(
      poly = array_extract(unnest(st_dump(geom)), "geom"),
      h3id = h3_polygon_wkt_to_cells(poly, {precision}),
      h3id = unnest(h3id)
    ) |>
    dplyr::mutate(
      h0 = h3_h3_to_string(h3_cell_to_parent(h3id, {zoom})),
      h3id = h3_h3_to_string(h3id)
    )

  if (upper) {
    h3_aoi <- h3_aoi |>
      dplyr::mutate(h0 = toupper(h0), h3id = toupper(h3id))
  }

  h3_aoi |>
    dplyr::select(h0, h3id) |>
    duckdbfs::as_view("h3_aoi")
}
65
+
66
# Report the H3 resolution of a table, read from the `h3id` of its first row.
#
# Arguments:
#   x  table with an `h3id` column
#
# Returns the value of h3_get_resolution(h3id) for the first row.
hex_res <- function(x) {
  first_row <- utils::head(x, 1)
  with_res <- dplyr::mutate(first_row, res = h3_get_resolution(h3id))

  dplyr::pull(with_res, res)
}
72
+
73
# Inner-join two H3-indexed tables after bringing `y` to the resolution of `x`.
#
# Arguments:
#   x, y  tables with an `h3id` column
#
# Returns dplyr::inner_join(x, y) on their shared columns, with `y` first
# expanded to child cells (when coarser than `x`) or collapsed to parent
# cells (when finer than `x`).
hex_join <- function(x, y) {
  res_x <- hex_res(x)
  res_y <- hex_res(y)

  if (res_x > res_y) {
    # y is coarser: expand each cell into its children at x's resolution.
    # NOTE(review): only this branch upper-cases h3id; confirm that is intended.
    y <- dplyr::mutate(
      y,
      h3id = unnest(h3_cell_to_children(h3id, {res_x})),
      h3id = toupper(h3id)
    )
  } else if (res_x < res_y) {
    # y is finer: replace each cell by its parent at x's resolution.
    y <- dplyr::mutate(y, h3id = h3_cell_to_parent(h3id, {res_x}))
  }

  dplyr::inner_join(x, y)
}
89
+
90
+
91
# List the H3 cells covering a band around the antimeridian.
#
# (Re)creates the table `antimeridian` on `con`, holding one polygon that
# spans longitudes 170 to 190 and latitudes -85 to 85, and converts it to H3
# cell ids.
#
# Arguments:
#   zoom  H3 resolution of the returned cells
#   con   DBI connection to use (defaults to duckdbfs's cached connection)
#
# Returns a lazy table on `con` with a single column `h3id`.
antimeridian_hexes <- function(zoom = 4, con = duckdbfs::cached_connection()) {
  duckdbfs::load_h3(con)
  duckdbfs::load_spatial(con)
  DBI::dbExecute(con,
    "
    CREATE OR REPLACE TABLE antimeridian AS (
    SELECT ST_GeomFromText(
    'POLYGON ((170 85, 190 85, 190 -85, 170 -85, 170 85))'
    ) AS geometry
    )
    ")
  zoom <- as.integer(zoom)

  band <- dplyr::tbl(con, "antimeridian")
  cells <- dplyr::mutate(
    band,
    h3id = unnest(h3_polygon_wkt_to_cells_string(geometry, {zoom}))
  )

  dplyr::select(cells, h3id)
}
113
+
114
+
115
# Build a stepped viridis colour expression for a map layer.
#
# Arguments:
#   column  name of the feature property to colour by
#   n       number of viridis colours to use; must be at least 2, since the
#           first colour is the base and the remaining n - 1 are the stops
#   min_v   value at which the first stop applies
#   max_v   value at which the last stop applies
#
# Returns the expression built by mapgl::step_expr(), with "white" for
# missing values.
viridis_pal <- function(column = "height",
                        n = 61,
                        min_v = 0,
                        max_v = 1) {
  # With n < 2, pal[2:n] would count backwards and yield a malformed palette.
  stopifnot(is.numeric(n), length(n) == 1L, n >= 2)

  pal <- viridisLite::viridis(n)

  mapgl::step_expr(
    column = column,
    base = pal[1],
    stops = pal[2:n],
    values = seq(min_v, max_v, length.out = n - 1),
    na_color = "white"
  )
}