cassiebuhler committed on
Commit
1d924fc
·
1 Parent(s): f64f7ab

whoops gotta add these back

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Tpl
3
- emoji: 💻
4
  colorFrom: indigo
5
  colorTo: blue
6
  sdk: streamlit
 
1
  ---
2
+ title: TPL
3
+ emoji: 🌳
4
  colorFrom: indigo
5
  colorTo: blue
6
  sdk: streamlit
app/app.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+
14
+ # +
15
+ import leafmap.foliumap as leafmap
16
+ import streamlit as st
17
+ from minio import Minio
18
+ import os
19
+ from datetime import timedelta
20
+ import pandas as pd
21
+
22
# License-controlled layers are fetched through time-limited signed URLs,
# requested from the MinIO server with credentials held in Streamlit secrets.
key = st.secrets["MINIO_KEY"]
secret = st.secrets["MINIO_SECRET"]
client = Minio("minio.carlboettiger.info", key, secret)

# All three objects share the bucket, the HTTP method and the two-hour
# lifetime; the generator issues the requests in the order
# pmtiles -> parquet -> geojson.
pmtiles, parquet, geojson = (
    client.get_presigned_url(
        "GET",
        "shared-tpl",
        object_name,
        expires=timedelta(hours=2),
    )
    for object_name in ("tpl.pmtiles", "tpl.parquet", "tpl.geojson")
)
48
+
49
+ # -
50
+
51
+
52
+ basemaps = leafmap.basemaps.keys()
53
+
54
+ # +
55
+
56
+ ## Protected Area polygon color codes
57
+
58
+ style_options = {
59
+ "Manager Type": {
60
+ 'property': 'Manager_Type',
61
+ 'type': 'categorical',
62
+ 'stops': [
63
+ ['FED', "darkblue"],
64
+ ['STAT', "blue"],
65
+ ['LOC', "lightblue"],
66
+ ['DIST', "darkgreen"],
67
+ ['UNK', "grey"],
68
+ ['JNT', "green"],
69
+ ['TRIB', "purple"],
70
+ ['PVT', "darkred"],
71
+ ['NGO', "orange"]
72
+ ]
73
+ },
74
+ "Access": {
75
+ 'property': 'Public_Access_Type',
76
+ 'type': 'categorical',
77
+ 'stops': [
78
+ ['OA', "green"],
79
+ ['XA', "red"],
80
+ ['UK', "grey"],
81
+ ['RA', "orange"]
82
+ ]
83
+ },
84
+ "Purpose": {
85
+ 'property': 'Purpose_Type',
86
+ 'type': 'categorical',
87
+ 'stops': [
88
+ ['FOR', "green"],
89
+ ['HIST', "red"],
90
+ ['UNK', "grey"],
91
+ ['OTH', "grey"],
92
+ ['FARM', "yellow"],
93
+ ['REC', "blue"],
94
+ ['ENV', "purple"],
95
+ ['SCE', "orange"],
96
+ ['RAN', "pink"]
97
+ ]
98
+ }
99
+ }
100
+
101
+
102
+
103
+ notused = {
104
+ "Amount": ["interpolate",
105
+ ['exponential', 1],
106
+ ["get", "Amount"],
107
+ 0, "#FCE2DC",
108
+ 34273487, "#F8C3BF",
109
+ 68546973, "#F4A5A2",
110
+ 102820460, "#F08785",
111
+ 137093947, "#EB6968",
112
+ 171367433, "#DB5157",
113
+ 205640920, "#BE4152",
114
+ 239914407, "#A0304C",
115
+ 274187893, "#832047",
116
+ 308461380, "#661042",
117
+ ]
118
+ }
119
+
120
+ # +
121
+ st.set_page_config(layout="wide",
122
+ page_title="TPL Conservation Almanac",
123
+ page_icon=":globe:")
124
+
125
+ '''
126
+ # TPL Conservation Almanac
127
+
128
+ A data visualization tool built for the Trust for Public Land
129
+
130
+ '''
131
+
132
+ m = leafmap.Map(center=[35, -100], zoom=5, layers_control=True, fullscreen_control=True)
133
+
134
+
135
def pad_style(paint, alpha):
    """Build the style document for the protected-areas PMTiles layer.

    Args:
        paint: Value placed in the layer's ``fill-color`` paint property.
        alpha: Value placed in the layer's ``fill-opacity`` paint property.

    Returns:
        A dict with ``version`` 8, a single vector source named ``source1``
        (the module-level ``pmtiles`` URL behind a ``pmtiles://`` prefix)
        and a single fill layer ``TPL`` drawing source-layer ``tpl``.
    """
    vector_source = {
        "type": "vector",
        "url": "pmtiles://" + pmtiles,
        "attribution": "TPL",
    }
    fill_layer = {
        "id": "TPL",
        "source": "source1",
        "source-layer": "tpl",
        "type": "fill",
        "paint": {
            "fill-color": paint,
            "fill-opacity": alpha,
        },
    }
    return {
        "version": 8,
        "sources": {"source1": vector_source},
        "layers": [fill_layer],
    }
153
+
154
+
155
+
156
+ code_ex='''
157
+ m.add_cog_layer("https://data.source.coop/vizzuality/lg-land-carbon-data/natcrop_expansion_100m_cog.tif",
158
+ palette="oranges", name="Cropland Expansion", transparent_bg=True, opacity = 0.7, zoom_to_layer=False)
159
+ '''
160
+ # -
161
## Map controls sidebar

# `style_choice` is read again further down (to pick the chart column and
# colors), so it must exist even when the "Protected Areas" toggle is off.
# Default to the first style, which is also the entry st.radio preselects.
style_choice = next(iter(style_options))

with st.sidebar:

    if st.toggle("Protected Areas", True):
        alpha = st.slider("transparency", 0.0, 1.0, 0.5)
        style_choice = st.radio("Color by:", style_options)
        style = pad_style(style_options[style_choice], alpha)
        m.add_pmtiles(pmtiles, name="Conservation Protected Areas", style=style, overlay=True, show=True, zoom_to_layer=False)
        ## Add legend based on selected style?
        # m.add_legend(legend_dict=legend_dict)

    b = st.selectbox("Basemap", basemaps)
    m.add_basemap(b)

# And here we go!
m.to_streamlit(height=600)
177
+
178
+ st.divider()
179
+
180
+ import altair as alt
181
+ import ibis
182
+ from ibis import _
183
+ import ibis.selectors as s
184
+
185
+
186
+ # +
187
@st.cache_resource
def tpl_database(parquet):
    """Open the parquet file at ``parquet`` with ibis and return the table.

    The result is cached as a Streamlit resource keyed on ``parquet``.

    NOTE(review): the caller passes a presigned URL that is requested anew
    on every script run; if that URL's query string differs between runs the
    cache key differs too and the cache never hits -- confirm.
    """
    return ibis.read_parquet(parquet)
191
+
192
+ df = tpl_database(parquet)
193
+
194
+
195
+ # +
196
@st.cache_data
def tpl_summary(_df):
    """Sum ``Amount`` for the public, private and tribal manager groups.

    Args:
        _df: ibis table with ``Manager_Type`` and ``Amount`` columns. The
            leading underscore keeps Streamlit from hashing it for the
            cache key.

    Returns:
        ``(public_dollars, private_dollars, tribal_dollars, total_dollars)``,
        each rounded with ``round()``. Public is FED/STAT/LOC/DIST, private
        is PVT/NGO, tribal is TRIB, and total covers every manager type.
    """
    per_manager = _df.group_by(_.Manager_Type).agg(Amount = _.Amount.sum())

    def grand_total(table):
        # One-row, one-column aggregate pulled out of pandas as a scalar.
        return round(table.agg(total = _.Amount.sum()).to_pandas().values[0][0])

    def total_for(manager_types):
        return grand_total(per_manager.filter(_.Manager_Type.isin(manager_types)))

    public_dollars = total_for(["FED", "STAT", "LOC", "DIST"])
    private_dollars = total_for(["PVT", "NGO"])
    tribal_dollars = total_for(["TRIB"])
    total_dollars = grand_total(per_manager)
    return public_dollars, private_dollars, tribal_dollars, total_dollars
204
+
205
+ public_dollars, private_dollars, tribal_dollars, total_dollars = tpl_summary(df)
206
+
207
+ # +
208
+ # areas actively managed / owned / sponsored by TPL
209
+ # tpl = (df
210
+ # .filter(_.Sponsor_Name.lower().re_search("trust for public land") | _.Owner_Name.lower().re_search("trust for public land") | _.Manager_Name.lower().re_search("trust for public land"))
211
+ # .agg(Amount = _.Amount.sum(),
212
+ # area_hectares = _.Shape_Area.sum() / 10000)
213
+ # .order_by(_.Amount.desc())
214
+ # .to_pandas()
215
+ # )
216
+ # -
217
+
218
+
219
+
220
+
221
+
222
+ # +
223
@st.cache_data
def calc_delta(_df):
    """Compute the year-over-year percent change in cumulative ``Amount``.

    ``Amount`` is summed per (``Manager_Type``, ``Close_Year``) and turned
    into a running total per manager type ordered by year. ``delta`` is
    ``100 * (total - previous total) / total`` rounded to 2 decimals, and
    only rows with ``Close_Year >= 2019`` are kept.

    Args:
        _df: ibis table with ``Manager_Type``, ``Close_Year`` and ``Amount``
            columns. The leading underscore keeps Streamlit from hashing it.

    Returns:
        ``(public_delta, private_delta, trib_delta)``: the ``delta`` of the
        first row returned for FED/STAT/LOC/DIST, for PVT/NGO and for TRIB.
    """
    deltas = (
        _df
        .group_by(_.Manager_Type, _.Close_Year)
        .agg(Amount = _.Amount.sum())
        .mutate(total = _.Amount.cumsum(order_by=_.Close_Year, group_by=_.Manager_Type))
        # The lag must use the same partition and ordering as the running
        # total; without a window, "previous row" is whatever row the
        # engine happens to emit before this one, possibly from another
        # manager type.
        .mutate(lag = _.total.lag(1).over(group_by=_.Manager_Type, order_by=_.Close_Year))
        .mutate(delta = (100*(_.total - _.lag) / _.total).round(2) )
        .filter(_.Close_Year >=2019)
        .select(_.Manager_Type, _.Close_Year, _.total, _.lag, _.delta)
    )

    # NOTE(review): each lookup takes row 0 of a result that has no explicit
    # ordering and, for the public/private groups, spans several manager
    # types and years -- confirm which row is actually meant.
    public_delta = deltas.filter(_.Manager_Type.isin(["FED", "STAT", "LOC", "DIST"])).to_pandas().delta[0]
    private_delta = deltas.filter(_.Manager_Type.isin(["PVT", "NGO"])).to_pandas().delta[0]
    trib_delta = deltas.filter(_.Manager_Type=="TRIB").to_pandas().delta[0]

    return public_delta, private_delta, trib_delta
244
+
245
+ public_delta, private_delta, trib_delta = calc_delta(df)
246
+ # -
247
+
248
+
249
+ with st.container():
250
+ col1, col2, col3, col4 = st.columns(4)
251
+ col1.metric(label=f"Public", value=f"${public_dollars:,}", delta = f"{public_delta:}%")
252
+ col2.metric(label=f"Private", value=f"${private_dollars:,}", delta = f"{private_delta:}%")
253
+ col3.metric(label=f"Tribal", value=f"${tribal_dollars:,}", delta = f"{trib_delta:}%")
254
+ col4.metric(label=f"Total", value=f"${total_dollars:,}")
255
+
256
+ selected = style_options[style_choice]
257
+ column = selected["property"]
258
+ colors = dict(selected["stops"])
259
+
260
+
261
+ # +
262
@st.cache_data
def get_area_totals(_df, column):
    """Sum ``Shape_Area`` per distinct value of ``column``.

    Args:
        _df: ibis table with a ``Shape_Area`` column (not hashed by the cache).
        column: Name of the column to group by.

    Returns:
        pandas DataFrame with the grouping column and ``area``, the summed
        ``Shape_Area`` divided by 10,000 (presumably m^2 to hectares, as the
        chart caption says hectares -- confirm).
    """
    grouped = _df.group_by(_[column])
    totals = grouped.agg(area = _.Shape_Area.sum() / (100*100))
    return totals.to_pandas()
265
+ area_totals = get_area_totals(df,column)
266
+
267
@st.cache_data
def bar(area_totals, column):
    """Draw a bar chart of ``area`` per category on a log y-axis.

    Args:
        area_totals: Data with ``column`` and ``area`` fields.
        column: Field used for both the x-axis and the bar color.

    Returns:
        Altair chart, 350 px high, colored with the module-level ``colors``
        mapping (category -> color).
    """
    palette = alt.Color(column).scale(
        domain = list(colors.keys()),
        range = list(colors.values()),
    )
    chart = alt.Chart(area_totals).mark_bar().encode(
        x=column,
        y=alt.Y("area").scale(type="log"),
        color=palette,
    )
    return chart.properties(height=350)
275
+ #bar
276
+
277
+
278
+ # +
279
+
280
@st.cache_data
def calc_timeseries(_df, column):
    """Build the cumulative ``Amount`` per year for each value of ``column``.

    Rows whose ``Close_Year`` is null or not greater than 0 are dropped.
    ``Amount`` is summed per (``Close_Year``, ``column``) and then replaced
    by its running sum within each ``column`` value, ordered by year.

    Args:
        _df: ibis table with ``Close_Year`` and ``Amount`` columns (not
            hashed by the cache).
        column: Name of the categorical column to split the series by.

    Returns:
        pandas DataFrame with ``Close_Year`` (cast to int), the grouping
        column, and the cumulative ``Amount``.
    """
    with_year = _df.filter(~_.Close_Year.isnull()).filter(_.Close_Year > 0)
    yearly = with_year.group_by([_.Close_Year, _[column]]).agg(Amount = _.Amount.sum())
    running = yearly.mutate(
        Close_Year = _.Close_Year.cast("int"),
        Amount = _.Amount.cumsum(group_by=_[column], order_by=_.Close_Year),
    )
    return running.to_pandas()
294
+ timeseries = calc_timeseries(df, column)
295
+
296
@st.cache_data
def chart_time(timeseries, column):
    """Draw a line chart of ``Amount`` against ``Close_Year``, one line per category.

    Args:
        timeseries: Data with ``Close_Year``, ``Amount`` and ``column`` fields.
        column: Field that determines the line color.

    Returns:
        Altair chart, 350 px high, colored with the module-level ``colors``
        mapping (category -> color).
    """
    # Reuse the map's color stops so lines match the polygons.
    palette = alt.Color(column).scale(
        domain = list(colors.keys()),
        range = list(colors.values()),
    )
    lines = alt.Chart(timeseries).mark_line().encode(
        x='Close_Year:O',
        y = alt.Y('Amount:Q'),
        color=palette,
    )
    return lines.properties(height=350)
305
+
306
+
307
+ # +
308
+ st.divider()
309
+
310
+ with st.container():
311
+ plt1, plt2 = st.columns(2)
312
+
313
+ with plt1:
314
+ "Total Area protected (hectares):"
315
+ st.altair_chart(bar(area_totals, column))
316
+ with plt2:
317
+ "Annual investment ($) in protected area"
318
+ st.altair_chart(chart_time(timeseries, column))
319
+
320
+
321
+ # +
322
+
323
+ import leafmap.deckgl as deckgl
324
+ from shapely import wkb
325
+ import geopandas as gpd
326
+
327
@st.cache_data
def leaf_map(gdf):
    """Render ``gdf`` on a leafmap deck.gl map inside the Streamlit page.

    Args:
        gdf: Frame handed to the map's ``add_gdf``.

    Returns:
        Whatever the map's ``to_streamlit()`` returns.
    """
    deck = deckgl.Map(center=[35, -100], zoom=4)
    deck.add_gdf(gdf)
    rendered = deck.to_streamlit()
    return rendered
332
+
333
@st.cache_data
def crs():
    """Return the ``crs`` attribute of ``static/test.geojson`` as read by ibis.

    A new ibis DuckDB connection is opened on every (uncached) call.

    NOTE(review): assumes the object returned by ``read_geo`` exposes a
    ``crs`` attribute -- confirm against the ibis version in use.
    """
    connection = ibis.duckdb.connect()
    table = connection.read_geo("static/test.geojson")
    return table.crs
338
+
339
@st.cache_data
def query_database(response):
    """Run ``response`` as SQL on the module-level ``con`` and tabulate it.

    Args:
        response: SQL text to execute.

    Returns:
        pandas DataFrame holding at most the first 250 fetched rows.

    NOTE(review): in this file ``response`` is SQL generated by an LLM from
    user chat input and is executed as-is on a connection that is not opened
    read-only -- consider restricting what it may run.
    """
    rows = con.execute(response).fetchall()
    frame = pd.DataFrame(rows)
    return frame.head(250)
343
+
344
@st.cache_data
def get_geom(tbl):
    """Wrap ``tbl`` in a GeoDataFrame reprojected to EPSG:4326.

    Args:
        tbl: Tabular data with a ``geometry`` column. No WKB decoding is
            done here, so the column must already hold geometry objects.

    Returns:
        The reprojected GeoDataFrame.

    NOTE(review): no CRS is assigned when the GeoDataFrame is built here;
    geopandas refuses to reproject geometries that have no CRS, so confirm
    the incoming geometry column carries one.
    """
    gdf = gpd.GeoDataFrame(tbl, geometry='geometry')
    # to_crs returns a new frame rather than modifying in place, so the
    # result has to be kept; the original discarded it and returned the
    # un-reprojected frame.
    return gdf.to_crs("EPSG:4326")
351
+
352
+ ## Database connection, reading directly from remote parquet file
353
+ from sqlalchemy import create_engine
354
+ from langchain.sql_database import SQLDatabase
355
+ db_uri = "duckdb:///my.duckdb"
356
+ engine = create_engine(db_uri) #connect_args={'read_only': True})
357
+ con = engine.connect()
358
+ con.execute("install spatial; load spatial;")
359
+ con.execute(f"create or replace table protected as select *, st_geomfromwkb(geom) as geometry from read_parquet('{parquet}');").fetchall()
360
+ db = SQLDatabase(engine, view_support=True)
361
+
362
+ from langchain_openai import ChatOpenAI
363
+ from langchain_community.llms import Ollama
364
# Chat models offered in the sidebar, keyed by the label shown to the user.
models = {
    "chatgpt3.5": ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
    "chatgpt-o4": ChatOpenAI(model="gpt-4o", temperature=0, api_key=st.secrets["OPENAI_API_KEY"]),
}
# Ollama-served alternatives; not wired into the sidebar.
# Keys must be unique: the two sqlcoder variants previously shared the key
# "sqlcoder", so the defog llama3 build was silently overwritten.
other_models = {
    "duckdb-nsql": Ollama(model="duckdb-nsql", temperature=0),
    "llama3-sqlcoder": Ollama(model="mannix/defog-llama3-sqlcoder-8b", temperature=0),
    "mixtral": Ollama(model="mixtral", temperature=0),
    "wizardlm2": Ollama(model="wizardlm2", temperature=0),
    "sqlcoder": Ollama(model="sqlcoder", temperature=0),
    "zephyr": Ollama(model="zephyr", temperature=0),
    "llama3": Ollama(model="llama3", temperature=0),
}
377
+
378
+ map_tool = {"leafmap": leaf_map,
379
+ # "deckgl": deck_map
380
+ }
381
+
382
+
383
+ with st.sidebar:
384
+ st.divider()
385
+ choice = st.radio("Select an LLM:", models)
386
+ llm = models[choice]
387
+ map_choice = st.radio("Select mapping tool", map_tool)
388
+ mapper = map_tool[map_choice]
389
+
390
+ ## A SQL Chain
391
+ from langchain.chains import create_sql_query_chain
392
+ chain = create_sql_query_chain(llm, db)
393
+
394
+
395
+ st.divider()
396
+
397
+
398
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV and return it as UTF-8 encoded bytes.

    Cached so that Streamlit reruns do not repeat the conversion.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
402
+
403
+
404
+ with st.container():
405
+
406
+ '''
407
+ Ask a question! Some examples:
408
+
409
+ - What are the most expensive protected sites?
410
+ - Which states have the highest average cost per acre?
411
+ - Which sites are owned, managed or sponsored by the Trust for Public Land? include all columns
412
+ '''
413
+
414
+ chatbox = st.container()
415
+ with chatbox:
416
+ if prompt := st.chat_input(key="chain"):
417
+ st.chat_message("user").write(prompt)
418
+ with st.chat_message("assistant"):
419
+ response = chain.invoke({"question": prompt + " No limit, use fuzzy matching when asked to match specific names."})
420
+ st.write(response)
421
+ tbl = query_database(response)
422
+ #if 'geometry' in tbl:
423
+ # gdf = get_geom(tbl)
424
+ # mapper(gdf)
425
+ # n = len(gdf)
426
+ # st.write(f"matching features: {n}")
427
+ st.dataframe(tbl)
428
+ csv = convert_df(tbl)
429
+ st.download_button(label="Download data as CSV",
430
+ data=csv,
431
+ file_name="results.csv",
432
+ mime="text/csv")
433
+
434
+
435
+ # +
436
+ st.divider()
437
+
438
+ st.markdown('''
439
+
440
+ ## Data Sources
441
+
442
+ PRIVATE DRAFT. Developed at UC Berkeley. All data copyright to Trust for Public Land. See <https://conservationalmanac.org/> for details.
443
+
444
+ ''')
preprocessing/pre-processing.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ jupytext:
3
+ formats: md:myst
4
+ text_representation:
5
+ extension: .md
6
+ format_name: myst
7
+ format_version: 0.13
8
+ jupytext_version: 1.16.2
9
+ kernelspec:
10
+ display_name: Python 3 (ipykernel)
11
+ language: python
12
+ name: python3
13
+ ---
14
+
15
+ ```{code-cell} ipython3
16
+ import ibis
17
+ from ibis import _
18
+ from minio import Minio
19
+ import streamlit as st
20
+ from datetime import timedelta
21
+ ```
22
+
23
+ ```{code-cell} ipython3
24
+ # Get signed URLs to access license-controlled layers
25
+ key = st.secrets["MINIO_KEY"]
26
+ secret = st.secrets["MINIO_SECRET"]
27
+ client = Minio("minio.carlboettiger.info", key, secret)
28
+
29
+ parquet = client.get_presigned_url(
30
+ "GET",
31
+ "shared-tpl",
32
+ "tpl.parquet",
33
+ expires=timedelta(hours=2),
34
+ )
35
+ ```
36
+
37
+ ```{code-cell} ipython3
38
+ con = ibis.duckdb.connect()
39
+ df = con.read_parquet(parquet)
40
+ ```
preprocessing/tpl.html ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f73c60799960ab27820166b0cf19de6e29b1b28e19b6d3a8568553f6dce201ac
3
+ size 118505121