File size: 5,761 Bytes
219309c
 
 
 
4c8f4a0
 
 
 
e8afe79
219309c
4c8f4a0
a6bff0e
4c8f4a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8afe79
 
219309c
 
 
4c8f4a0
c25ad97
4c8f4a0
 
e8afe79
 
 
219309c
9883a96
 
e04f2af
 
4c8f4a0
e8afe79
4c8f4a0
 
 
 
 
e8afe79
4c8f4a0
e8afe79
4c8f4a0
 
 
e8afe79
4c8f4a0
 
 
e8afe79
4c8f4a0
219309c
4c8f4a0
e8afe79
4c8f4a0
 
 
 
 
e8afe79
4c8f4a0
 
 
e8afe79
4c8f4a0
e8afe79
4c8f4a0
 
 
 
 
 
 
e8afe79
4c8f4a0
 
 
 
e5528c2
e8afe79
 
4c8f4a0
 
 
 
219309c
 
 
 
 
 
 
e8afe79
4c8f4a0
 
 
e8afe79
4c8f4a0
 
 
 
 
 
 
e8afe79
4c8f4a0
e8afe79
4c8f4a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e8afe79
 
 
4c8f4a0
 
 
 
 
e8afe79
4c8f4a0
 
e8afe79
4c8f4a0
e8afe79
 
4c8f4a0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
from pathlib import Path
from urllib.request import urlretrieve

import cartopy.crs as ccrs
import fugue.api as fa
import geopandas as gpd
import geoviews as gv
import panel as pn
import pandas as pd
import pyarrow as pa
from datasets import load_dataset_builder
from holoviews.streams import RangeXY
from shapely import wkt

# Load the GeoViews extension with the Bokeh plotting backend.
gv.extension("bokeh")
# Load Panel together with its "tabulator" extension; the MapnStreets class
# below uses a pn.widgets.Tabulator widget.
pn.extension("tabulator")

# Markdown blurb placed at the top of the application's sidebar.
INTRO = """
    *Have you ever looked at a street name and wondered how common it is?*

    Put your curiosity to rest with MapnStreets! By simply entering a name
    in the provided box, you can discover the prevalence of a street name.
    The map will display the locations of all streets with that name,
    and for more detailed information, you can click on the table to
    highlight their exact whereabouts.

    Uses [TIGER/Line® Edges](https://www2.census.gov/geo/tiger/TIGER_RD18/LAYER/EDGES/)
    data provided by the US Census Bureau.

    Powered by OSS:
    [Fugue](https://fugue-tutorials.readthedocs.io),
    [Panel](https://panel.holoviz.org/),
    [GeoPandas](https://geopandas.org/),
    [GeoViews](https://geoviews.org/),
    [Parquet](https://parquet.apache.org/),
    [DuckDB](https://duckdb.org/),
    [Ray](https://ray.io/),
    and all their supporting dependencies.
"""

# Location under the user's home directory where the prepared Parquet data is
# written by download_hf() and later read back by the SQL query.
DATA_DIR = Path.home().joinpath(".cache", "huggingface", "datasets")
DATA_PATH = DATA_DIR.joinpath("edges.parquet")

# SQL template passed to fugue_sql(); ``{{data_path}}`` and ``{{name}}`` are
# template placeholders supplied as keyword arguments at query time. The
# ``==`` operator is swapped for ``LIKE`` by the caller for wildcard searches.
QUERY_FMT = """
    df = LOAD "{{data_path}}"
    df_sel = SELECT STATEFP, COUNTYFP, FULLNAME, geometry \
        FROM df WHERE FULLNAME == '{{name}}'
"""


def download_hf(path: str, **kwargs):
    """Download a Hugging Face dataset and prepare it as Parquet at DATA_PATH.

    Args:
        path: Dataset identifier handed to ``load_dataset_builder``
            (for example ``"ahuang11/tiger_layer_edges"``).
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``load_dataset_builder``.
    """
    # Honour the caller's arguments instead of a hard-coded dataset id.
    builder = load_dataset_builder(path, **kwargs)
    # The output directory is passed as a plain string path.
    builder.download_and_prepare(str(DATA_PATH), file_format="parquet")


class MapnStreets:
    """Panel application that maps every street matching a typed name.

    The user types a street name (``*`` or ``%`` act as wildcards) into a text
    box. Matching rows are queried from the Parquet data at ``DATA_PATH``,
    summarised in a table, and drawn on a dark tile map as line geometries
    plus centroid markers.
    """

    def __init__(self):
        """Create the widgets and schedule the data load for page load."""
        # GeoDataFrame of the latest query result; None until a query has run.
        self.gdf = None
        self.name_input = pn.widgets.TextInput(
            value="*Andrew St",
            placeholder="Enter a name...",
            margin=(9, 5, 5, 25),
        )
        # Re-run the query every time the text input's value changes.
        pn.bind(self.process_name, self.name_input, watch=True)

        features = gv.tile_sources.CartoDark()
        self.holoviews_pane = pn.pane.HoloViews(
            features, sizing_mode="stretch_both", min_height=800
        )
        self.tabulator = pn.widgets.Tabulator(width=225, disabled=True)
        self.records_text = pn.widgets.StaticText(value="<h3>0 records found</h3>")
        # Defer the download and first query until the page has loaded.
        pn.state.onload(self.onload)

    def onload(self):
        """Fetch the data, run the initial query and overlay the map layers."""
        download_hf("ahuang11/tiger_layer_edges")
        # Fire the watcher once so the default name is queried immediately.
        self.name_input.param.trigger("value")

        range_xy = RangeXY()
        line_strings = gv.DynamicMap(
            self.refresh_line_strings, streams=[range_xy]
        ).opts(responsive=True)
        range_xy.source = line_strings

        # Centroid markers are redrawn whenever the table selection changes.
        points = gv.DynamicMap(
            pn.bind(self.refresh_points, self.tabulator.param.selection)
        ).opts(responsive=True)

        self.holoviews_pane.object *= line_strings * points

    def serialize_geom(self, df):
        """Turn a query result into a GeoDataFrame with centroid columns.

        Args:
            df: pandas DataFrame whose ``geometry`` column holds WKT strings.
                The column is converted in place on the passed frame.

        Returns:
            A GeoDataFrame with parsed geometries and added ``Longitude`` /
            ``Latitude`` columns taken from each geometry's centroid.
        """
        df["geometry"] = df["geometry"].apply(wkt.loads)
        gdf = gpd.GeoDataFrame(df)
        centroids = gdf["geometry"].centroid
        gdf["Longitude"] = centroids.x
        gdf["Latitude"] = centroids.y
        return gdf

    @staticmethod
    def _parse_name(name: str):
        """Normalise raw user input into a search pattern.

        Args:
            name: Text typed by the user.

        Returns:
            ``(pattern, use_like)`` where ``pattern`` is the stripped name with
            ``*`` translated to ``%`` and ``use_like`` tells whether a wildcard
            match is needed; or ``None`` when the input is empty or made up of
            nothing but wildcards/whitespace (which would match everything).
        """
        pattern = name.strip()
        use_like = "*" in pattern or "%" in pattern
        if use_like:
            pattern = pattern.replace("*", "%")
        # Reject "", "%", "%%", "% %", ...: nothing selective is left to match.
        if not pattern.replace("%", "").strip():
            return None
        return pattern, use_like

    def process_name(self, name):
        """Query streets matching ``name`` and refresh table and record count.

        Empty or wildcard-only input is ignored and the previous results are
        kept. While the query runs the map pane shows its loading indicator.

        Args:
            name: Street name typed by the user; ``*`` / ``%`` are wildcards.
        """
        parsed = self._parse_name(name)
        if parsed is None:
            return
        pattern, use_like = parsed
        # Wildcard searches need LIKE instead of the template's equality test.
        query_fmt = QUERY_FMT.replace("==", "LIKE") if use_like else QUERY_FMT

        self.holoviews_pane.loading = True
        try:
            # NOTE(review): ``pattern`` is substituted into the SQL template
            # unescaped, so a name containing a single quote will break or
            # alter the query. The right escaping depends on FugueSQL's
            # parser; confirm before adding it.
            df = fa.as_pandas(
                fa.fugue_sql(
                    query_fmt,
                    data_path=str(DATA_PATH.absolute()),
                    name=pattern,
                    engine="duckdb",
                    as_local=True,
                )
            )
            self.gdf = self.serialize_geom(df)
            # One row per (state, county, name) combination.
            county_gdf = self.gdf.drop_duplicates(
                subset=["STATEFP", "COUNTYFP", "FULLNAME"]
            )
            self.records_text.value = f"<h3>{len(county_gdf)} records found</h3>"
            # Per name: number of distinct state/county pairs it occurs in.
            self.tabulator.value = (
                county_gdf["FULLNAME"]
                .value_counts()
                .rename_axis("Name")
                .rename("Count")
                .to_frame()
            )
            # NOTE(review): the line-string layer is a DynamicMap driven only
            # by the RangeXY stream, so it is rebuilt from the new ``self.gdf``
            # on the next range event; confirm whether an explicit stream
            # trigger is wanted right after a search.
        finally:
            self.holoviews_pane.loading = False

    def refresh_line_strings(self, x_range=None, y_range=None):
        """Build the street-geometry layer for the current query result.

        Args:
            x_range: Current x-axis range supplied by the RangeXY stream.
            y_range: Current y-axis range supplied by the RangeXY stream.

        Returns:
            A ``gv.Polygons`` element; empty when no query has run yet.
        """
        no_data = self.gdf is None
        data = [] if no_data else self.gdf[["geometry"]]
        line_strings = gv.Polygons(
            data,
            crs=ccrs.PlateCarree(),
        ).opts(fill_alpha=0, line_color="white", line_width=8, alpha=0.6)
        if no_data:
            return line_strings
        return line_strings.select(x=x_range, y=y_range)

    def refresh_points(self, selection):
        """Build the centroid-marker layer, optionally filtered by the table.

        Args:
            selection: Row positions currently selected in the table; when
                empty, markers for all results are shown.

        Returns:
            A ``gv.Points`` element; empty when no query has run yet.
        """
        kdims = ["Longitude", "Latitude"]
        vdims = ["STATEFP", "COUNTYFP", "FULLNAME"]
        if self.gdf is None:
            data = []
        else:
            data = self.gdf[kdims + vdims]
            table = self.tabulator.value
            if selection and table is not None:
                # Ignore positions that do not fit the current table.
                rows = [i for i in selection if 0 <= i < len(table)]
                if rows:
                    names = table.iloc[rows].index.tolist()
                    data = data.loc[data["FULLNAME"].isin(names)]
        points = gv.Points(
            data,
            kdims=kdims,
            vdims=vdims,
            crs=ccrs.PlateCarree(),
        ).opts(marker="x", tools=["hover"], color="#FF4136", size=8)
        return points

    def view(self):
        """Assemble the dark-themed template and mark it servable.

        Returns:
            The value returned by ``template.servable()``.
        """
        template = pn.template.FastListTemplate(
            header=[pn.Row(self.name_input, self.records_text)],
            sidebar=[INTRO, self.tabulator],
            main=[
                self.holoviews_pane,
            ],
            theme="dark",
            title="MapnStreets",
            sidebar_width=225,
        )
        return template.servable()


# Build the application at import time and register its template as servable
# (view() returns template.servable()).
mapn_streets = MapnStreets()
mapn_streets.view()