import requests
import geopandas as gpd
import pandas as pd
from shapely.geometry import shape

API_KEY = "ea9637fd9f0c41f3e2e932faa99dfcd76f8041aa"
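# NOTE: committing a live API key is risky. A common alternative (an
# assumption, not part of the original script) is to read it from the
# environment instead:
#   import os
#   API_KEY = os.environ.get("CENSUS_API_KEY", "")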
# Mapping dictionaries
state_abbrev_to_fips = {
    "AL": "01", "AK": "02", "AZ": "04", "AR": "05", "CA": "06", "CO": "08",
    "CT": "09", "DE": "10", "DC": "11", "FL": "12", "GA": "13", "HI": "15",
    "ID": "16", "IL": "17", "IN": "18", "IA": "19", "KS": "20", "KY": "21",
    "LA": "22", "ME": "23", "MD": "24", "MA": "25", "MI": "26", "MN": "27",
    "MS": "28", "MO": "29", "MT": "30", "NE": "31", "NV": "32", "NH": "33",
    "NJ": "34", "NM": "35", "NY": "36", "NC": "37", "ND": "38", "OH": "39",
    "OK": "40", "OR": "41", "PA": "42", "RI": "44", "SC": "45", "SD": "46",
    "TN": "47", "TX": "48", "UT": "49", "VT": "50", "VA": "51", "WA": "53",
    "WV": "54", "WI": "55", "WY": "56"
}
fips_to_abbrev = {v: k for k, v in state_abbrev_to_fips.items()}
def fetch_state_data():
    """Fetch state-level Census population data and return it as a DataFrame."""
    state_url = (
        "https://api.census.gov/data/2019/pep/population"
        f"?get=NAME,POP&for=state:*&key={API_KEY}"
    )
    r_states = requests.get(state_url, timeout=30)
    r_states.raise_for_status()  # fail fast on HTTP errors
    state_data = r_states.json()
    # The first row of the response is the header; the rest are records
    df = pd.DataFrame(state_data[1:], columns=state_data[0])
    df["POP"] = df["POP"].astype(int)
    return df
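# For reference, the PEP endpoint returns JSON rows shaped roughly like
#   [["NAME", "POP", "state"], ["Alabama", "4903185", "01"], ...]
# (illustrative values), which is why the first row becomes the header above.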
def fetch_county_data():
    """Fetch county-level Census population data and return it as a DataFrame."""
    county_url = (
        "https://api.census.gov/data/2019/pep/population"
        f"?get=NAME,POP&for=county:*&key={API_KEY}"
    )
    r_counties = requests.get(county_url, timeout=30)
    r_counties.raise_for_status()
    county_data = r_counties.json()
    df = pd.DataFrame(county_data[1:], columns=county_data[0])
    df["POP"] = df["POP"].astype(int)
    # NAME looks like "Autauga County, Alabama"; split once on the first comma
    df[["countyName", "stateName"]] = df["NAME"].str.split(",", n=1, expand=True)
    df["stateName"] = df["stateName"].str.strip()
    # Five-digit county FIPS = two-digit state code + three-digit county code
    df["FIPS"] = df["state"].str.zfill(2) + df["county"].str.zfill(3)
    return df
def fetch_geojson(url: str):
    """Fetch GeoJSON data from the provided URL."""
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    return r.json()
def load_epa_data():
    """
    Load EPA region data from the CSV file.
    Explicitly reading only the 'Region' and 'States' columns prevents
    extra trailing commas from creating additional columns.
    """
    df = pd.read_csv("epa_regions.csv", usecols=["Region", "States"])
    # Normalize the state abbreviations: strip whitespace, force uppercase
    df["States"] = df["States"].str.strip().str.upper()
    return df
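# Assumed layout of epa_regions.csv (one row per state; the file itself is
# not shown in this snippet):
#   Region,States
#   1,CT
#   1,ME
#   ...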
def build_states_gdf(state_df, state_abbrev_to_fips):
    """
    Build a GeoDataFrame for US states with Census data and EPA region attached.
    """
    state_pop_dict = state_df.set_index("state")["POP"].to_dict()
    state_name_dict = state_df.set_index("state")["NAME"].to_dict()
    url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json"
    geo_data = fetch_geojson(url)
    rows = []
    for feat in geo_data["features"]:
        abbrev = feat["id"].upper()  # Ensure uppercase
        geom = shape(feat["geometry"])
        fips = state_abbrev_to_fips.get(abbrev)
        if fips:
            pop_val = state_pop_dict.get(fips, "No data")
            name_val = state_name_dict.get(fips, "No data")
            rows.append({
                "geometry": geom,
                "STATE_FIPS": fips,
                "NAME": name_val,
                "POP": pop_val
            })
    gdf = gpd.GeoDataFrame(rows, crs="EPSG:4326")
    # Preformatted population string for tooltips
    gdf["POP_TT"] = gdf["POP"].apply(lambda x: f"{int(x):,}" if isinstance(x, int) else "No data")
    # Add state abbreviation column for merging EPA data
    gdf["STATE_ABBR"] = gdf["STATE_FIPS"].map(fips_to_abbrev)
    # Merge EPA data
    epa_df = load_epa_data()
    gdf = gdf.merge(epa_df, left_on="STATE_ABBR", right_on="States", how="left")
    # Rename EPA column and ensure it's numeric
    gdf.rename(columns={"Region": "EPA_REGION"}, inplace=True)
    gdf["EPA_REGION"] = pd.to_numeric(gdf["EPA_REGION"], errors="coerce")
    return gdf
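# Sketch of the resulting frame (column names only, inferred from the code
# above, not real output):
#   geometry | STATE_FIPS | NAME | POP | POP_TT | STATE_ABBR | States | EPA_REGION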
def build_counties_gdf(county_df):
    """
    Build a GeoDataFrame for US counties, simplify geometries,
    and merge EPA region data.
    """
    county_pop_dict = county_df.set_index("FIPS")["POP"].to_dict()
    county_name_dict = county_df.set_index("FIPS")["NAME"].to_dict()
    url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
    geo_data = fetch_geojson(url)
    rows = []
    for feat in geo_data["features"]:
        fips = feat["id"]
        geom = shape(feat["geometry"])
        pop_val = county_pop_dict.get(fips, "No data")
        name_val = county_name_dict.get(fips, "No data")
        rows.append({
            "geometry": geom,
            "FIPS": fips,
            "NAME": name_val,
            "POP": pop_val
        })
    gdf = gpd.GeoDataFrame(rows, crs="EPSG:4326")
    gdf["geometry"] = gdf["geometry"].simplify(tolerance=0.05, preserve_topology=True)
    # Add state abbreviation column for merging EPA data
    gdf["STATE_ABBR"] = gdf["FIPS"].str[:2].map(fips_to_abbrev)
    epa_df = load_epa_data()
    gdf = gdf.merge(epa_df, left_on="STATE_ABBR", right_on="States", how="left")
    gdf.rename(columns={"Region": "EPA_REGION"}, inplace=True)
    gdf["EPA_REGION"] = pd.to_numeric(gdf["EPA_REGION"], errors="coerce")
    return gdf
def load_and_merge_caps(states_gdf):
    """
    Merge state-level climate action plan data with the states GeoDataFrame
    and precompute display columns.
    """
    caps_df = pd.read_csv("caps_plans.csv")
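    # caps_plans.csv is assumed to carry at least City, State, Year, and
    # "Plan Type" columns (inferred from the accesses below).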
caps_df["State"] = caps_df["State"].str.strip().str.upper() | |
caps_df["STATE_FIPS"] = caps_df["State"].map(state_abbrev_to_fips) | |
caps_df["plan_info"] = caps_df.apply( | |
lambda row: f"{row['City']}, {row['Year']}, {row['Plan Type']}", axis=1 | |
) | |
grouped = caps_df.groupby("STATE_FIPS").agg( | |
n_caps=("Plan Type", "count"), | |
plan_list=("plan_info", lambda x: list(x)) | |
).reset_index() | |
merged = states_gdf.merge(grouped, on="STATE_FIPS", how="left") | |
merged["n_caps"] = merged["n_caps"].fillna(0).astype(int) | |
merged["plan_list"] = merged["plan_list"].apply(lambda x: x if isinstance(x, list) else []) | |
if "POP_TT" not in merged.columns: | |
merged["POP_TT"] = merged["POP"].apply(lambda x: f"{int(x):,}" if isinstance(x, int) else "No data") | |
return merged | |
def load_and_merge_caps_county(counties_gdf):
    """
    Merge county-level climate action plan data with the counties GeoDataFrame
    and precompute display columns.
    """
    caps_df = pd.read_csv("caps_plans.csv")
    mapping_df = pd.read_csv("city_county_mapping.csv")
    caps_df["State"] = caps_df["State"].str.strip().str.upper()
    # Attach each plan's county via the city-to-county mapping
    merged_caps = pd.merge(
        caps_df, mapping_df,
        on=["City", "State"],
        how="left"
    )
    merged_caps["plan_info"] = merged_caps.apply(
        lambda row: f"{row['City']}, {row['Year']}, {row['Plan Type']}", axis=1
    )
    # Normalized county key, e.g. "Autauga County, Alabama" -> "AUTAUGA"
    merged_caps["CountyKey"] = merged_caps["County"].apply(
        lambda x: x.upper().split(',')[0].replace(" COUNTY", "").strip() if pd.notnull(x) else None
    )
    grouped = merged_caps.groupby(["CountyKey", "State"]).agg(
        n_caps=("Plan Type", "count"),
        plan_list=("plan_info", lambda x: list(x))
    ).reset_index()
    counties_gdf["STATE"] = counties_gdf["FIPS"].str[:2].map(fips_to_abbrev)
    counties_gdf["CountyKey"] = counties_gdf["NAME"].apply(
        lambda x: x.upper().split(',')[0].replace(" COUNTY", "").strip()
    )
    merged_counties = counties_gdf.merge(
        grouped,
        left_on=["CountyKey", "STATE"],
        right_on=["CountyKey", "State"],
        how="left"
    )
    merged_counties["n_caps"] = merged_counties["n_caps"].fillna(0).astype(int)
    merged_counties["plan_list"] = merged_counties["plan_list"].apply(lambda x: x if isinstance(x, list) else [])
    merged_counties["POP_TT"] = merged_counties["POP"].apply(
        lambda x: f"{int(x):,}" if pd.notnull(x) and isinstance(x, (int, float)) else "No data"
    )
    merged_counties["FIPS_TT"] = merged_counties["FIPS"].apply(
        lambda x: x.zfill(5) if isinstance(x, str) and x.isdigit() else "No data"
    )
    return merged_counties
def load_city_mapping():
    """Load the city mapping CSV for marker locations."""
    df = pd.read_csv("city_county_mapping.csv")
    df["City"] = df["City"].str.strip()
    df["State"] = df["State"].str.strip().str.upper()
    df["Latitude"] = pd.to_numeric(df["Latitude"], errors="coerce")
    df["Longitude"] = pd.to_numeric(df["Longitude"], errors="coerce")
    return df
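# city_county_mapping.csv is assumed to include City, State, County,
# Latitude, and Longitude columns (inferred from the accesses above and in
# load_and_merge_caps_county).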
def load_city_plans():
    """Load and group climate action plan data by city and state."""
    df = pd.read_csv("caps_plans.csv")
    df["City"] = df["City"].str.strip()
    df["State"] = df["State"].str.strip().str.upper()
    df["plan_info"] = df.apply(lambda row: f"{row['Year']}, {row['Plan Type']}", axis=1)
    grouped = df.groupby(["City", "State"]).agg(plan_list=("plan_info", lambda x: list(x))).reset_index()
    return grouped
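# Sketch of one grouped row (illustrative values, not real data):
#   City="Ann Arbor", State="MI", plan_list=["2020, Climate Action Plan"]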
def merge_nri_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the NRI Future Risk Index data with the state and county GeoDataFrames.
    """
    # Risk columns to keep (and, at the state level, to average)
    risk_cols = [
        "CFLD_MID_HIGHER_PRISKS", "CFLD_LATE_HIGHER_PRISKS",
        "CFLD_MID_HIGHER_HM", "CFLD_LATE_HIGHER_HM",
        "WFIR_MID_HIGHER_PRISKS", "WFIR_LATE_HIGHER_PRISKS",
        "WFIR_MID_HIGHER_HM", "WFIR_LATE_HIGHER_HM",
        "DRGT_MID_HIGHER_PRISKS", "DRGT_LATE_HIGHER_PRISKS",
        "DRGT_MID_HIGHER_HM", "DRGT_LATE_HIGHER_HM",
        "HRCN_MID_HIGHER_PRISKS", "HRCN_LATE_HIGHER_PRISKS",
        "HRCN_MID_HIGHER_HM", "HRCN_LATE_HIGHER_HM",
        "EXHT_L95_MID_HIGHER_PRISKS", "EXHT_L95_LATE_HIGHER_PRISKS",
        "EXHT_L95_MID_HIGHER_HM", "EXHT_L95_LATE_HIGHER_HM"
    ]
    # Identifier columns plus the risk columns, rounded for display
    nri_cols = ["STATEABBRV", "STATE", "COUNTY", "STCOFIPS"] + risk_cols
    nri_df = pd.read_excel("data/NRI Future Risk Index.xlsx")[nri_cols].round(2)
    # Compute the state-level means of the risk columns
    grouped_states = nri_df.groupby("STATE")[risk_cols].mean().round(2)
    # Convert FIPS codes to numeric so the county merge keys match.
    # Side effect: counties_gdf_caps['FIPS'] becomes an int column from here
    # on, which the later FEMA merge relies on.
    counties_gdf_caps['FIPS'] = pd.to_numeric(counties_gdf_caps['FIPS'], errors='coerce').fillna(0).astype(int)
    nri_df['STCOFIPS'] = pd.to_numeric(nri_df['STCOFIPS'], errors='coerce').fillna(0).astype(int)
    # Merge the aggregated state data with the states GeoDataFrame;
    # grouped_states is indexed by state name, so merge on the index.
    merged_states_gdf = states_gdf_caps.merge(grouped_states, left_on="NAME", right_index=True, how="left")
    # Merge the original NRI data with the counties GeoDataFrame on the FIPS codes
    merged_counties_gdf = counties_gdf_caps.merge(nri_df, left_on="FIPS", right_on="STCOFIPS", how="left")
    return merged_states_gdf, merged_counties_gdf
def merge_fema_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the FEMA data with the state and county GeoDataFrames.
    """
    fema_df = pd.read_csv("data/fema_data.csv")
    # FEMA numeric columns to average.
    # Note: the ISTM columns keep their original names.
    fema_cols = [
        "RISK_SCORE", "RISK_SPCTL", "EAL_VALT", "SOVI_SCORE", "RESL_SCORE",
        "AVLN_EALS", "AVLN_EALT", "CFLD_EALS", "CFLD_EALT", "CWAV_EALS", "CWAV_EALT",
        "DRGT_EALS", "DRGT_EALT", "HAIL_EALS", "HAIL_EALT", "HWAV_EALS", "HWAV_EALT",
        "HRCN_EALS", "HRCN_EALT", "ISTM_EALS", "ISTM_EALT", "LNDS_EALS", "LNDS_EALT",
        "RFLD_EALS", "RFLD_EALT", "SWND_EALS", "SWND_EALT", "TRND_EALS", "TRND_EALT",
        "WFIR_EALS", "WFIR_EALT", "WNTW_EALS", "WNTW_EALT"
    ]
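    # (In FEMA National Risk Index naming, *_EALS columns are Expected Annual
    # Loss scores and *_EALT totals; prefixes such as CFLD, DRGT, and WFIR
    # denote hazards like coastal flooding, drought, and wildfire. Noted here
    # as a reading aid; see the FEMA NRI data dictionary.)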
    # Compute the county-level means of the numeric columns
    grouped_counties = fema_df.groupby("STCOFIPS")[fema_cols].mean().round(2)
    # Similarly, compute state-level means of the same columns
    grouped_states = fema_df.groupby("STATEABBRV")[fema_cols].mean().round(2)
    # Merge the aggregated county data with the counties GeoDataFrame;
    # grouped_counties is indexed by STCOFIPS, so merge on the index.
    merged_counties_gdf = counties_gdf_caps.merge(grouped_counties, left_on="FIPS", right_index=True, how="left")
    # Merge the aggregated state data with the states GeoDataFrame
    merged_states_gdf = states_gdf_caps.merge(grouped_states, left_on="STATE_ABBR", right_index=True, how="left")
    return merged_states_gdf, merged_counties_gdf
def merge_cejst_data(states_gdf_caps, counties_gdf_caps):
    """
    Merge the CEJST data with the state and county GeoDataFrames.
    """
    cejst_df = pd.read_csv("data/cejst.csv")
    # Keep only the desired columns, rounded for display
    cejst = cejst_df[[
        'County Name',
        'State/Territory',
        'Share of properties at risk of flood in 30 years (percentile)',
        'Share of properties at risk of flood in 30 years',
        'Share of properties at risk of fire in 30 years (percentile)',
        'Share of properties at risk of fire in 30 years',
        'Energy burden (percentile)',
        'PM2.5 in the air (percentile)',
        'PM2.5 in the air',
        "Share of the tract's land area that is covered by impervious surface or cropland as a percent",
        'Current asthma among adults aged greater than or equal to 18 years'
    ]].round(2)
    numeric_cols = cejst.select_dtypes(include=['number']).columns
    # Mean of all numeric columns per county-state combination
    county_state_means = cejst.groupby(['County Name', 'State/Territory'])[numeric_cols].mean().round(2).reset_index()
    # Mean of all numeric columns per state
    state_means = cejst.groupby('State/Territory')[numeric_cols].mean().round(2).reset_index()
    # Build a NAME column matching the counties GeoDataFrame's NAME format
    county_state_means["NAME"] = county_state_means["County Name"] + ", " + county_state_means["State/Territory"]
    # Merge using the newly created NAME column
    merged_counties_gdf = counties_gdf_caps.merge(county_state_means, on="NAME", how="left")
    # Merge the aggregated state data with the states GeoDataFrame
    merged_states_gdf = states_gdf_caps.merge(state_means, left_on="NAME", right_on="State/Territory", how="left")
    return merged_states_gdf, merged_counties_gdf
if __name__ == "__main__":
    # Fetch and process Census data
    state_df = fetch_state_data()
    county_df = fetch_county_data()
    states_gdf = build_states_gdf(state_df, state_abbrev_to_fips)
    counties_gdf = build_counties_gdf(county_df)
    states_gdf_caps = load_and_merge_caps(states_gdf)
    counties_gdf_caps = load_and_merge_caps_county(counties_gdf)
    # Order matters: merge_nri_data converts county FIPS to int, which the
    # FEMA merge then depends on
    states_gdf_caps, counties_gdf_caps = merge_nri_data(states_gdf_caps, counties_gdf_caps)
    states_gdf_caps, counties_gdf_caps = merge_fema_data(states_gdf_caps, counties_gdf_caps)
    states_gdf_caps, counties_gdf_caps = merge_cejst_data(states_gdf_caps, counties_gdf_caps)
    city_mapping_df = load_city_mapping()
    city_plans_df = load_city_plans()
    # Save all data as binary (pickle) files for fast loading later
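    # Ensure the output directory exists before pickling (a small robustness
    # addition, not in the original script)
    import os
    os.makedirs("./maps_helpers", exist_ok=True)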
    state_df.to_pickle("./maps_helpers/state_df.pkl")
    county_df.to_pickle("./maps_helpers/county_df.pkl")
    states_gdf_caps.to_pickle("./maps_helpers/states_gdf_caps.pkl")
    counties_gdf_caps.to_pickle("./maps_helpers/counties_gdf_caps.pkl")
    city_mapping_df.to_pickle("./maps_helpers/city_mapping_df.pkl")
    city_plans_df.to_pickle("./maps_helpers/city_plans_df.pkl")
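    # To reuse these later (sketch), e.g. in the app that renders the maps:
    #   states = pd.read_pickle("./maps_helpers/states_gdf_caps.pkl")
    # pd.read_pickle restores the GeoDataFrame, geometry included, provided
    # compatible pandas/geopandas versions are installed.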