|
import warnings

import geopandas as gpd
import pandas as pd
import pyarrow.parquet as pq
import streamlit as st
from huggingface_hub import hf_hub_download
|
|
|
|
|
# Silence RuntimeWarnings whose message mentions USECOLS.
warnings.filterwarnings(
    action="ignore",
    message=".*USECOLS.*",
    category=RuntimeWarning,
)

# Page-level Streamlit settings: browser tab title and a wide layout.
st.set_page_config(
    layout="wide",
    page_title="Optimized GADM Data Processing",
)

# Location of the GeoPackage inside the Hugging Face dataset repository.
repo_id = "hussain2010/World_Map_Files"
filename = "gadm_410.gpkg"

# Token used for the Hugging Face download, read from the Streamlit secrets store.
hf_api_key = st.secrets["World_Map_Dataset_Cleaning"]
|
|
|
|
|
# NOTE(review): the emoji in these UI strings were mis-decoded in the source
# (UTF-8 bytes shown through a single-byte code page); one stray byte even
# split the success message across two lines, leaving an unterminated string.
# The download, success and warning glyphs are recoverable from the surviving
# characters. The title glyph is not, so the globe is an assumption — confirm.
st.title("🌍 Optimized GADM Geospatial Data Processing")

# Fetch the dataset file from the Hugging Face dataset repository. Nothing
# further down can run without it, so on failure the error is shown and the
# script run is halted.
st.subheader("📥 Downloading Dataset from Hugging Face")
try:
    dataset_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        token=hf_api_key,
        repo_type="dataset",
    )
except Exception as e:  # UI boundary: report any failure to the user
    st.error(f"⚠️ Error downloading dataset: {e}")
    st.stop()
else:
    st.success("✅ Dataset downloaded successfully!")
|
|
|
|
|
# Let the user pick a country, load only that country's rows, convert the
# geometries to WKT text and offer the result as a Parquet download.
#
# NOTE(review): the emoji in these UI strings were mis-decoded in the source.
# The download and warning glyphs are recoverable from the surviving
# characters; the glyphs on the "Loading", "Select a Country" and "preview"
# strings are not, so those three are assumptions — confirm.
st.subheader("📂 Loading Dataset Efficiently")
try:
    # The country picker needs two attribute fields only. `columns` limits
    # the fields that are read and `ignore_geometry=True` skips geometry
    # decoding, so this returns a plain DataFrame instead of the full layer.
    metadata_df = gpd.read_file(
        dataset_path,
        layer=0,
        columns=["GID_0", "NAME_0"],
        ignore_geometry=True,
    ).drop_duplicates()

    selected_country = st.selectbox(
        "🌍 Select a Country", metadata_df["NAME_0"].unique()
    )

    # Double any embedded single quote so that names such as "Côte d'Ivoire"
    # still form a valid SQL string literal in the attribute filter.
    country_literal = str(selected_country).replace("'", "''")
    gdf = gpd.read_file(
        dataset_path, layer=0, where=f"NAME_0 = '{country_literal}'"
    )

    # Keep rows with valid geometries only. The explicit copy makes the
    # filtered frame independent, so later changes never act on a slice.
    gdf = gdf[gdf.is_valid].copy()

    # Reproject to EPSG:4326 when the layer declares a different CRS.
    if gdf.crs and gdf.crs.to_string() != "EPSG:4326":
        gdf = gdf.to_crs("EPSG:4326")

    # Serialise each geometry as WKT text; falsy geometries (for example
    # missing values) become None.
    geometry_wkt = gdf["geometry"].apply(lambda geom: geom.wkt if geom else None)

    # From here on the data is purely tabular: drop the geometry column and
    # continue with a plain DataFrame so the regular pandas Parquet writer is
    # used. `gdf` stays bound to the final table, WKT column last as before.
    gdf = pd.DataFrame(gdf.drop(columns="geometry"))
    gdf["geometry_wkt"] = geometry_wkt

    optimized_file = "optimized_gadm.parquet"
    gdf.to_parquet(optimized_file, engine="pyarrow")

    st.write("✅ Memory-efficient dataset preview:")
    st.dataframe(gdf.head(100))

    with open(optimized_file, "rb") as file:
        st.download_button(
            "📥 Download Optimized Geospatial Data",
            data=file,
            file_name="optimized_gadm.parquet",
            mime="application/octet-stream",
        )
except Exception as e:  # UI boundary: report any failure to the user
    st.error(f"⚠️ Error processing file: {e}")
|
|