import pandas as pd
import folium
from sklearn.cluster import KMeans
from folium.plugins import MarkerCluster
import requests
from io import BytesIO
import streamlit as st
import folium
from streamlit.components.v1 import html
# Load data from Excel (directly from the URL)
def load_data(url, timeout=30):
    """Download an Excel workbook and return its two sheets merged per school.

    The workbook must contain a "lat long" sheet and a "measurement data"
    sheet that both carry a ``school_id_giga`` column.

    Args:
        url: Address of the ``.xlsx`` file to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the inner join of both sheets on
        ``school_id_giga``; column names have surrounding whitespace removed.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an HTTP error status.
        ValueError: If the response is not declared as an ``.xlsx`` file.
    """
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    # A timeout keeps the app from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # The header may be absent altogether; treat that like a wrong type
    # instead of failing with a KeyError.
    content_type = response.headers.get("Content-Type", "")
    if xlsx_mime not in content_type:
        raise ValueError("The file is not a valid Excel file.")

    # Parse the workbook once and pull both sheets out of the same pass.
    sheets = pd.read_excel(
        BytesIO(response.content),
        sheet_name=["lat long", "measurement data"],
        engine="openpyxl",
    )
    lat_long_data = sheets["lat long"]
    measurement_data = sheets["measurement data"]

    # Strip header whitespace BEFORE merging: a padded "school_id_giga "
    # header would otherwise make the join key lookup fail.
    lat_long_data.columns = lat_long_data.columns.str.strip()
    measurement_data.columns = measurement_data.columns.str.strip()

    merged_data = pd.merge(
        lat_long_data,
        measurement_data,
        on="school_id_giga",
        how="inner",
    )

    # Print columns to check the available columns in the data
    print("Columns in merged data:", merged_data.columns)
    return merged_data
# Perform clustering to find data center location
def find_data_center(df, n_clusters=1):
    """Cluster the school coordinates and return the cluster centres.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns.
        n_clusters: Number of clusters (candidate data-centre sites) to fit.

    Returns:
        The fitted model's ``cluster_centers_``: one ``[latitude, longitude]``
        row per cluster.

    Raises:
        ValueError: If no row has both a latitude and a longitude.
    """
    # Rows missing either coordinate carry no position, so leave them out
    # of the fit instead of handing missing values to the clusterer.
    coords = df[["latitude", "longitude"]].dropna()
    if coords.empty:
        raise ValueError(
            "No rows with both latitude and longitude are available for clustering."
        )

    # Fixed random_state keeps the proposed location reproducible between runs.
    model = KMeans(n_clusters=n_clusters, random_state=0).fit(coords)
    return model.cluster_centers_
# Create a map and plot the points
def plot_map(df, center):
    """Build a folium map of the schools and the proposed data centre(s).

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``download_speed``,
            ``upload_speed`` and ``latency`` columns; ``school_name`` is
            optional and falls back to "No Name Provided".
        center: Sequence of ``[latitude, longitude]`` pairs. The map is
            centred on the first pair and every pair gets a red marker.

    Returns:
        The populated ``folium.Map``.
    """
    # Imported locally because the module-level name ``html`` is bound to
    # Streamlit's component helper, not the standard-library module.
    from html import escape

    school_map = folium.Map(location=[center[0][0], center[0][1]], zoom_start=10)
    marker_cluster = MarkerCluster().add_to(school_map)

    # Add school locations to the map
    for _, row in df.iterrows():
        latitude, longitude = row["latitude"], row["longitude"]
        # A school without a position cannot be drawn; skip it so a single
        # incomplete record does not abort the whole map.
        if pd.isna(latitude) or pd.isna(longitude):
            continue

        # Popup text is rendered as HTML, so escape data values and use
        # <br> for the line breaks.
        school_name = escape(str(row.get("school_name", "No Name Provided")))
        download_speed = escape(str(row["download_speed"]))
        upload_speed = escape(str(row["upload_speed"]))
        latency = escape(str(row["latency"]))
        popup_html = (
            f"School Name: {school_name}<br>"
            f"Download Speed: {download_speed} Mbps<br>"
            f"Upload Speed: {upload_speed} Mbps<br>"
            f"Latency: {latency} ms"
        )

        folium.Marker(
            location=[latitude, longitude],
            popup=popup_html,
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(marker_cluster)

    # Add every proposed data center location to the map (one per cluster).
    for site in center:
        folium.Marker(
            location=[site[0], site[1]],
            popup="Proposed Data Center",
            icon=folium.Icon(color="red", icon="cloud"),
        ).add_to(school_map)

    return school_map
# Main function to run the application
def main():
    """Render the Streamlit page: heading, description and the school map.

    Downloads the workbook, computes the proposed data-centre location and
    embeds the resulting folium map. If the data cannot be loaded, an error
    message is shown in the page instead of the map.
    """
    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"  # Correct raw file URL

    # Streamlit renders elements in call order, so the heading and the
    # description must be emitted before the map to appear above it.
    st.title("Data Center Location Mapping")
    st.write("This map shows school locations and proposed data center locations based on clustering.")

    try:
        df = load_data(url)
    except (requests.RequestException, ValueError) as exc:
        # Show a readable message in the page rather than a raw traceback.
        st.error(f"Could not load the school data: {exc}")
        return

    center = find_data_center(df)
    school_map = plot_map(df, center)

    # Embed the map directly in the Streamlit app
    map_html = school_map._repr_html_()  # Render the folium map as HTML
    html(map_html, width=700, height=500)  # Adjust the size of the embedded map
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()