Map-Data / utilities /data_loader.py
akhil-vaidya's picture
feat: streamlit-app
1662e26
import requests
import pandas as pd
import re
import math
from typing import Tuple, List, Dict
def fetch_osm_data(lat: float, lon: float, radius: int, timeout: float = 180.0) -> List[Dict]:
    """Query the Overpass API for named OSM elements around a point.

    Nodes, ways and relations that carry a ``name`` tag and lie within
    ``radius`` of ``(lat, lon)`` are requested with ``out center``.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Search radius handed to the Overpass ``around`` filter
            (metres, per the Overpass QL documentation).
        timeout: Seconds passed to ``requests`` as the connect/read timeout,
            so a stalled server cannot block the caller forever.

    Returns:
        The ``elements`` list of the JSON response, or an empty list when the
        response carries no ``elements`` key.

    Raises:
        requests.HTTPError: If the server answers with a non-success status
            (e.g. rate limiting or gateway timeout).
        requests.Timeout: If no response arrives within ``timeout``.
    """
    overpass_url = "http://overpass-api.de/api/interpreter"
    # The query text is left flush-left so the string sent to the server is
    # unchanged.
    overpass_query = f"""
[out:json];
(
node["name"](around:{radius},{lat},{lon});
way["name"](around:{radius},{lat},{lon});
relation["name"](around:{radius},{lat},{lon});
);
out center;
"""
    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to JSON-decode an error page.
    response.raise_for_status()
    data = response.json()
    return data.get('elements', [])
def determine_location_type(tags: Dict[str, str]) -> str:
    """Classify an OSM element into a coarse location category from its tags.

    The checks run in a fixed priority order and the first match wins.

    Args:
        tags: The element's OSM tag mapping (key -> value).

    Returns:
        One of the fixed category labels (e.g. ``'Residential'``,
        ``'Commercial'``, ``'Parks & Recreation'``), the capitalised landuse
        value for residential/commercial/industrial/retail landuse,
        ``'Landuse: <Value>'`` for any other landuse, or ``'Other'`` when
        nothing matches.
    """
    building = tags.get('building')
    amenity = tags.get('amenity')
    leisure = tags.get('leisure')

    # Residential
    if building in ('residential', 'house', 'apartments', 'detached', 'terrace', 'dormitory', 'bungalow'):
        return 'Residential'

    # Commercial
    if any(key in tags for key in ('shop', 'office', 'craft')):
        return 'Commercial'
    if building in ('commercial', 'office', 'retail', 'supermarket', 'kiosk'):
        return 'Commercial'

    # Industrial
    if building in ('industrial', 'warehouse', 'factory', 'manufacture'):
        return 'Industrial'
    if 'industrial' in tags or 'industry' in tags:
        return 'Industrial'

    # Educational
    if amenity in ('school', 'university', 'college', 'library', 'kindergarten', 'language_school'):
        return 'Educational'

    # Healthcare
    if amenity in ('hospital', 'clinic', 'doctors', 'dentist', 'pharmacy', 'veterinary'):
        return 'Healthcare'

    # Food & Drink
    if amenity in ('restaurant', 'cafe', 'bar', 'fast_food', 'pub', 'food_court'):
        return 'Food & Drink'

    # Parks & Recreation
    # Must be tested before the generic leisure/tourism rule below; otherwise
    # any element with a 'leisure' key is swallowed by that rule and this
    # category can never be returned.
    if leisure in ('park', 'playground', 'sports_centre', 'stadium', 'garden'):
        return 'Parks & Recreation'

    # Leisure & Entertainment
    if 'leisure' in tags or 'tourism' in tags:
        return 'Leisure & Entertainment'
    if amenity in ('theatre', 'cinema', 'nightclub', 'arts_centre', 'community_centre'):
        return 'Leisure & Entertainment'

    # Transportation
    if amenity in ('parking', 'bicycle_parking', 'bus_station', 'ferry_terminal'):
        return 'Transportation'
    if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:
        return 'Transportation'

    # Religious
    if amenity in ('place_of_worship', 'monastery'):
        return 'Religious'

    # Government & Public Services
    if amenity in ('townhall', 'courthouse', 'police', 'fire_station', 'post_office'):
        return 'Government & Public Services'

    # Natural
    if 'natural' in tags:
        return 'Natural'

    # Landuse
    if 'landuse' in tags:
        landuse = tags['landuse'].capitalize()
        if landuse in ('Residential', 'Commercial', 'Industrial', 'Retail'):
            return landuse
        return f'Landuse: {landuse}'

    # If no specific category is found, return 'Other'
    return 'Other'
def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:
    """Convert raw Overpass elements into a table of classified locations.

    Args:
        elements: Element dicts; each must provide ``type`` and ``id`` and may
            provide a ``tags`` mapping.

    Returns:
        A DataFrame with the columns ``ID`` (``"<type>_<id>"``),
        ``Location Name`` (the ``name`` tag, or ``''`` when absent) and
        ``Location Type`` (the result of ``determine_location_type``). The
        columns are present even when ``elements`` is empty.
    """
    columns = ['ID', 'Location Name', 'Location Type']
    records = []
    for element in elements:
        tags = element.get('tags', {})
        records.append({
            'ID': f"{element['type']}_{element['id']}",
            'Location Name': tags.get('name', ''),
            'Location Type': determine_location_type(tags),
        })
    # Passing the column list explicitly yields the same schema for an empty
    # input, so no separate empty-case branch is needed.
    return pd.DataFrame(records, columns=columns)
def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:
    """Fetch the named OSM elements around a point and return them as a table.

    Chains ``fetch_osm_data`` and ``parse_osm_data``: the raw element list
    retrieved for ``(lat, lon, radius)`` is passed straight to the parser.
    """
    return parse_osm_data(fetch_osm_data(lat, lon, radius))
def dms_to_decimal(coord_str: str) -> Tuple[float, float]:
    """Convert a degrees/minutes/seconds coordinate pair to decimal degrees.

    Args:
        coord_str: Text such as ``19°03'08.6"N 72°54'06.0"E`` (latitude with
            N/S first, then longitude with E/W). Surrounding whitespace is
            ignored.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative.

    Raises:
        ValueError: If ``coord_str`` does not start with the expected format.
    """
    # Regular expression to match the coordinate format
    pattern = r'(\d+)°(\d+)\'([\d.]+)"([NS])\s*(\d+)°(\d+)\'([\d.]+)"([EW])'
    # Pasted coordinates often carry stray leading/trailing whitespace;
    # strip it so an otherwise valid string is not rejected.
    match = re.match(pattern, coord_str.strip())
    if not match:
        raise ValueError("Invalid coordinate format. Expected format: 19°03'08.6\"N 72°54'06.0\"E")

    lat_deg, lat_min, lat_sec, lat_dir, lon_deg, lon_min, lon_sec, lon_dir = match.groups()

    # Convert to decimal degrees
    lat = float(lat_deg) + float(lat_min)/60 + float(lat_sec)/3600
    lon = float(lon_deg) + float(lon_min)/60 + float(lon_sec)/3600

    # Adjust sign based on direction
    if lat_dir == 'S':
        lat = -lat
    if lon_dir == 'W':
        lon = -lon
    return lat, lon
def calculate_distant_points(lat: float, lon: float, distance: float) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    """Return the points ``distance`` metres east and north of a location.

    Uses a spherical-Earth approximation with a radius of 6,371,000 m.

    Args:
        lat: Latitude of the starting point in degrees.
        lon: Longitude of the starting point in degrees.
        distance: Offset in metres (same unit as the Earth radius constant).

    Returns:
        ``((lat, lon_east), (lat_north, lon))``: first the point at the same
        latitude with the longitude shifted, then the point at the same
        longitude with the latitude shifted.
    """
    # Earth's radius in meters
    R = 6371000

    # Only the latitude is needed in radians: it scales the east-west arc.
    lat_rad = math.radians(lat)

    # Calculate the point with the same latitude (moving east-west)
    delta_lon = distance / (R * math.cos(lat_rad))
    lon1 = lon + math.degrees(delta_lon)

    # Calculate the point with the same longitude (moving north-south)
    delta_lat = distance / R
    lat2 = lat + math.degrees(delta_lat)

    return ((lat, lon1), (lat2, lon))
## 2d map grid (0,0) --> bottom left
def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:
    """Split a bounding box into ``rows`` x ``cols`` cells and return their centres.

    Args:
        bottom_left: ``(lat, lon)`` of the box's bottom-left corner.
        top_right: ``(lat, lon)`` of the box's top-right corner.
        rows: Number of cells along the latitude axis.
        cols: Number of cells along the longitude axis.

    Returns:
        ``grid[i][j]`` is the ``(lat, lon)`` centre of the cell in row ``i``
        and column ``j``, where ``(0, 0)`` is the bottom-left cell. When
        ``rows`` or ``cols`` is not positive the grid has no cells (one empty
        list per row, if any) instead of dividing by zero.
    """
    if rows <= 0 or cols <= 0:
        # Nothing to subdivide; also avoids a ZeroDivisionError below.
        return [[] for _ in range(rows)]

    lat_unit = (top_right[0] - bottom_left[0]) / rows
    lon_unit = (top_right[1] - bottom_left[1]) / cols

    # Cell origin plus half a cell gives the cell centre.
    return [
        [
            (
                bottom_left[0] + i * lat_unit + lat_unit / 2,
                bottom_left[1] + j * lon_unit + lon_unit / 2,
            )
            for j in range(cols)
        ]
        for i in range(rows)
    ]
## entire pipeline
# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35
def input_filter(lat=None, lon=None, string=None):
    """Normalise the supported coordinate inputs to a ``(lat, lon)`` pair.

    Args:
        lat: Latitude; when given, it takes precedence over ``string``.
        lon: Longitude accompanying ``lat`` (returned as passed).
        string: Degrees/minutes/seconds text, parsed with ``dms_to_decimal``
            when ``lat`` is not given.

    Returns:
        ``(lat, lon)`` when ``lat`` is given, the parsed coordinates when only
        ``string`` is given, otherwise ``None``.

    Raises:
        ValueError: Propagated from ``dms_to_decimal`` for malformed text.
    """
    # Identity comparison with None: a latitude of 0 is a valid input.
    if lat is not None:
        return (lat, lon)
    if string is not None:
        latitude, longitude = dms_to_decimal(string)
        return (latitude, longitude)
    return None
# Location types that are dropped before a cell's text summary is built.
_EXCLUDED_LOCATION_TYPES = ('Other', 'Religious', 'Transportation')


def _describe_locations(result_df):
    """Render one cell's locations as ``'<name> is a <type>; '`` fragments."""
    kept = result_df[~result_df['Location Type'].isin(_EXCLUDED_LOCATION_TYPES)]
    # dict.fromkeys removes duplicate (name, type) pairs while keeping the
    # order of first appearance.
    unique_pairs = dict.fromkeys(zip(kept['Location Name'], kept['Location Type']))
    text = ''.join(f'{name} is a {loc_type}; ' for name, loc_type in unique_pairs)
    ## replacing any comma in the text with a blank space
    return text.replace(',', ' ')


def get_data(bottom_left_lat, bottom_left_lon, dist, radius=710):
    """Build a per-cell text description of the OSM data in a square area.

    The area starts at the given bottom-left corner and extends
    ``1000 * dist`` metres north and east. It is split into ``dist`` x
    ``dist`` cells, and ``get_osm_data`` is called once per cell centre.

    Args:
        bottom_left_lat: Latitude of the area's bottom-left corner.
        bottom_left_lon: Longitude of the area's bottom-left corner.
        dist: Side length of the area; also used as the number of grid rows
            and columns, so it must be an integer.
        radius: Search radius passed to ``get_osm_data`` for every cell.
            NOTE(review): the default of 710 is presumably chosen to cover a
            1000 m cell out to its corners (half-diagonal is about 707 m);
            confirm.

    Returns:
        A DataFrame with one row per cell and the columns ``row``, ``col``,
        ``latitude``, ``longitude`` and ``Map Data``. ``Map Data`` joins the
        cell's unique, non-excluded locations as ``'<name> is a <type>; '``
        with commas replaced by spaces.
    """
    east_point, north_point = calculate_distant_points(bottom_left_lat, bottom_left_lon, 1000*dist)
    # The top-right corner takes its latitude from the northern point and its
    # longitude from the eastern point.
    top_right = (north_point[0], east_point[1])
    grid = create_map_grid((bottom_left_lat, bottom_left_lon), top_right, dist, dist)

    grid_dataset = []
    for i, grid_cells in enumerate(grid):
        for j, point in enumerate(grid_cells):
            result_df = get_osm_data(point[0], point[1], radius)
            grid_dataset.append({
                "row": i,
                "col": j,
                "latitude": point[0],
                "longitude": point[1],
                "Map Data": _describe_locations(result_df),
            })

    grid_df = pd.DataFrame(grid_dataset)
    return grid_df
# grid_df.to_csv('MMR_DATASET.csv', index=False)