File size: 8,012 Bytes
c161b3b 1662e26 c161b3b 1662e26 c161b3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import requests
import pandas as pd
import re
import math
from typing import Tuple, List, Dict
def fetch_osm_data(lat: float, lon: float, radius: int) -> List[Dict]:
    """Fetch OpenStreetMap elements around a point via the Overpass API.

    Args:
        lat: Latitude of the search centre in decimal degrees.
        lon: Longitude of the search centre in decimal degrees.
        radius: Search radius passed to the Overpass ``around`` filter
            (metres).

    Returns:
        The ``elements`` list of the JSON response.

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx HTTP status.
    """
    # NOTE(review): the endpoint URL and the body of the query were missing
    # from the original source (only ``out center;`` survived). They are
    # reconstructed here as the public Overpass endpoint and a plain
    # "everything within `radius` of the point" query -- confirm that this
    # matches the intended filter.
    overpass_url = "https://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      node(around:{radius},{lat},{lon});
      way(around:{radius},{lat},{lon});
      relation(around:{radius},{lat},{lon});
    );
    out center;
    """
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        timeout=180,
    )
    # Overpass answers rate-limit/overload errors with a non-JSON body, so
    # fail with a clear HTTP error instead of a JSON decoding error.
    response.raise_for_status()
    data = response.json()
    return data['elements']
def determine_location_type(tags: Dict[str, str]) -> str:
    """Map an OSM tag dictionary to a coarse location category.

    The checks run in a fixed priority order and the first match wins.

    Args:
        tags: OSM key/value tags of a single element.

    Returns:
        A category label such as ``'Residential'`` or ``'Commercial'``;
        ``'Landuse: <Value>'`` for otherwise unclassified ``landuse`` tags;
        ``'Other'`` when nothing matches.
    """
    building = tags.get('building')
    amenity = tags.get('amenity')

    # Residential
    if building in ('residential', 'house', 'apartments', 'detached',
                    'terrace', 'dormitory', 'bungalow'):
        return 'Residential'

    # Commercial
    if any(key in tags for key in ('shop', 'office', 'craft')):
        return 'Commercial'
    if building in ('commercial', 'office', 'retail', 'supermarket', 'kiosk'):
        return 'Commercial'

    # Industrial
    if building in ('industrial', 'warehouse', 'factory', 'manufacture'):
        return 'Industrial'
    if 'industrial' in tags or 'industry' in tags:
        return 'Industrial'

    # Educational
    if amenity in ('school', 'university', 'college', 'library',
                   'kindergarten', 'language_school'):
        return 'Educational'

    # Healthcare
    if amenity in ('hospital', 'clinic', 'doctors', 'dentist', 'pharmacy',
                   'veterinary'):
        return 'Healthcare'

    # Food & Drink
    if amenity in ('restaurant', 'cafe', 'bar', 'fast_food', 'pub',
                   'food_court'):
        return 'Food & Drink'

    # Parks & Recreation
    # Must run before the generic 'leisure' check below; otherwise any
    # element carrying a 'leisure' key is classified as
    # 'Leisure & Entertainment' and this branch can never be reached.
    if tags.get('leisure') in ('park', 'playground', 'sports_centre',
                               'stadium', 'garden'):
        return 'Parks & Recreation'

    # Leisure & Entertainment
    if 'leisure' in tags or 'tourism' in tags:
        return 'Leisure & Entertainment'
    if amenity in ('theatre', 'cinema', 'nightclub', 'arts_centre',
                   'community_centre'):
        return 'Leisure & Entertainment'

    # Transportation
    if amenity in ('parking', 'bicycle_parking', 'bus_station',
                   'ferry_terminal'):
        return 'Transportation'
    if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:
        return 'Transportation'

    # Religious
    if amenity in ('place_of_worship', 'monastery'):
        return 'Religious'

    # Government & Public Services
    if amenity in ('townhall', 'courthouse', 'police', 'fire_station',
                   'post_office'):
        return 'Government & Public Services'

    # Natural
    if 'natural' in tags:
        return 'Natural'

    # Landuse
    if 'landuse' in tags:
        landuse = tags['landuse'].capitalize()
        if landuse in ('Residential', 'Commercial', 'Industrial', 'Retail'):
            return landuse
        return f'Landuse: {landuse}'

    # If no specific category is found, return 'Other'
    return 'Other'
def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:
    """Convert raw OSM elements into a table of classified locations.

    Args:
        elements: Element dictionaries; each must provide ``'type'`` and
            ``'id'`` and may provide a ``'tags'`` dictionary.

    Returns:
        A DataFrame with the columns ``'ID'``, ``'Location Name'`` and
        ``'Location Type'``. The columns are present even when
        ``elements`` is empty.
    """
    columns = ['ID', 'Location Name', 'Location Type']
    parsed_data = []
    for element in elements:
        tags = element.get('tags', {})
        parsed_data.append({
            'ID': f"{element['type']}_{element['id']}",
            'Location Name': tags.get('name', ''),
            'Location Type': determine_location_type(tags),
        })
    # Passing `columns` keeps the schema stable for an empty input, so no
    # separate empty-list branch is required.
    return pd.DataFrame(parsed_data, columns=columns)
def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:
    """Fetch the OSM elements for a point and return them parsed.

    Forwards ``lat``, ``lon`` and ``radius`` to ``fetch_osm_data`` and hands
    the result to ``parse_osm_data``.
    """
    return parse_osm_data(fetch_osm_data(lat, lon, radius))
def dms_to_decimal(coord_str: str) -> Tuple[float, float]:
    """Convert a degrees/minutes/seconds coordinate pair to decimal degrees.

    Args:
        coord_str: Text such as ``19°03'08.6"N 72°54'06.0"E``. Leading and
            trailing whitespace is ignored.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative.

    Raises:
        ValueError: If ``coord_str`` does not start with a coordinate pair
            in the expected format.
    """
    # Regular expression to match the coordinate format
    pattern = r'(\d+)°(\d+)\'([\d.]+)"([NS])\s*(\d+)°(\d+)\'([\d.]+)"([EW])'
    # Pasted coordinates frequently carry surrounding whitespace, which
    # `re.match` would otherwise reject at position 0.
    match = re.match(pattern, coord_str.strip())
    if not match:
        raise ValueError("Invalid coordinate format. Expected format: 19°03'08.6\"N 72°54'06.0\"E")
    lat_deg, lat_min, lat_sec, lat_dir, lon_deg, lon_min, lon_sec, lon_dir = match.groups()

    # Convert to decimal degrees
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600

    # Adjust sign based on direction
    if lat_dir == 'S':
        lat = -lat
    if lon_dir == 'W':
        lon = -lon
    return lat, lon
def calculate_distant_points(lat: float, lon: float, distance: float) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    """Return the points `distance` metres east and north of a start point.

    Args:
        lat: Start latitude in decimal degrees.
        lon: Start longitude in decimal degrees.
        distance: Offset in metres (the same unit as the Earth radius
            constant used below).

    Returns:
        ``((lat, lon_east), (lat_north, lon))``: first the point on the same
        latitude with the longitude shifted, then the point on the same
        longitude with the latitude shifted.
    """
    # Earth's radius in meters
    earth_radius = 6371000

    # Same latitude, moving east-west: the offset is taken along the circle
    # of latitude, whose radius shrinks with cos(latitude).
    delta_lon = distance / (earth_radius * math.cos(math.radians(lat)))
    lon_east = lon + math.degrees(delta_lon)

    # Same longitude, moving north-south along a meridian.
    delta_lat = distance / earth_radius
    lat_north = lat + math.degrees(delta_lat)

    return ((lat, lon_east), (lat_north, lon))
# 2D map grid: cell (0, 0) is the bottom-left corner.
def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:
    """Split a bounding box into `rows` x `cols` cells and return their centres.

    Args:
        bottom_left: ``(lat, lon)`` of the bottom-left corner.
        top_right: ``(lat, lon)`` of the top-right corner.
        rows: Number of cells along the latitude axis.
        cols: Number of cells along the longitude axis.

    Returns:
        ``grid[i][j]`` is the ``(lat, lon)`` centre of the cell in row ``i``
        and column ``j``; row 0 / column 0 is the cell at ``bottom_left``.
    """
    lat_unit = (top_right[0] - bottom_left[0]) / rows
    lon_unit = (top_right[1] - bottom_left[1]) / cols

    grid = []
    for i in range(rows):
        row = []
        for j in range(cols):
            # Cell origin plus half a cell gives the cell centre.
            lat = bottom_left[0] + i * lat_unit + lat_unit / 2
            lon = bottom_left[1] + j * lon_unit + lon_unit / 2
            row.append((lat, lon))
        # Each finished row has to be stored; without this the function
        # would always return an empty grid.
        grid.append(row)
    return grid
## entire pipeline
# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35
def input_filter(lat=None, lon=None, string=None):
    """Normalise the accepted coordinate inputs to a ``(lat, lon)`` tuple.

    Args:
        lat: Latitude; when given it takes precedence over ``string``.
        lon: Longitude, returned unchanged together with ``lat``.
        string: Degrees/minutes/seconds text handed to ``dms_to_decimal``.

    Returns:
        ``(lat, lon)`` when ``lat`` is given, the converted coordinates when
        only ``string`` is given, otherwise ``None``.
    """
    # Identity checks: `!= None` goes through `__ne__`, which array-like or
    # custom numeric types may override with a non-boolean result.
    if lat is not None:
        return (lat, lon)
    if string is not None:
        latitude, longitude = dms_to_decimal(string)
        return (latitude, longitude)
    return None
def _describe_locations(result_df):
    """Build the ``'<name> is a <type>; '`` text for one grid cell."""
    excluded = ('Other', 'Religious', 'Transportation')
    labelled_df = result_df[~result_df['Location Type'].isin(excluded)]
    # dict.fromkeys removes duplicate (name, type) pairs while keeping the
    # order of first appearance.
    unique_pairs = dict.fromkeys(
        zip(labelled_df['Location Name'], labelled_df['Location Type'])
    )
    text = ''.join(f"{name} is a {kind}; " for name, kind in unique_pairs)
    # Replace any comma in the text with a blank space.
    return text.replace(',', ' ')


def get_data(bottom_left_lat, bottom_left_lon, dist):
    """Build a per-cell text description of the map area as a DataFrame.

    The area starts at the given bottom-left corner and extends
    ``1000 * dist`` metres east and north. It is divided into
    ``dist`` x ``dist`` cells, and OSM data is fetched for every cell centre.

    Args:
        bottom_left_lat: Latitude of the bottom-left corner (decimal degrees).
        bottom_left_lon: Longitude of the bottom-left corner (decimal degrees).
        dist: Number of grid rows and columns; also the side length of the
            area in units of 1000 metres.

    Returns:
        A DataFrame with the columns ``'row'``, ``'col'``, ``'latitude'``,
        ``'longitude'`` and ``'Map Data'``, one row per grid cell.
    """
    result = calculate_distant_points(bottom_left_lat, bottom_left_lon, 1000 * dist)
    top_right_lat = result[1][0]
    top_right_lon = result[0][1]
    grid = create_map_grid(
        (bottom_left_lat, bottom_left_lon),
        (top_right_lat, top_right_lon),
        dist,
        dist,
    )

    grid_dataset = []
    for i, grid_row in enumerate(grid):
        for j, point in enumerate(grid_row):
            # NOTE(review): 710 is presumably chosen to cover a full
            # 1000 m cell from its centre (half diagonal ~707 m) -- confirm.
            result_df = get_osm_data(point[0], point[1], 710)
            grid_dataset.append({
                "row": i,
                "col": j,
                "latitude": point[0],
                "longitude": point[1],
                "Map Data": _describe_locations(result_df),
            })

    # Explicit columns keep the schema stable even when the grid is empty.
    return pd.DataFrame(
        grid_dataset,
        columns=["row", "col", "latitude", "longitude", "Map Data"],
    )
# grid_df.to_csv('MMR_DATASET.csv', index=False)