File size: 8,012 Bytes
c161b3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1662e26
 
 
c161b3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1662e26
c161b3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import requests
import pandas as pd
import re
import math
from typing import Tuple, List, Dict

def fetch_osm_data(lat: float, lon: float, radius: int, timeout: float = 60.0) -> List[Dict]:
    """Query the Overpass API for named OSM elements around a point.

    Args:
        lat: Latitude of the search centre, in decimal degrees.
        lon: Longitude of the search centre, in decimal degrees.
        radius: Value passed to the Overpass ``around`` filter.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        The ``elements`` list from the Overpass JSON response (nodes, ways
        and relations that carry a ``name`` tag).

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        KeyError: If the decoded response has no ``elements`` entry.
    """
    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    (
      node["name"](around:{radius},{lat},{lon});
      way["name"](around:{radius},{lat},{lon});
      relation["name"](around:{radius},{lat},{lon});
    );
    out center;
    """

    response = requests.get(
        overpass_url,
        params={'data': overpass_query},
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of attempting to decode an error page.
    response.raise_for_status()
    data = response.json()
    return data['elements']

def determine_location_type(tags: Dict[str, str]) -> str:
    """Classify an OSM element into a coarse category based on its tags.

    The rules are evaluated from top to bottom and the first match wins, so
    the order of the checks below is significant.

    Args:
        tags: The element's OSM tag mapping (key -> value).

    Returns:
        One of the category labels used below (e.g. ``'Residential'``,
        ``'Commercial'``, ``'Parks & Recreation'``), ``'Landuse: <Value>'``
        for an unrecognised ``landuse`` value, or ``'Other'`` when no rule
        matches.
    """
    building = tags.get('building')
    amenity = tags.get('amenity')

    # Residential
    if building in ('residential', 'house', 'apartments', 'detached', 'terrace', 'dormitory', 'bungalow'):
        return 'Residential'

    # Commercial
    if any(key in tags for key in ('shop', 'office', 'craft')):
        return 'Commercial'
    if building in ('commercial', 'office', 'retail', 'supermarket', 'kiosk'):
        return 'Commercial'

    # Industrial
    if building in ('industrial', 'warehouse', 'factory', 'manufacture'):
        return 'Industrial'
    if 'industrial' in tags or 'industry' in tags:
        return 'Industrial'

    # Educational
    if amenity in ('school', 'university', 'college', 'library', 'kindergarten', 'language_school'):
        return 'Educational'

    # Healthcare
    if amenity in ('hospital', 'clinic', 'doctors', 'dentist', 'pharmacy', 'veterinary'):
        return 'Healthcare'

    # Food & Drink
    if amenity in ('restaurant', 'cafe', 'bar', 'fast_food', 'pub', 'food_court'):
        return 'Food & Drink'

    # Parks & Recreation
    # Must be tested before the generic "any leisure tag" rule below;
    # otherwise that rule matches first and this category is never returned.
    if tags.get('leisure') in ('park', 'playground', 'sports_centre', 'stadium', 'garden'):
        return 'Parks & Recreation'

    # Leisure & Entertainment
    if 'leisure' in tags or 'tourism' in tags:
        return 'Leisure & Entertainment'
    if amenity in ('theatre', 'cinema', 'nightclub', 'arts_centre', 'community_centre'):
        return 'Leisure & Entertainment'

    # Transportation
    if amenity in ('parking', 'bicycle_parking', 'bus_station', 'ferry_terminal'):
        return 'Transportation'
    if 'highway' in tags or 'railway' in tags or 'aeroway' in tags:
        return 'Transportation'

    # Religious
    if amenity in ('place_of_worship', 'monastery'):
        return 'Religious'

    # Government & Public Services
    if amenity in ('townhall', 'courthouse', 'police', 'fire_station', 'post_office'):
        return 'Government & Public Services'

    # Natural
    if 'natural' in tags:
        return 'Natural'

    # Landuse
    if 'landuse' in tags:
        landuse = tags['landuse'].capitalize()
        if landuse in ('Residential', 'Commercial', 'Industrial', 'Retail'):
            return landuse
        return f'Landuse: {landuse}'

    # If no specific category is found, return 'Other'
    return 'Other'

def parse_osm_data(elements: List[Dict]) -> pd.DataFrame:
    """Turn raw Overpass elements into a three-column DataFrame.

    Args:
        elements: Element dicts, each providing ``type`` and ``id`` and
            optionally a ``tags`` mapping.

    Returns:
        A DataFrame with the columns ``ID`` (``"<type>_<id>"``),
        ``Location Name`` (the ``name`` tag, or ``''`` when absent) and
        ``Location Type`` (as decided by ``determine_location_type``).
        An empty input yields an empty frame that still has these columns.
    """

    def _to_record(item: Dict) -> Dict[str, str]:
        # Elements without tags are treated as having an empty tag set.
        item_tags = item.get('tags', {})
        return {
            'ID': f"{item['type']}_{item['id']}",
            'Location Name': item_tags.get('name', ''),
            'Location Type': determine_location_type(item_tags),
        }

    records = [_to_record(item) for item in elements]
    if records:
        return pd.DataFrame(records)
    # No rows: build the frame explicitly so the expected columns exist.
    return pd.DataFrame(columns=['ID', 'Location Name', 'Location Type'])

def get_osm_data(lat: float, lon: float, radius: int) -> pd.DataFrame:
    """Fetch the named OSM elements around a point and return them parsed.

    Args:
        lat: Latitude of the search centre, in decimal degrees.
        lon: Longitude of the search centre, in decimal degrees.
        radius: Search radius forwarded to ``fetch_osm_data``.

    Returns:
        The DataFrame produced by ``parse_osm_data`` for the fetched elements.
    """
    return parse_osm_data(fetch_osm_data(lat, lon, radius))

def dms_to_decimal(coord_str: str) -> Tuple[float, float]:
    """Convert a degrees/minutes/seconds coordinate pair to decimal degrees.

    Args:
        coord_str: Text such as ``19°03'08.6"N 72°54'06.0"E``. Leading
            whitespace is ignored, and anything following the longitude
            hemisphere letter is not inspected.

    Returns:
        ``(latitude, longitude)`` in decimal degrees; southern latitudes and
        western longitudes are negative.

    Raises:
        ValueError: If the text does not start with a coordinate pair in the
            expected format.
    """
    # Leading \s* tolerates copy-pasted input that starts with spaces or a
    # newline; the match is intentionally not anchored at the end.
    pattern = r'\s*(\d+)°(\d+)\'([\d.]+)"([NS])\s*(\d+)°(\d+)\'([\d.]+)"([EW])'

    match = re.match(pattern, coord_str)
    if not match:
        raise ValueError("Invalid coordinate format. Expected format: 19°03'08.6\"N 72°54'06.0\"E")

    lat_deg, lat_min, lat_sec, lat_dir, lon_deg, lon_min, lon_sec, lon_dir = match.groups()

    # Convert to decimal degrees
    lat = float(lat_deg) + float(lat_min) / 60 + float(lat_sec) / 3600
    lon = float(lon_deg) + float(lon_min) / 60 + float(lon_sec) / 3600

    # Adjust sign based on hemisphere
    if lat_dir == 'S':
        lat = -lat
    if lon_dir == 'W':
        lon = -lon

    return lat, lon


def calculate_distant_points(lat: float, lon: float, distance: float) -> tuple:
    """Offset a point by ``distance`` along its parallel and its meridian.

    Uses a spherical Earth with a radius of 6,371,000 m.

    Args:
        lat: Latitude of the starting point, in decimal degrees.
        lon: Longitude of the starting point, in decimal degrees.
        distance: Offset in metres; it is added to the longitude and to the
            latitude respectively.

    Returns:
        ``((lat, shifted_lon), (shifted_lat, lon))``: first the point at the
        same latitude with the longitude shifted, then the point at the same
        longitude with the latitude shifted.
    """
    earth_radius_m = 6371000

    # Same latitude: the circle of latitude shrinks with cos(latitude), so
    # the angular step is measured against that smaller radius.
    parallel_radius_m = earth_radius_m * math.cos(math.radians(lat))
    shifted_lon = lon + math.degrees(distance / parallel_radius_m)

    # Same longitude: meridians are great circles of the full radius.
    shifted_lat = lat + math.degrees(distance / earth_radius_m)

    return ((lat, shifted_lon), (shifted_lat, lon))

## 2D map grid: cell (0, 0) is the bottom-left corner

def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, float], rows: int, cols: int) -> List[List[Tuple[float, float]]]:
    """Split a lat/lon bounding box into cells and return the cell centres.

    Args:
        bottom_left: ``(lat, lon)`` of the box's bottom-left corner.
        top_right: ``(lat, lon)`` of the box's top-right corner.
        rows: Number of cells along the latitude axis.
        cols: Number of cells along the longitude axis.

    Returns:
        ``rows`` lists of ``cols`` ``(lat, lon)`` tuples. Entry ``[0][0]`` is
        the centre of the cell touching ``bottom_left``; the row index grows
        with latitude and the column index with longitude.
    """
    origin_lat = bottom_left[0]
    origin_lon = bottom_left[1]
    cell_height = (top_right[0] - origin_lat) / rows
    cell_width = (top_right[1] - origin_lon) / cols

    # Each centre is the cell's lower-left corner plus half a cell.
    return [
        [
            (
                origin_lat + r * cell_height + cell_height / 2,
                origin_lon + c * cell_width + cell_width / 2,
            )
            for c in range(cols)
        ]
        for r in range(rows)
    ]

## entire pipeline

# left_lat = 18.889833
# left_lon = 72.779844
# dist = 35

def input_filter(lat=None, lon=None, string=None):
    """Normalise user input to a ``(latitude, longitude)`` pair.

    Args:
        lat: Latitude in decimal degrees. When given, it takes precedence
            over ``string`` and is returned together with ``lon`` as-is
            (``lon`` is not validated).
        lon: Longitude in decimal degrees.
        string: A degrees/minutes/seconds coordinate string, used only when
            ``lat`` is ``None``; it is parsed with ``dms_to_decimal``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when neither ``lat``
        nor ``string`` was supplied.
    """
    # Identity checks: a latitude of 0.0 is valid input and must not be
    # confused with "missing".
    if lat is not None:
        return (lat, lon)
    if string is not None:
        latitude, longitude = dms_to_decimal(string)
        return (latitude, longitude)
    return None

def get_data(bottom_left_lat, bottom_left_lon, dist, radius=710):
    """Build a grid over a square area and describe the places in each cell.

    The area starts at the given bottom-left corner and extends ``dist``
    kilometres to the north and to the east. It is split into ``dist`` x
    ``dist`` cells, and for every cell centre the named OSM elements within
    ``radius`` are fetched via ``get_osm_data``.

    Args:
        bottom_left_lat: Latitude of the bottom-left corner, decimal degrees.
        bottom_left_lon: Longitude of the bottom-left corner, decimal degrees.
        dist: Side length of the area in kilometres; also used as the number
            of grid rows and columns, so it must be a positive integer.
        radius: Search radius passed to ``get_osm_data`` for each cell.
            The default of 710 presumably covers the ~707 m half-diagonal of
            a 1 km cell -- TODO confirm the intent.

    Returns:
        A DataFrame with one row per cell and the columns ``row``, ``col``,
        ``latitude``, ``longitude`` and ``Map Data``. ``Map Data`` is a
        string of ``"<name> is a <type>; "`` fragments with commas replaced
        by spaces. Elements typed ``Other``, ``Religious`` or
        ``Transportation`` are left out, and repeated (name, type) pairs
        appear once.
    """
    east_point, north_point = calculate_distant_points(bottom_left_lat, bottom_left_lon, 1000 * dist)

    # The top-right corner takes its latitude from the northward point and
    # its longitude from the eastward point.
    top_right = (north_point[0], east_point[1])
    grid = create_map_grid((bottom_left_lat, bottom_left_lon), top_right, dist, dist)

    excluded_types = ['Other', 'Religious', 'Transportation']

    grid_dataset = []
    for i, grid_row in enumerate(grid):
        for j, (cell_lat, cell_lon) in enumerate(grid_row):
            result_df = get_osm_data(cell_lat, cell_lon, radius)
            labelled_df = result_df[~result_df['Location Type'].isin(excluded_types)]

            # dict.fromkeys drops duplicate (name, type) pairs while keeping
            # the order of first appearance.
            unique_places = dict.fromkeys(
                zip(labelled_df['Location Name'], labelled_df['Location Type'])
            )

            map_text = ''.join(
                name + ' is a ' + kind + '; ' for name, kind in unique_places
            )
            # Replace any comma in the text with a blank space.
            map_text = map_text.replace(',', ' ')

            grid_dataset.append({
                "row": i,
                "col": j,
                "latitude": cell_lat,
                "longitude": cell_lon,
                "Map Data": map_text,
            })

    grid_df = pd.DataFrame(grid_dataset)
    return grid_df
    # grid_df.to_csv('MMR_DATASET.csv', index=False)