postprocess ready for merging
Browse files
@@ -1,4 +1,5 @@
1 |
2 |
3 |
4 |
1 |
2 |
3 |
4 |
5 |
@@ -1,3 +1,3 @@
1 |
2 |
oid sha256:
3 |
1 |
2 |
oid sha256:011624c558fe8cee49c30f0e7b907e0f11065c04e870d9a2492a0d1fb3fa64d8
3 |
size 29589
@@ -0,0 +1,354 @@
1 |
import json
2 |
from shapely.geometry import Polygon, Point
3 |
from shapely.ops import unary_union
4 |
import matplotlib.pyplot as plt
5 |
from matplotlib.patches import Polygon as MplPolygon
6 |
import numpy as np
7 |
import pandas as pd
8 |
9 |
10 |
def add_extreme_coordinates(polygon_data):
    """Annotate a GeoJSON feature with the extremes of its first ring.

    Reads ``polygon_data["geometry"]["coordinates"][0]`` and stores the keys
    ``max_lat``, ``min_lat``, ``max_lon`` and ``min_lon`` in the feature's
    ``"geometry"`` dict. Column 1 of each coordinate pair feeds the ``*_lat``
    keys and column 0 the ``*_lon`` keys. The feature is modified in place;
    nothing is returned.
    """
    geometry = polygon_data["geometry"]
    ring = np.array(geometry["coordinates"][0])

    # (key, reducer, coordinate column), listed in insertion order.
    extremes = (
        ("max_lat", max, 1),
        ("min_lat", min, 1),
        ("max_lon", max, 0),
        ("min_lon", min, 0),
    )
    for key, reducer, column in extremes:
        geometry[key] = reducer(ring[:, column])
17 |
18 |
19 |
def turn_into_dataframe(data):
    """Flatten a GeoJSON FeatureCollection dict into one row per feature.

    Args:
        data: Parsed GeoJSON dict with a ``"features"`` list. Each feature
            needs ``"type"``, ``"properties"`` and ``"geometry"`` entries;
            the features are annotated in place with bounding-box keys.

    Returns:
        A DataFrame holding the flattened ``properties`` and ``geometry``
        fields, with ``coordinates`` reduced to the first ring and a
        ``polygon`` column of shapely ``Polygon`` objects built from it.
    """
    features = data["features"]

    # NOTE(review): the loop body was empty in the reviewed source. The
    # bounding-box columns (max_lat, min_lat, max_lon, min_lon) are read by
    # the overlap filters later in this file, and this helper is the only
    # code that produces them, so it is called for every feature here.
    for feature in features:
        add_extreme_coordinates(feature)

    df = pd.DataFrame(features).drop(columns="type")

    for dict_col in ("properties", "geometry"):
        dict_df = pd.json_normalize(df[dict_col])
        # Merge the new columns back into the original DataFrame
        df = df.drop(columns=[dict_col]).join(dict_df)

    # Keep only the first ring of each geometry.
    df["coordinates"] = df["coordinates"].apply(lambda rings: rings[0])
    df["polygon"] = df["coordinates"].apply(Polygon)

    # This "type" column comes from the flattened geometry dict.
    df = df.drop(columns=["type"])
    return df
37 |
38 |
def plot_polygon(ax, polygon, color, label="label"):
    """Fill the exterior ring of ``polygon`` on ``ax`` at 50 % opacity.

    Polygons whose ``is_empty`` flag is set are skipped without drawing.
    """
    if polygon.is_empty:
        return
    xs, ys = polygon.exterior.xy
    ax.fill(xs, ys, color=color, alpha=0.5, label=label)
43 |
44 |
45 |
def plot_polygons(list_polygons, first_one_different=False, dpi=150):
    """Draw all polygons on a fresh figure.

    Args:
        list_polygons: Polygons to draw, each passed to ``plot_polygon``.
        first_one_different: When true, the first polygon is drawn in red
            and the others in blue; otherwise every polygon is blue.
        dpi: Resolution of the created figure.
    """
    fig, ax = plt.subplots(dpi=dpi)

    # One pass only: each polygon is drawn exactly once and its label
    # carries its real position in ``list_polygons``.
    for i, polygon in enumerate(list_polygons):
        color = "red" if first_one_different and i == 0 else "blue"
        plot_polygon(ax, polygon, color, f"polygon {i}")

    ax.set_title("Polygons and their Intersection")

    # NOTE(review): the reviewed source ended without displaying or
    # returning the figure; showing it is presumably the intent - confirm.
    plt.show()
69 |
70 |
71 |
72 |
73 |
74 |
75 |
def plot_polygons_with_colors(list_polygons, list_colors, dpi=150):
    """Draw each polygon in its paired colour on a fresh figure.

    Args:
        list_polygons: Polygons to draw, each passed to ``plot_polygon``.
        list_colors: Colours paired with the polygons by position; pairing
            stops at the shorter of the two sequences.
        dpi: Resolution of the created figure.
    """
    fig, ax = plt.subplots(dpi=dpi)

    for polygon, color in zip(list_polygons, list_colors):
        plot_polygon(ax, polygon, color)

    ax.set_title("Polygons and their Intersection")

    # NOTE(review): the reviewed source ended without displaying or
    # returning the figure; showing it is presumably the intent - confirm.
    plt.show()
88 |
89 |
90 |
91 |
92 |
93 |
94 |
def plot_polygons_from_df(df, dpi=150):
    """Plot every geometry stored in the ``"polygon"`` column of ``df``.

    Args:
        df: DataFrame with a ``"polygon"`` column.
        dpi: Forwarded to ``plot_polygons``.
    """
    # NOTE(review): the row loop in the reviewed source had no body, so the
    # list stayed empty; the column is handed over directly instead.
    plot_polygons(list_polygons=df["polygon"].tolist(), dpi=dpi)
99 |
100 |
101 |
def map_color(id):
    """Return the plot colour for a polygon id.

    The argument is not inspected: every id maps to the same colour.
    """
    default_color = "blue"
    return default_color
103 |
104 |
105 |
def plot_polygons_from_df_with_color(df, dpi=150):
    """Plot the polygons of ``df`` using a colour derived from each id.

    Side effect: a ``"plot_colors"`` column (``map_color`` applied to
    ``"id"``) is written to ``df``.

    Args:
        df: DataFrame with ``"id"`` and ``"polygon"`` columns.
        dpi: Forwarded to ``plot_polygons_with_colors``.
    """
    df["plot_colors"] = df["id"].apply(map_color)

    # NOTE(review): in the reviewed source the row loop had no body and the
    # call receiving these keyword arguments was missing; both lists are
    # taken straight from the columns.
    plot_polygons_with_colors(
        list_polygons=df["polygon"].tolist(),
        list_colors=df["plot_colors"].tolist(),
        dpi=dpi,
    )
115 |
116 |
117 |
def intersection(polygon, polygon_comparison):
    """Return ``polygon.intersection(polygon_comparison)``."""
    shared_region = polygon.intersection(polygon_comparison)
    return shared_region
119 |
120 |
121 |
def intersection_area(polygon, polygon_comparison):
    """Return the ``area`` of the intersection of the two geometries."""
    shared_region = intersection(polygon, polygon_comparison)
    return shared_region.area
123 |
124 |
125 |
def intersection_area_ratio(polygon, polygon_comparison):
    """Return the share of ``polygon``'s area that ``polygon_comparison`` covers.

    Args:
        polygon: Geometry whose area is the denominator.
        polygon_comparison: Geometry intersected with ``polygon``.

    Returns:
        ``intersection_area / polygon.area``, or ``0.0`` when ``polygon``
        has zero area.
    """
    own_area = polygon.area
    # A degenerate (zero-area) polygon cannot be covered by anything;
    # reporting 0.0 avoids a ZeroDivisionError that would abort the run.
    if own_area == 0:
        return 0.0
    return intersection_area(polygon, polygon_comparison) / own_area
127 |
128 |
def containsPoint(polygonB, polygon):
    """Tell whether ``polygon`` contains any exterior vertex of ``polygonB``.

    Each vertex returned by ``get_coordinates(polygonB)`` is wrapped in a
    ``Point`` and tested with ``polygon.contains``; the search stops at the
    first hit.
    """
    return any(
        polygon.contains(Point(vertex)) for vertex in get_coordinates(polygonB)
    )
136 |
137 |
def get_coordinates(polygon):
    """Return the exterior ring of ``polygon`` as a list of lists."""
    return list(map(list, polygon.exterior.coords))
141 |
142 |
def mark_id_to_be_dropped(df, id_string):
    """Set ``to_drop`` to True, in place, on rows whose ``id`` equals ``id_string``."""
    matches = df["id"] == id_string
    df.loc[matches, "to_drop"] = True
144 |
145 |
def mark_id_to_be_merged(df, id_string):
    """Set ``to_merge`` to True, in place, on rows whose ``id`` equals ``id_string``."""
    matches = df["id"] == id_string
    df.loc[matches, "to_merge"] = True
147 |
148 |
def calc_overlapping_subset(df_input, index):
    """Select the rows whose bounding box interleaves with that of one row.

    The reference row is ``df_input.iloc[index]``. On each axis (lat, lon) a
    row qualifies when an edge of the reference interval lies strictly
    inside the row's interval, or an edge of the row's interval lies
    strictly inside the reference interval. A row must qualify on both
    axes. Because every comparison is strict, a row whose interval equals
    the reference interval on an axis - the reference row itself included -
    is not selected.

    Args:
        df_input: DataFrame with ``max_lat``, ``min_lat``, ``max_lon`` and
            ``min_lon`` columns.
        index: Position of the reference row.

    Returns:
        The matching rows of ``df_input``.
    """
    reference = df_input.iloc[index]
    max_lat, min_lat = reference["max_lat"], reference["min_lat"]
    max_lon, min_lon = reference["max_lon"], reference["min_lon"]

    def edges_interleave(low, high, other_low, other_high):
        # Reference edge strictly inside the other row's interval.
        reference_edge_inside = ((high < other_high) & (high > other_low)) | (
            (low < other_high) & (low > other_low)
        )
        # Other row's edge strictly inside the reference interval.
        other_edge_inside = ((other_high < high) & (other_high > low)) | (
            (other_low > low) & (other_low < high)
        )
        return reference_edge_inside | other_edge_inside

    lat_overlap = edges_interleave(
        min_lat, max_lat, df_input["min_lat"], df_input["max_lat"]
    )
    lon_overlap = edges_interleave(
        min_lon, max_lon, df_input["min_lon"], df_input["max_lon"]
    )
    return df_input.loc[lat_overlap & lon_overlap]
162 |
163 |
def remove_contained_poylgons(df_input, threshold=0.85):
    """Drop the smaller polygon of every heavily overlapping pair.

    For each row, the candidates returned by ``calc_overlapping_subset`` are
    compared with it. When the intersection covers more than ``threshold``
    of either polygon's area, the polygon with the smaller area is flagged
    in ``to_drop``. Flagged rows are removed once all rows were visited.

    Args:
        df_input: DataFrame with ``id``, ``polygon`` and the bounding-box
            columns. It is not modified. A ``to_drop`` column is created
            when absent.
        threshold: Minimum covered share (0-1) that counts as "contained".
            Experiment with this value; it has to be smaller than 0.9.

    Returns:
        A DataFrame with the rows that were not flagged.
    """
    df_result = df_input.copy()
    if "to_drop" not in df_result.columns:
        df_result["to_drop"] = False

    for i in range(len(df_result)):
        current = df_result.iloc[i]
        polygon_a = current["polygon"]
        candidates = calc_overlapping_subset(df_input=df_result, index=i)

        for _, other in candidates.iterrows():
            polygon_b = other["polygon"]
            ratio_current = intersection_area_ratio(
                polygon=polygon_a, polygon_comparison=polygon_b
            )
            ratio_other = intersection_area_ratio(
                polygon=polygon_b, polygon_comparison=polygon_a
            )
            if not (ratio_current > threshold or ratio_other > threshold):
                continue

            area_a, area_b = polygon_a.area, polygon_b.area
            # Equal areas are broken by id so that the pair (A, B) and the
            # pair (B, A) agree on the survivor; otherwise both polygons
            # would flag themselves and disappear.
            keep_current = area_a > area_b or (
                area_a == area_b and str(current["id"]) <= str(other["id"])
            )
            loser = other if keep_current else current
            mark_id_to_be_dropped(df=df_result, id_string=loser["id"])

    # Remove all polygons that had a marked id.
    return df_result.loc[~df_result["to_drop"].astype(bool)]
189 |
190 |
def merge(df_input, polygon_index, merge_subset):
    """Union one polygon with a set of others and remove the absorbed rows.

    For every row of ``merge_subset`` the target polygon is replaced by the
    exterior ring of its union with that row's polygon. Its bounding-box
    columns and ``coordinates`` are refreshed, its ``Confidence_score``
    becomes the mean of the two scores, and the absorbed row is removed.

    Args:
        df_input: DataFrame with ``id``, ``polygon``, ``coordinates``,
            ``Confidence_score`` and the bounding-box columns. Not modified.
        polygon_index: Position in ``df_input`` of the polygon that absorbs
            the others.
        merge_subset: Rows of ``df_input`` to merge into the target.

    Returns:
        A new DataFrame with the updated target row and without the rows of
        ``merge_subset``.

    Note:
        Only ``union.exterior`` is kept, so holes are discarded.
        Assumes each union is a single polygon - TODO confirm for inputs
        that overlap only through invalid geometry.
    """
    df_result = df_input.copy()

    # Resolve the target by id once. Positions shift as absorbed rows are
    # removed, so re-reading ``iloc[polygon_index]`` inside the loop could
    # address a different polygon.
    target_id = df_result.iloc[polygon_index]["id"]

    for _, other in merge_subset.iterrows():
        if other["id"] == target_id:
            # Never merge the target with itself; that would delete it.
            continue

        is_target = df_result["id"] == target_id
        target = df_result.loc[is_target].iloc[0]

        union = target["polygon"].union(other["polygon"])
        merged_coordinates = list(union.exterior.coords)
        merged_polygon = Polygon(merged_coordinates)

        coordinates = [list(pair) for pair in merged_coordinates]
        lons = [pair[0] for pair in coordinates]
        lats = [pair[1] for pair in coordinates]

        df_result.loc[is_target, "polygon"] = merged_polygon
        df_result.loc[is_target, "min_lon"] = min(lons)
        df_result.loc[is_target, "max_lon"] = max(lons)
        df_result.loc[is_target, "min_lat"] = min(lats)
        df_result.loc[is_target, "max_lat"] = max(lats)
        df_result.loc[is_target, "Confidence_score"] = (
            target["Confidence_score"] + other["Confidence_score"]
        ) / 2
        # Assigned as a Series so the list of pairs lands in a single cell.
        df_result.loc[is_target, "coordinates"] = df_result.loc[
            is_target, "polygon"
        ].apply(get_coordinates)

        df_result = df_result.loc[df_result["id"] != other["id"]].copy()

    return df_result
221 |
222 |
223 |
def merge_overlapping(df_input, threshold=0.40):
    """Merge the first polygon that overlaps its neighbours enough.

    Rows are visited in order. For each row, the candidates from
    ``calc_overlapping_subset`` whose intersection covers more than
    ``threshold`` of either polygon are collected. As soon as a row has at
    least one such candidate, they are merged into it with ``merge`` and
    the function returns.

    Args:
        df_input: DataFrame with ``id``, ``polygon`` and the bounding-box
            columns.
        threshold: Minimum covered share (0-1) that triggers a merge.
            Experiment with this value to get the best results.

    Returns:
        ``(True, merged_df)`` when a merge happened, else
        ``(False, df_input)``.
    """
    for i in range(len(df_input)):
        polygon = df_input.iloc[i]["polygon"]
        candidates = calc_overlapping_subset(df_input=df_input, index=i)

        # Collect ids rather than writing a flag into ``candidates``, which
        # is a slice of ``df_input``.
        ids_to_merge = []
        for _, other in candidates.iterrows():
            other_polygon = other["polygon"]
            ratio_current = intersection_area_ratio(
                polygon=polygon, polygon_comparison=other_polygon
            )
            ratio_other = intersection_area_ratio(
                polygon=other_polygon, polygon_comparison=polygon
            )
            if ratio_current > threshold or ratio_other > threshold:
                ids_to_merge.append(other["id"])

        if ids_to_merge:
            merge_subset = candidates.loc[candidates["id"].isin(ids_to_merge)]
            # Deleting the absorbed rows is handled in merge() as well.
            merged = merge(
                df_input=df_input, polygon_index=i, merge_subset=merge_subset
            )
            return True, merged

    return False, df_input
245 |
246 |
247 |
def process(list_df):
    """Concatenate prediction frames, drop contained polygons, merge overlaps.

    Args:
        list_df: List of DataFrames in the layout expected by
            ``remove_contained_poylgons`` and ``merge_overlapping``.

    Returns:
        The DataFrame left once ``merge_overlapping`` reports that nothing
        more can be merged.
    """
    # ignore_index: concat otherwise keeps each frame's own labels, giving
    # duplicate labels across frames, while the steps below mix positional
    # and label-aligned access.
    df_res = pd.concat(list_df, ignore_index=True)
    df_res = remove_contained_poylgons(df_input=df_res)

    # NOTE(review): the loop header and counter were missing in the reviewed
    # source; merge_overlapping() performs one merge per call, so it is
    # repeated until it reports that nothing was merged.
    i = 0
    merged, df_res = merge_overlapping(df_input=df_res)
    while merged:
        i += 1
        if i % 100 == 0:
            # Progress output for long runs.
            print(f"{i} merges performed")
        merged, df_res = merge_overlapping(df_input=df_res)
    return df_res
258 |
259 |
260 |
def combine_different_tile_size(df_smaller, df_bigger, threshold=0.15):
    """Add polygons from ``df_smaller`` that ``df_bigger`` does not cover yet.

    A polygon of ``df_smaller`` is appended unless some polygon of
    ``df_bigger`` covers more than ``threshold`` of its area.

    Args:
        df_smaller: DataFrame with ``polygon`` and the bounding-box columns.
        df_bigger: DataFrame with the same columns; all its rows are kept.
        threshold: Maximum covered share (0-1) a polygon of ``df_smaller``
            may have and still be added.

    Returns:
        A new DataFrame: the rows of ``df_bigger`` followed by the accepted
        rows of ``df_smaller`` in their original order.
    """
    kept_positions = []

    for i in range(len(df_smaller)):
        row = df_smaller.iloc[i]
        polygon = row["polygon"]

        # Standard interval-overlap test on both axes. It is only a cheap
        # prefilter for the exact area test below, and unlike a check of
        # this polygon's own edges it also finds candidates that lie
        # entirely inside this polygon's bounding box.
        boxes_overlap = (
            (row["min_lat"] < df_bigger["max_lat"])
            & (df_bigger["min_lat"] < row["max_lat"])
            & (row["min_lon"] < df_bigger["max_lon"])
            & (df_bigger["min_lon"] < row["max_lon"])
        )
        candidates = df_bigger.loc[boxes_overlap, "polygon"]

        already_covered = any(
            intersection_area_ratio(polygon, other) > threshold
            for other in candidates
        )
        if not already_covered:
            kept_positions.append(i)

    df_result = df_bigger.copy()
    if not kept_positions:
        return df_result

    # One concat at the end instead of one per accepted row.
    return pd.concat(
        [df_result, df_smaller.iloc[kept_positions]], axis=0, join="outer"
    )
302 |
303 |
304 |
def clean(df, score_threashold=0.5):
    """Keep only the rows whose score exceeds ``score_threashold``.

    The score is read from the ``"score"`` column. When that column is
    absent, ``"Confidence_score"`` - the name used by the other functions
    in this file - is used instead.

    Args:
        df: DataFrame with a ``"score"`` or ``"Confidence_score"`` column.
        score_threashold: Exclusive lower bound for the score.

    Returns:
        The filtered rows of ``df``.

    Raises:
        KeyError: If neither score column exists.
    """
    score_column = "score"
    if score_column not in df.columns and "Confidence_score" in df.columns:
        score_column = "Confidence_score"
    return df.loc[df[score_column] > score_threashold]
307 |
308 |
def row_to_feature(row):
    """Build a GeoJSON ``Feature`` dict from one row.

    Args:
        row: Mapping (for example a DataFrame row) with ``"id"``,
            ``"Confidence_score"`` and ``"coordinates"`` entries, the last
            one being a single ring of coordinate pairs.

    Returns:
        A dict with ``id``, ``type``, ``properties`` and a ``Polygon``
        geometry whose only ring is ``row["coordinates"]``.
    """
    feature = {
        "id": row["id"],
        "type": "Feature",
        "properties": {"Confidence_score": row["Confidence_score"]},
        "geometry": {"type": "Polygon", "coordinates": [row["coordinates"]]},
    }
    return feature
316 |
317 |
318 |
def export_df_as_geojson(
    df, filename="output", crs_name="urn:ogc:def:crs:EPSG::32720"
):
    """Write the rows of ``df`` to ``<filename>.geojson``.

    Args:
        df: DataFrame whose rows are accepted by ``row_to_feature``.
        filename: Output path without the ``.geojson`` extension.
        crs_name: Name stored in the collection's ``crs`` member.
    """
    features = [row_to_feature(row) for _, row in df.iterrows()]

    feature_collection = {
        "type": "FeatureCollection",
        "crs": {"type": "name", "properties": {"name": crs_name}},
        "features": features,
    }

    output_geojson = json.dumps(feature_collection)

    # Explicit UTF-8 so the file does not depend on the platform's locale.
    with open(f"{filename}.geojson", "w", encoding="utf-8") as f:
        f.write(output_geojson)

    print(f"GeoJSON data exported to '{filename}.geojson' file.")
333 |
334 |
def convert_id_to_string(prefix, x):
    """Return ``prefix`` followed by the string form of ``x``."""
    suffix = str(x)
    return prefix + suffix
336 |
337 |
def postprocess(prediction_geojson_path, output_filename="postprocessed_predictions"):
    """Run the full post-processing pipeline on a prediction GeoJSON file.

    The file is loaded and flattened with ``turn_into_dataframe``. Each row
    gets a ``df_<index>`` id and the ``to_drop`` / ``to_merge`` flags. The
    frame is passed through ``process`` and the result is exported with
    ``export_df_as_geojson``.

    Args:
        prediction_geojson_path: Path of the GeoJSON file to read.
        output_filename: Output path without the ``.geojson`` extension.

    Returns:
        The post-processed DataFrame that was exported.
    """
    # Explicit UTF-8 so the file is not decoded with the platform's locale.
    with open(prediction_geojson_path, "r", encoding="utf-8") as file:
        prediction_data = json.load(file)

    df = turn_into_dataframe(prediction_data)

    df["Confidence_score"] = df["Confidence_score"].astype(float)

    # Row ids are derived from the row index.
    df["id"] = [convert_id_to_string("df_", label) for label in df.index]

    df["to_drop"] = False
    df["to_merge"] = False

    df_res = process([df])

    export_df_as_geojson(df=df_res, filename=output_filename)
    return df_res