Luecke committed
Commit 2bc451d · 1 Parent(s): 489e21a

postprocess ready for merging

.gitignore CHANGED
@@ -1,4 +1,5 @@
 .DS_Store
-HelperScripts/input/
-HelperScripts/output/
 polygons_processing/output.geojson
+detectree2/data/
+detectree2/models/
+detectree2/predictions/train_outputs
polygons_processing/polygons_merge_algo.ipynb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c46dc5367f92a1790a5ee1382ea76909da74e55fc414a107f477719c96f8c082
-size 36039
+oid sha256:011624c558fe8cee49c30f0e7b907e0f11065c04e870d9a2492a0d1fb3fa64d8
+size 29589
polygons_processing/postpprocess_detectree2.py ADDED
@@ -0,0 +1,354 @@
+import json
+from shapely.geometry import Polygon, Point
+from shapely.ops import unary_union
+import matplotlib.pyplot as plt
+from matplotlib.patches import Polygon as MplPolygon
+import numpy as np
+import pandas as pd
+
+
+def add_extreme_coordinates(polygon_data):
+    polygon_coords = np.array(polygon_data["geometry"]["coordinates"][0])
+
+    polygon_data["geometry"]["max_lat"] = max(polygon_coords[:, 1])
+    polygon_data["geometry"]["min_lat"] = min(polygon_coords[:, 1])
+    polygon_data["geometry"]["max_lon"] = max(polygon_coords[:, 0])
+    polygon_data["geometry"]["min_lon"] = min(polygon_coords[:, 0])
+
+
+def turn_into_dataframe(data):
+    data_list = data["features"]
+
+    for i in range(len(data_list)):
+        add_extreme_coordinates(data_list[i])
+
+    df = pd.DataFrame(data_list).drop(columns="type")
+
+    dict_cols = ["properties", "geometry"]
+    for dict_col in dict_cols:
+        dict_df = pd.json_normalize(df[dict_col])
+        # Merge the new columns back into the original DataFrame
+        df = df.drop(columns=[dict_col]).join(dict_df)
+    df["coordinates"] = df["coordinates"].apply(lambda x: x[0])
+    df["polygon"] = df["coordinates"].apply(lambda x: Polygon(x))
+
+    df = df.drop(columns=["type"])
+    return df
+
+# Function to plot a polygon
+def plot_polygon(ax, polygon, color, label="label"):
+    if not polygon.is_empty:
+        x, y = polygon.exterior.xy
+        ax.fill(x, y, color=color, alpha=0.5, label=label)
+
+
+def plot_polygons(list_polygons, first_one_different=False, dpi=150):
+    # Plot the polygons and their intersection
+    fig, ax = plt.subplots(dpi=dpi)
+
+    if first_one_different:
+        plot_polygon(ax, list_polygons[0], "red", "polygon 0")
+        for i, polygon in enumerate(list_polygons[1:], start=1):
+            plot_polygon(ax, polygon, "blue", f"polygon {i}")
+    else:
+        for i, polygon in enumerate(list_polygons):
+            plot_polygon(ax, polygon, "blue", f"polygon {i}")
+
+    # Plot the intersection
+    # plot_polygon(ax, intersection, 'red', 'Intersection')
+
+    # Add legend
+    # ax.legend()
+
+    # Set axis limits
+    ax.set_aspect("equal")
+
+    # Set title
+    ax.set_title("Polygons and their Intersection")
+    plt.ylabel("lat")
+    plt.xlabel("lon")
+
+    plt.show()
+
+
+def plot_polygons_with_colors(list_polygons, list_colors, dpi=150):
+    # Plot the polygons and their intersection
+    fig, ax = plt.subplots(dpi=dpi)
+
+    for polygon, color in zip(list_polygons, list_colors):
+        plot_polygon(ax, polygon, color)
+
+    # Set axis limits
+    ax.set_aspect("equal")
+
+    # Set title
+    ax.set_title("Polygons and their Intersection")
+    plt.ylabel("lat")
+    plt.xlabel("lon")
+
+    plt.show()
+
+
+def plot_polygons_from_df(df, dpi=150):
+    list_polygons = []
+    for index, row in df.iterrows():
+        list_polygons.append(row["polygon"])
+    plot_polygons(list_polygons=list_polygons, dpi=dpi)
+
+
+def map_color(id):
+    return "blue"
+
+
+def plot_polygons_from_df_with_color(df, dpi=150):
+    df["plot_colors"] = df["id"].apply(map_color)
+    list_polygons = []
+    list_colors = []
+    for index, row in df.iterrows():
+        list_polygons.append(row["polygon"])
+        list_colors.append(row["plot_colors"])
+    plot_polygons_with_colors(
+        list_polygons=list_polygons, list_colors=list_colors, dpi=dpi
+    )
+
+def intersection(polygon, polygon_comparison):
+    return polygon.intersection(polygon_comparison)
+
+
+def intersection_area(polygon, polygon_comparison):
+    return intersection(polygon, polygon_comparison).area
+
+
+def intersection_area_ratio(polygon, polygon_comparison):
+    return intersection_area(polygon, polygon_comparison) / polygon.area
+
+
+def containsPoint(polygonB, polygon):
+    # True if any vertex of polygonB lies inside polygon
+    coordinatesB = get_coordinates(polygonB)
+    for coord in coordinatesB:
+        if polygon.contains(Point(coord)):
+            return True
+    return False
+
+
+def get_coordinates(polygon):
+    coordinates = polygon.exterior.coords
+    coordinates = [list(pair) for pair in coordinates]
+    return coordinates
+
+def mark_id_to_be_dropped(df, id_string):
+    df.loc[df["id"] == id_string, "to_drop"] = True
+
+
+def mark_id_to_be_merged(df, id_string):
+    df.loc[df["id"] == id_string, "to_merge"] = True
+
+
+def calc_overlapping_subset(df_input, index):
+    # Select all rows whose bounding box overlaps the bounding box of the
+    # polygon at the given positional index (the strict inequalities exclude
+    # the polygon itself).
+    max_lat = df_input.iloc[index]["max_lat"]
+    min_lat = df_input.iloc[index]["min_lat"]
+    max_lon = df_input.iloc[index]["max_lon"]
+    min_lon = df_input.iloc[index]["min_lon"]
+
+    lat_overlap = (
+        ((max_lat < df_input["max_lat"]) & (max_lat > df_input["min_lat"]))
+        | ((min_lat < df_input["max_lat"]) & (min_lat > df_input["min_lat"]))
+        | ((df_input["max_lat"] < max_lat) & (df_input["max_lat"] > min_lat))
+        | ((df_input["min_lat"] > min_lat) & (df_input["min_lat"] < max_lat))
+    )
+    lon_overlap = (
+        ((max_lon < df_input["max_lon"]) & (max_lon > df_input["min_lon"]))
+        | ((min_lon < df_input["max_lon"]) & (min_lon > df_input["min_lon"]))
+        | ((df_input["max_lon"] < max_lon) & (df_input["max_lon"] > min_lon))
+        | ((df_input["min_lon"] > min_lon) & (df_input["min_lon"] < max_lon))
+    )
+    relevant_subset = df_input.loc[lat_overlap & lon_overlap]
+    return relevant_subset
+
+def remove_contained_polygons(df_input):
+    df_result = df_input.copy()
+
+    for i in range(len(df_result)):
+
+        polygonA = df_input.iloc[i]["polygon"]
+
+        # relevant_subset = df_result[df_result['polygon'].apply(lambda polygonB: containsPoint(polygonA, polygonB))]
+        # relevant_subset = relevant_subset[relevant_subset['id'] != df_input.iloc[i]['id']]
+        relevant_subset = calc_overlapping_subset(df_input=df_result, index=i)
+
+        # Experiment with this parameter to find the best threshold
+        # It certainly has to be smaller than 0.9
+        threshold = 0.85
+        for j in range(len(relevant_subset)):
+            ratio_current_choice = intersection_area_ratio(
+                polygon=polygonA, polygon_comparison=relevant_subset.iloc[j]["polygon"]
+            )
+            ratio_alternative_choice = intersection_area_ratio(
+                polygon=relevant_subset.iloc[j]["polygon"], polygon_comparison=polygonA
+            )
+            if (ratio_current_choice > threshold) or (ratio_alternative_choice > threshold):
+                # keep the larger polygon, mark the smaller one for dropping
+                if polygonA.area > relevant_subset.iloc[j]["polygon"].area:
+                    mark_id_to_be_dropped(df=df_result, id_string=relevant_subset.iloc[j]["id"])
+                else:
+                    mark_id_to_be_dropped(df=df_result, id_string=df_input.iloc[i]["id"])
+
+    # remove all polygons that had a marked id
+    df_result = df_result.loc[df_result["to_drop"] == False]
+    return df_result
+
+def merge(df_input, polygon_index, merge_subset):
+    # Merge the polygon at polygon_index with every polygon in merge_subset and
+    # delete the merged-in rows from df_input.
+    merged_polygon_id = df_input.iloc[polygon_index]["id"]
+
+    for j in range(len(merge_subset)):
+        # look the polygon up by id, since positional indices shift once rows are dropped
+        current_polygon = df_input.loc[df_input["id"] == merged_polygon_id, "polygon"].iloc[0]
+        current_score = df_input.loc[df_input["id"] == merged_polygon_id, "Confidence_score"].iloc[0]
+
+        # change by merge --> polygon, coordinates, min/max lon/lat, score (average)
+        # the polygons overlap, so the union is assumed to be a single Polygon
+        tmp = current_polygon.union(merge_subset.iloc[j]["polygon"])
+        merged_coordinates = list(tmp.exterior.coords)
+        merged_polygon = Polygon(merged_coordinates)  # new polygon
+
+        coordinates = [list(tup) for tup in merged_coordinates]  # new coordinates
+        # updating min/max lon/lat
+        min_lon = min([point[0] for point in coordinates])
+        max_lon = max([point[0] for point in coordinates])
+        min_lat = min([point[1] for point in coordinates])
+        max_lat = max([point[1] for point in coordinates])
+        polygon_score = merge_subset.iloc[j]["Confidence_score"]
+
+        # updating the merged polygon
+        df_input.loc[df_input["id"] == merged_polygon_id, "polygon"] = merged_polygon
+        df_input.loc[df_input["id"] == merged_polygon_id, "min_lon"] = min_lon
+        df_input.loc[df_input["id"] == merged_polygon_id, "max_lon"] = max_lon
+        df_input.loc[df_input["id"] == merged_polygon_id, "min_lat"] = min_lat
+        df_input.loc[df_input["id"] == merged_polygon_id, "max_lat"] = max_lat
+        df_input.loc[df_input["id"] == merged_polygon_id, "Confidence_score"] = (current_score + polygon_score) / 2
+        df_input.loc[df_input["id"] == merged_polygon_id, "coordinates"] = df_input.loc[
+            df_input["id"] == merged_polygon_id, "polygon"
+        ].apply(get_coordinates)
+        # delete the j-th polygon of merge_subset from df_input
+        df_input = df_input.loc[df_input["id"] != merge_subset.iloc[j]["id"]]
+    return df_input
+
+
+def merge_overlapping(df_input):
+    # Experiment with this parameter to get the best results
+    threshold = 0.40
+
+    for i in range(len(df_input)):
+        polygon = df_input.iloc[i]["polygon"]
+        relevant_subset = calc_overlapping_subset(df_input=df_input, index=i)
+        toBeMerged = False
+        for j in range(len(relevant_subset)):
+            ratio_current_choice = intersection_area_ratio(
+                polygon=polygon, polygon_comparison=relevant_subset.iloc[j]["polygon"]
+            )
+            ratio_alternative_choice = intersection_area_ratio(
+                polygon=relevant_subset.iloc[j]["polygon"], polygon_comparison=polygon
+            )
+            if (ratio_current_choice > threshold) or (ratio_alternative_choice > threshold):
+                toBeMerged = True
+                mark_id_to_be_merged(df=relevant_subset, id_string=relevant_subset.iloc[j]["id"])
+
+        if toBeMerged:
+            # deleting the merged-in rows is handled in this function as well
+            df_input = merge(
+                df_input=df_input,
+                polygon_index=i,
+                merge_subset=relevant_subset[relevant_subset["to_merge"] == True],
+            )
+            return True, df_input
+
+    return False, df_input
+
+
+def process(list_df):
+    df_res = pd.concat(list_df)
+    df_res = remove_contained_polygons(df_input=df_res)
+    i = 0
+    merged, df_res = merge_overlapping(df_input=df_res)
+    while merged:
+        i += 1
+        if i % 100 == 0:
+            print(i)
+        merged, df_res = merge_overlapping(df_input=df_res)
+    return df_res
+
+
+def combine_different_tile_size(df_smaller, df_bigger):
+
+    df_result = df_bigger.copy()
+
+    for i in range(len(df_smaller)):
+        max_lat = df_smaller.iloc[i]["max_lat"]
+        min_lat = df_smaller.iloc[i]["min_lat"]
+        max_lon = df_smaller.iloc[i]["max_lon"]
+        min_lon = df_smaller.iloc[i]["min_lon"]
+
+        polygon = df_smaller.iloc[i]["polygon"]
+
+        relevant_subset = df_bigger.loc[
+            (
+                ((max_lat < df_bigger["max_lat"]) & (max_lat > df_bigger["min_lat"]))
+                | ((min_lat < df_bigger["max_lat"]) & (min_lat > df_bigger["min_lat"]))
+            )
+            & (
+                ((max_lon < df_bigger["max_lon"]) & (max_lon > df_bigger["min_lon"]))
+                | ((min_lon < df_bigger["max_lon"]) & (min_lon > df_bigger["min_lon"]))
+            )
+        ]
+
+        list_polygons = [polygon]
+
+        for index, row in relevant_subset.iterrows():
+            list_polygons.append(row["polygon"])
+
+        add_polygon = True
+        threshold = 0.15
+        for comparison_polygon in list_polygons[1:]:
+            ratio = intersection_area_ratio(polygon, comparison_polygon)
+            if ratio > threshold:
+                add_polygon = False
+
+        if add_polygon:
+            df_result = pd.concat(
+                [df_result, df_smaller.iloc[[i]]], axis=0, join="outer"
+            )
+
+    return df_result
+
+
+def clean(df, score_threshold=0.5):
+    # keep only predictions above the confidence threshold
+    df = df.loc[df["Confidence_score"] > score_threshold]
+    return df
+
+
+def row_to_feature(row):
+    feature = {
+        "id": row["id"],
+        "type": "Feature",
+        "properties": {"Confidence_score": row["Confidence_score"]},
+        "geometry": {"type": "Polygon", "coordinates": [row["coordinates"]]},
+    }
+    return feature
+
+
+def export_df_as_geojson(df, filename="output"):
+    features = [row_to_feature(row) for idx, row in df.iterrows()]
+
+    feature_collection = {
+        "type": "FeatureCollection",
+        "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::32720"}},
+        "features": features,
+    }
+
+    output_geojson = json.dumps(feature_collection)
+
+    with open(f"{filename}.geojson", "w") as f:
+        f.write(output_geojson)
+
+    print(f"GeoJSON data exported to '{filename}.geojson' file.")
+
+
+def convert_id_to_string(prefix, x):
+    return prefix + str(x)
+
+
+def postprocess(prediction_geojson_path):
+    with open(prediction_geojson_path, "r") as file:
+        prediction_data = json.load(file)
+
+    df = turn_into_dataframe(prediction_data)
+
+    df["id"] = df.index
+
+    df["Confidence_score"] = df["Confidence_score"].astype(float)
+
+    df["id"] = df["id"].apply(lambda x: convert_id_to_string("df_", x))
+
+    df["to_drop"] = False
+    df["to_merge"] = False
+
+    df_res = process([df])
+
+    export_df_as_geojson(df=df_res, filename="postprocessed_predictions")
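
A minimal usage sketch for the added module, assuming it is imported from the polygons_processing directory; the prediction file path below is only an example, not part of the commit:

    from postpprocess_detectree2 import postprocess

    # reads a detectree2 prediction GeoJSON, removes contained crowns, merges
    # overlapping ones, and writes postprocessed_predictions.geojson
    postprocess("path/to/detectree2_predictions.geojson")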