In [59]:
# -*- coding: utf-8 -*-
"""
Created on Tue May 21 13:44:53 2024

@author: morit
"""

import json
from shapely.geometry import Polygon
from shapely.ops import unary_union
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon as MplPolygon
import numpy as np
import pandas as pd

## The following cells only load the data. Replace the paths with your own files; each file must be a .geojson file with the same content as the files provided in the Google Drive folder.

In [60]:
# Parse the GeoJSON file stored under tile_40 into a plain Python dict.
# NOTE(review): this is an absolute, machine-specific path — point it at your own file.
with open('/Users/jonathanseele/ETH/Hackathons/EcoHackathon/WeCanopy/detectree2/predictions/train_outputs/geojson/tile_40/crowns_out_0.3_0.2.geojson', 'r') as file:
    data_40 = json.load(file)

In [61]:
# Parse the GeoJSON file stored under tile_20 into a plain Python dict.
# NOTE(review): this is an absolute, machine-specific path — point it at your own file.
with open('/Users/jonathanseele/ETH/Hackathons/EcoHackathon/WeCanopy/detectree2/predictions/train_outputs/geojson/tile_20/crowns_out_0.3_0.2.geojson', 'r') as file:
    data_20 = json.load(file)

## This provides utilities to convert the data to dataframes for easier handling

In [62]:
def add_extreme_coordinates(polygon_data):
    """Store the bounding-box extremes of a feature's first ring on its geometry.

    The first ring of ``polygon_data['geometry']['coordinates']`` is read as a
    2-D array; column 0 is used for the ``*_lon`` values and column 1 for the
    ``*_lat`` values. The keys ``max_lat``, ``min_lat``, ``max_lon`` and
    ``min_lon`` are written into ``polygon_data['geometry']`` in place and
    nothing is returned.
    """
    geometry = polygon_data['geometry']
    first_ring = np.array(geometry['coordinates'][0])
    lons, lats = first_ring[:, 0], first_ring[:, 1]

    # Keys are inserted in the order max_lat, min_lat, max_lon, min_lon.
    geometry['max_lat'] = max(lats)
    geometry['min_lat'] = min(lats)
    geometry['max_lon'] = max(lons)
    geometry['min_lon'] = min(lons)


def turn_into_dataframe(data):
    """Flatten a GeoJSON-style feature collection into a DataFrame.

    Every entry of ``data['features']`` first gets its bounding-box extremes
    added (in place) by ``add_extreme_coordinates``. The nested ``properties``
    and ``geometry`` dicts are then expanded into top-level columns, the
    ``coordinates`` column is reduced to its first ring, and a ``polygon``
    column holding a shapely ``Polygon`` built from that ring is added.

    Returns the resulting DataFrame without either ``type`` column.
    """
    features = data['features']

    # Side effect: the feature dicts inside `data` are annotated in place.
    for feature in features:
        add_extreme_coordinates(feature)

    frame = pd.DataFrame(features).drop(columns='type')

    # Expand each dict-valued column and join the new columns back on the index.
    for nested_col in ('properties', 'geometry'):
        expanded = pd.json_normalize(frame[nested_col])
        frame = frame.drop(columns=[nested_col]).join(expanded)

    frame['coordinates'] = frame['coordinates'].apply(lambda rings: rings[0])
    frame['polygon'] = frame['coordinates'].apply(lambda ring: Polygon(ring))

    # The geometry expansion re-introduced a 'type' column; remove it again.
    return frame.drop(columns=['type'])

## Some functions for plotting

In [63]:
# Function to plot a polygon
def plot_polygon(ax, polygon, color, label = "label"):
    """Fill the exterior ring of ``polygon`` on ``ax`` at 50% opacity.

    Empty polygons are skipped. Nothing is returned.
    """
    if polygon.is_empty:
        return
    xs, ys = polygon.exterior.xy
    ax.fill(xs, ys, color=color, alpha=0.5, label=label)

def plot_polygons(list_polygons, first_one_different = False, dpi=150):
    """Draw a list of polygons on a single figure and show it.

    Parameters
    ----------
    list_polygons : sequence of polygons accepted by ``plot_polygon``.
    first_one_different : when True, the first polygon is drawn in red and
        the rest in blue; otherwise all polygons are drawn in blue.
    dpi : resolution of the figure.
    """
    # Create exactly one figure. Calling plt.figure(dpi=...) and then
    # plt.subplots() opened two figures: an empty one carrying the dpi and the
    # one actually drawn on, which ignored it.
    fig, ax = plt.subplots(dpi=dpi)

    if first_one_different and len(list_polygons) > 0:
        plot_polygon(ax, list_polygons[0], 'red', f"polygon {0}")
        # start=1 keeps the labels aligned with the positions in list_polygons.
        for i, polygon in enumerate(list_polygons[1:], start=1):
            plot_polygon(ax, polygon, 'blue', f"polygon {i}")
    else:
        for i, polygon in enumerate(list_polygons):
            plot_polygon(ax, polygon, 'blue', f"polygon {i}")

    # Equal scaling on both axes so the shapes are not distorted.
    ax.set_aspect('equal')

    ax.set_title('Polygons and their Intersection')
    ax.set_ylabel('lat')
    ax.set_xlabel('lon')

    plt.show()

def plot_polygons_with_colors(list_polygons, list_colors , dpi=150):
    """Draw each polygon in its own colour on a single figure and show it.

    Parameters
    ----------
    list_polygons : sequence of polygons accepted by ``plot_polygon``.
    list_colors : sequence of colours, paired positionally with
        ``list_polygons`` (pairing stops at the shorter sequence).
    dpi : resolution of the figure.
    """
    # Create exactly one figure. Calling plt.figure(dpi=...) and then
    # plt.subplots() opened two figures: an empty one carrying the dpi and the
    # one actually drawn on, which ignored it.
    fig, ax = plt.subplots(dpi=dpi)

    for polygon, color in zip(list_polygons, list_colors):
        plot_polygon(ax, polygon, color)

    # Equal scaling on both axes so the shapes are not distorted.
    ax.set_aspect('equal')

    ax.set_title('Polygons and their Intersection')
    ax.set_ylabel('lat')
    ax.set_xlabel('lon')

    plt.show()

def plot_polygons_from_df(df, dpi = 150):
    """Plot every entry of the ``polygon`` column of ``df`` via ``plot_polygons``."""
    polygons = [row['polygon'] for _, row in df.iterrows()]
    plot_polygons(list_polygons=polygons, dpi=dpi)

def map_color(id):
    """Map the first five characters of ``id`` to a plotting colour.

    Known prefixes are ``df_40`` (blue), ``df_32`` (green), ``df_24`` (yellow),
    ``df_16`` (orange) and ``df_80`` (red); anything else yields ``'black'``.
    """
    prefix = id[:5]
    prefix_colors = (
        ("df_40", 'blue'),
        ("df_32", 'green'),
        ("df_24", 'yellow'),
        ("df_16", 'orange'),
        ("df_80", 'red'),
    )
    for known_prefix, color in prefix_colors:
        if prefix == known_prefix:
            return color
    return 'black'

def plot_polygons_from_df_with_color(df, dpi = 150):
    """Plot the ``polygon`` column of ``df``, coloured by ``map_color`` of each ``id``.

    Side effect: a ``plot_colors`` column is written onto ``df`` itself.
    """
    df['plot_colors'] = df['id'].apply(map_color)

    polygons, colors = [], []
    for _, row in df.iterrows():
        polygons.append(row['polygon'])
        colors.append(row['plot_colors'])

    plot_polygons_with_colors(list_polygons=polygons, list_colors=colors, dpi=dpi)


## Functions for the combination of the layers

#### helper functions

In [64]:
def intersection(polygon, polygon_comparison):
    """Return ``polygon.intersection(polygon_comparison)``."""
    overlap = polygon.intersection(polygon_comparison)
    return overlap

def intersection_area(polygon, polygon_comparison):
    """Return the ``area`` of the intersection of the two geometries."""
    overlap = intersection(polygon, polygon_comparison)
    return overlap.area

def intersection_area_ratio(polygon, polygon_comparison):
    """Return the fraction of ``polygon``'s area covered by ``polygon_comparison``.

    The ratio is ``area(polygon ∩ polygon_comparison) / area(polygon)``; it is
    not symmetric in its arguments.

    A polygon with zero area cannot be covered to any fraction, so ``0.0`` is
    returned for it instead of raising ``ZeroDivisionError``.
    """
    own_area = polygon.area
    if own_area == 0:
        return 0.0
    return polygon.intersection(polygon_comparison).area / own_area

### The actual combination of layers and removal of duplicates

In [84]:
def mark_id_to_be_dropped(df, id):
    """Set ``to_drop`` to True, in place, on every row of ``df`` whose ``id`` equals ``id``."""
    matching_rows = df['id'] == id
    df.loc[matching_rows, 'to_drop'] = True

def combine_different_tile_size(df_smaller, df_bigger, threshold=0.15):
    """Append to ``df_bigger`` the polygons of ``df_smaller`` it does not already cover.

    A row of ``df_smaller`` is rejected when a single polygon of ``df_bigger``
    covers more than ``threshold`` of that row's polygon area (measured with
    ``intersection_area_ratio``). Rows of ``df_smaller`` are only compared
    against ``df_bigger``, never against each other.

    Parameters
    ----------
    df_smaller, df_bigger : DataFrames with the columns ``polygon``,
        ``min_lat``, ``max_lat``, ``min_lon`` and ``max_lon``.
    threshold : maximum covered fraction of a ``df_smaller`` polygon that is
        still accepted. Defaults to 0.15.

    Returns
    -------
    A new DataFrame: all rows of ``df_bigger`` followed by the accepted rows of
    ``df_smaller`` in their original order. The inputs are not modified.
    """
    # Hoist the per-row lookups out of the loop.
    big_min_lat = df_bigger['min_lat'].to_numpy(dtype=float)
    big_max_lat = df_bigger['max_lat'].to_numpy(dtype=float)
    big_min_lon = df_bigger['min_lon'].to_numpy(dtype=float)
    big_max_lon = df_bigger['max_lon'].to_numpy(dtype=float)
    big_polygons = df_bigger['polygon'].tolist()

    small_min_lat = df_smaller['min_lat'].to_numpy(dtype=float)
    small_max_lat = df_smaller['max_lat'].to_numpy(dtype=float)
    small_min_lon = df_smaller['min_lon'].to_numpy(dtype=float)
    small_max_lon = df_smaller['max_lon'].to_numpy(dtype=float)
    small_polygons = df_smaller['polygon'].tolist()

    kept_positions = []
    for position, polygon in enumerate(small_polygons):
        # Bounding-box pre-filter: two boxes overlap exactly when their
        # intervals overlap on both axes. The previous test only matched when
        # an edge of the small box lay strictly inside the big box, so it
        # missed identical boxes and boxes enclosing the other one on an axis.
        boxes_overlap = (
            (big_min_lat <= small_max_lat[position])
            & (big_max_lat >= small_min_lat[position])
            & (big_min_lon <= small_max_lon[position])
            & (big_max_lon >= small_min_lon[position])
        )

        # any() stops at the first polygon that covers too much of this one.
        already_covered = any(
            intersection_area_ratio(polygon, big_polygons[j]) > threshold
            for j in np.flatnonzero(boxes_overlap)
        )
        if not already_covered:
            kept_positions.append(position)

    if not kept_positions:
        return df_bigger.copy()

    # Concatenate once instead of growing the frame row by row.
    return pd.concat([df_bigger, df_smaller.iloc[kept_positions]], axis=0, join='outer')


def remove_doublicates_area(df_input, threshold=0.15):
    """Drop the smaller polygon of every pair that overlaps too much.

    Two rows are treated as duplicates when the intersection covers more than
    ``threshold`` of either polygon's area (``intersection_area_ratio`` in both
    directions). Of such a pair the polygon with the larger area is kept; if
    both areas are exactly equal, the row that comes first is kept.

    Parameters
    ----------
    df_input : DataFrame with the columns ``polygon``, ``min_lat``,
        ``max_lat``, ``min_lon`` and ``max_lon``. It is not modified.
    threshold : overlap fraction above which two polygons count as
        duplicates. Defaults to 0.15.

    Returns
    -------
    A copy of the surviving rows with an additional ``to_drop`` column that is
    False on every returned row.

    NOTE(review): every row is compared against all rows of ``df_input``,
    including rows already marked for dropping, so a polygon that is itself
    dropped can still cause a smaller neighbour to be dropped.
    """
    df_result = df_input.copy()
    df_result["to_drop"] = False

    row_count = len(df_input)
    # Rows are flagged by position: the index can contain repeated labels after
    # pd.concat and ids are not guaranteed to be unique.
    drop_flags = np.zeros(row_count, dtype=bool)

    min_lat = df_input['min_lat'].to_numpy(dtype=float)
    max_lat = df_input['max_lat'].to_numpy(dtype=float)
    min_lon = df_input['min_lon'].to_numpy(dtype=float)
    max_lon = df_input['max_lon'].to_numpy(dtype=float)
    polygons = df_input['polygon'].tolist()

    for i in range(row_count):
        polygon = polygons[i]

        # Bounding-box pre-filter: two boxes overlap exactly when their
        # intervals overlap on both axes. The previous strict "edge inside the
        # other box" test missed identical boxes and enclosing boxes.
        boxes_overlap = (
            (min_lat <= max_lat[i])
            & (max_lat >= min_lat[i])
            & (min_lon <= max_lon[i])
            & (max_lon >= min_lon[i])
        )

        for j in np.flatnonzero(boxes_overlap):
            if j == i:
                # A polygon always overlaps itself; never compare it to itself.
                continue

            other = polygons[j]
            ratio_current_choice = intersection_area_ratio(polygon=polygon, polygon_comparison=other)
            ratio_alternative_choice = intersection_area_ratio(polygon=other, polygon_comparison=polygon)
            if ratio_current_choice > threshold or ratio_alternative_choice > threshold:
                # The larger polygon wins. On an exact tie the earlier row wins,
                # so exactly one of the two survives instead of both being dropped.
                current_wins = polygon.area > other.area or (polygon.area == other.area and i < j)
                if current_wins:
                    drop_flags[j] = True
                else:
                    drop_flags[i] = True
                    break

    return df_result.loc[~drop_flags]

    

    
def clean(df, score_threashold = 0.5):
    """Return the rows of ``df`` whose ``score`` is strictly above ``score_threashold``."""
    above_cutoff = df['score'] > score_threashold
    return df.loc[above_cutoff]

def process(list_df, score_threashold = 0.2):
    """Merge several prediction layers into one de-duplicated DataFrame.

    The first frame of ``list_df`` is de-duplicated with
    ``remove_doublicates_area``; every following frame is then merged in, one
    after another, with ``combine_different_tile_size`` (the running result is
    passed as ``df_bigger``). A final de-duplication pass is run on the result.

    ``score_threashold`` is accepted but not used by this function.
    """
    combined = remove_doublicates_area(df_input=list_df[0])
    print('first data prepared')

    for step, next_layer in enumerate(list_df[1:]):
        combined = combine_different_tile_size(df_smaller=next_layer, df_bigger=combined)
        print(step)

    # Second pass: duplicates can still be present after the layers are merged.
    return remove_doublicates_area(df_input=combined)
            
        

## Functions needed to export the data as .geojson again

In [98]:
def row_to_feature(row):
    """Build a GeoJSON-style ``Feature`` dict from one row.

    Reads ``row['id']``, ``row['Confidence_score']`` and ``row['coordinates']``;
    the coordinates are wrapped in a list as the single ring of a ``Polygon``
    geometry.
    """
    feature_id = row['id']
    properties = {"Confidence_score": row['Confidence_score']}
    geometry = {"type": "Polygon", "coordinates": [row['coordinates']]}

    return {
        "id": feature_id,
        "type": "Feature",
        "properties": properties,
        "geometry": geometry,
    }


def export_df_as_geojson(df, filename = 'output', crs_name = "urn:ogc:def:crs:EPSG::32720"):
    """Write the rows of ``df`` to ``<filename>.geojson`` as a FeatureCollection.

    Parameters
    ----------
    df : DataFrame whose rows are converted with ``row_to_feature``.
    filename : output path without the ``.geojson`` extension.
    crs_name : value written to ``crs.properties.name``. The default keeps the
        previously hard-coded EPSG:32720; pass the CRS of your own data when it
        differs, otherwise the file is labelled with the wrong reference system.
    """
    features = [row_to_feature(row) for _, row in df.iterrows()]

    feature_collection = {
        "type": "FeatureCollection",
        "crs": {"type": "name", "properties": {"name": crs_name} },
        "features": features
    }

    # Serialise before opening the file so that a serialisation error does not
    # leave a truncated output file behind.
    output_geojson = json.dumps(feature_collection)

    with open(f'{filename}.geojson', 'w', encoding='utf-8') as f:
        f.write(output_geojson)

    print(f"GeoJSON data exported to '{filename}.geojson' file.")

# EXAMPLE: actual treatment of the data

In [92]:
# Flatten both loaded GeoJSON dicts into DataFrames (one row per feature).
df_40 = turn_into_dataframe(data_40)
df_20 = turn_into_dataframe(data_20)

# Start from the positional index as id; the prefix that makes ids unique
# across both frames is added further down in this cell.
df_40["id"] = df_40.index
df_20["id"] = df_20.index

def convert_id_to_string(prefix,x):
    """Return ``prefix`` followed by the string form of ``x``."""
    suffix = str(x)
    return prefix + suffix

# Prefix every id with the name of the frame it came from, so ids stay unique
# once the frames are combined.
df_40["id"] = df_40["id"].apply(lambda x : convert_id_to_string("df_40_", x))
df_20["id"] = df_20["id"].apply(lambda x : convert_id_to_string("df_20_", x))

In [None]:
# Preview the first rows of the flattened tile_40 frame.
df_40.head()

In [None]:
# Order matters: the first frame is de-duplicated on its own and every later
# frame is merged into the running result as the `df_smaller` argument.
list_df = [df_40, df_20]
df_result = process(list_df)

In [None]:
# Show the combined frame. `display` is not imported in this file;
# presumably it is the helper the notebook environment provides — TODO confirm.
display(df_result)

In [None]:
# Writes 'result.geojson' relative to the current working directory.
export_df_as_geojson(df = df_result, filename = 'result')

In [None]:
# Colours come from map_color: ids starting with "df_40" are drawn blue, while
# "df_20" has no entry there and therefore falls back to black.
plot_polygons_from_df_with_color(df = df_result, dpi = 200)