Spaces:

Fer-geo
/

df_align

Sleeping

App Files Files Community

Fer-geo committed on Jan 9, 2024

Commit

4bdf2dd

1 Parent(s): 867194d

get gdf

Browse files

Files changed (4) hide show

app copy.py +202 -0
app.py +152 -117
data/lotes espacio crea_empresa.dbf +0 -0
data/obs_df_2023_12_1.csv +2 -2

app copy.py ADDED Viewed

	@@ -0,0 +1,202 @@

+import streamlit as st
+import warnings
+warnings.filterwarnings('ignore')
+import pandas as pd
+import geopandas as gpd
+from difflib import get_close_matches
+import tempfile
+from io import BytesIO
+def convert_to_gdf(uploaded_file):
+    """Load an uploaded vector file through ``gpd.read_file``.
+
+    Args:
+        uploaded_file: Upload object exposing ``name`` and ``read()``.
+
+    Returns:
+        Whatever ``gpd.read_file`` returns for the uploaded bytes.
+
+    Raises:
+        ValueError: If the file name does not end in .shp, .geojson or .json.
+    """
+    # Compare extensions case-insensitively so names like 'LOTES.SHP' are accepted.
+    file_name = uploaded_file.name.lower()
+    if not file_name.endswith(('.shp', '.geojson', '.json')):
+        # Reject unsupported formats before consuming the upload stream.
+        raise ValueError("Unsupported file format")
+    # Wrap the uploaded bytes in an in-memory buffer for the reader.
+    file_buffer = BytesIO(uploaded_file.read())
+    if file_name.endswith('.shp'):
+        # NOTE(review): a lone .shp buffer has no .shx/.dbf sidecars - confirm
+        # that gpd.read_file can actually open it in the deployed environment.
+        return gpd.read_file(file_buffer)
+    return gpd.read_file(file_buffer, driver='GeoJSON')
+# add logo D:\Terradot\repos\crea-carbon-model\app\logo.jpg
+st.sidebar.image('logo.jpg', width=200)
+st.sidebar.title('Proyecto Crea')
+st.sidebar.write('Solo uso interno')
+# add sidebar with 2 upload buttons
+st.sidebar.header('Upload Files')
+uploaded_file = st.sidebar.file_uploader('Upload your shapefile', type=['shp', 'geojson', 'json'], disabled = True)
+uploaded_file2 = st.sidebar.file_uploader('Upload your csv file', type=['csv'], disabled = True)
+if uploaded_file is not None:
+        lotes_gdf = convert_to_gdf(uploaded_file)
+        st.write(lotes_gdf)
+if uploaded_file2 is not None:
+    # read csv and create dataframe
+    obs_df_2023 = pd.read_csv(uploaded_file2)
+# add Test button
+test = True #st.sidebar.button('Test')
+if 'key' not in st.session_state:
+    st.session_state['key'] = None
+if 'lote_gdf' not in st.session_state:
+    st.session_state['lote_gdf'] = None
+if test:
+    lotes_gdf = gpd.read_file('data/lotes espacio crea_empresa.shp', encoding='utf-8')
+    obs_df_2023 = pd.read_csv('data/obs_df_2023_12_1.csv')
+    obs_df_2023.fillna('-', inplace=True)
+    obs_df_2023.Campo = obs_df_2023.Campo.astype('str')
+    empresa_obs = obs_df_2023.EMPRESA.unique().tolist()
+    # create a state variable to hold the current value of key variable
+    col1, col2,col3 = st.columns(3)
+    with col1:
+        st.header('EMPRESA')
+        selected_company = st.selectbox(f'Seleccione empresa', empresa_obs, index= 0)
+    # filter dataframe by selected company
+    obs_df_2023 = obs_df_2023[obs_df_2023['EMPRESA'] == selected_company]
+    if st.session_state['lote_gdf'] is not None:
+        lotes_gdf = st.session_state['lote_gdf']
+    else:
+        lotes_gdf = lotes_gdf[lotes_gdf['empresa'] == selected_company]
+        st.session_state['lote_gdf'] = lotes_gdf
+    campo_obs = obs_df_2023.Campo.unique().tolist()
+    campo_gdf = lotes_gdf.campo.unique().tolist()
+    # Initialize an empty dictionary
+    similar_dict = {}
+    N = 3
+    CUTOFF = 0.72
+    # Loop through each item in the template list
+    # Normalise the observation names once (lowercase, '.' -> ' '); the list
+    # does not change between iterations, so it is built outside the loop.
+    campo_obs_norm = [str(c).lower().replace('.', ' ') for c in campo_obs]
+    for item in campo_gdf:
+        # Up to N candidates scoring at least CUTOFF, most similar first.
+        similar_items = get_close_matches(item, campo_obs_norm, N, CUTOFF)
+        if similar_items:
+            # Map the best normalised hit back to its original spelling.
+            similar_dict[item] = campo_obs[campo_obs_norm.index(similar_items[0])]
+        else:
+            # No candidate reached the cutoff.
+            similar_dict[item] = "no match"
+    similar_dict_df = pd.DataFrame.from_dict(similar_dict, orient='index').reset_index()
+    similar_dict_df.columns = ['gdf','obs']
+    # campo_obs = [str(c) for c in campo_obs]
+    # campo_obs.sort(key=str.lower)
+    campo_obs.insert(0, 'no match')
+    all_keys = similar_dict_df['gdf'].unique().tolist()
+    # Fields
+    lotes_gdf['campo_obs'] = lotes_gdf['campo'].map(similar_dict)
+    cutoff = 0.3
+    def on_click_field(*args):
+        """Return a zero-argument callback that prints the captured ``args``."""
+        def _log_args():
+            print(args)
+        return _log_args
+    def show_field(key):
+        """Render one selectbox per lote of a campo in the third column.
+
+        Args:
+            key: Two-item sequence ``[campo, campo_obs]``: the campo name used
+                in ``lotes_gdf`` and the observation campo paired with it.
+
+        Each selection is written to the ``lote_obs`` column of ``lotes_gdf``
+        for the rows matching the campo and lote.
+        """
+        # Unpack into a separate name instead of rebinding the parameter.
+        campo, selected_value = key
+        lote_obs = obs_df_2023[obs_df_2023['Campo'] == selected_value]['Lote'].unique().tolist()
+        # 'no match' sits at index 0 and doubles as the fallback option.
+        lote_obs.insert(0, 'no match')
+        with col3:
+            st.header('Lote')
+            df_field = lotes_gdf[lotes_gdf['campo'] == campo]
+            fields = df_field['lote'].unique().tolist()
+            for j, field in enumerate(fields):
+                similar_items = get_close_matches(field, lote_obs, 3, 0.70)
+                # The fallback must be an entry of lote_obs: a fallback of 0 is
+                # not one of the options, so lote_obs.index() would raise ValueError.
+                default = similar_items[0] if similar_items else 'no match'
+                selected_value = st.selectbox(f'{field} (.shp):', lote_obs, index=lote_obs.index(default), key='field'+str(j), on_change=on_click_field(campo, field, selected_value))
+                lotes_gdf.loc[(lotes_gdf['campo'] == campo) & (lotes_gdf['lote'] == field), 'lote_obs'] = selected_value
+    def on_click(key):
+        """Build the callback for a 'Show Fields' button.
+
+        The returned function stores ``key`` in ``st.session_state['key']``
+        and then calls ``show_field(key)``.
+        """
+        def _handler():
+            st.session_state['key'] = key
+            show_field(key)
+        return _handler
+    with col2:
+        st.header('Campo')
+        for i, key in enumerate(all_keys):
+            selected_value = st.selectbox(f'{key}:', campo_obs, index=campo_obs.index(similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0]))
+            # selected_value = st.multiselect(f'{key} (.shp):', campo_obs, default=similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0], key=i)
+            lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0]
+            if selected_value:
+                similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = selected_value
+                value = selected_value
+                lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = selected_value
+            else:
+                # similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = 'no match'
+                value = similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs']
+                lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = value
+            st.session_state['lote_gdf'] = lotes_gdf
+            st.button('Show Fields', key=key, on_click=on_click([key,value]))
+        # st.dataframe(similar_dict_df)
+        # if st.button('Show Fields'):
+        #     st.dataframe(similar_dict_df)
+    # add download button
+st.sidebar.download_button(
+        label="Download GeoJSON",
+        data=lotes_gdf.to_json().encode('utf-8'),
+        file_name=f'{selected_company}.geojson',
+        # mime='text/csv',
+        mime = 'application/json',
+    )

app.py CHANGED Viewed

@@ -47,162 +47,197 @@ if uploaded_file2 is not None:
 # add Test button
 test = True #st.sidebar.button('Test')
 if test:
-    lotes_gdf = gpd.read_file('data/lotes espacio crea_empresa.shp', encoding='utf-8')
-    obs_df_2023 = pd.read_csv('data/obs_df_2023_12_1.csv')
-    obs_df_2023.fillna('-', inplace=True)
-    obs_df_2023.Campo = obs_df_2023.Campo.astype('str')
-    empresa_obs = obs_df_2023.EMPRESA.unique().tolist()
-    # create a state variable to hold the current value of key variable
-    if 'key' not in st.session_state:
-        st.session_state['key'] = None
     col1, col2,col3 = st.columns(3)
     with col1:
         st.header('EMPRESA')
-        selected_company = st.selectbox(f'Seleccione empresa', empresa_obs, index= 0)
-    # filter dataframe by selected company
-    obs_df_2023 = obs_df_2023[obs_df_2023['EMPRESA'] == selected_company]
-    lotes_gdf = lotes_gdf[lotes_gdf['empresa'] == selected_company]
-    campo_obs = obs_df_2023.Campo.unique().tolist()
-    campo_gdf = lotes_gdf.campo.unique().tolist()
-    # Initialize an empty dictionary
-    similar_dict = {}
-    N = 3
-    CUTOFF = 0.72
-    # Loop through each item in the template list
-    for item in campo_gdf:
-        # normalize the stings to lowercase and remove punctuation in campo_obs
-        campo_obs_norm = [str(c).lower() for c in campo_obs]
-        campo_obs_norm = [c.replace('.', ' ') for c in campo_obs_norm]
-        # Find the most similar item in df_columns list
-        similar_items = get_close_matches(item, campo_obs_norm, N, CUTOFF)
-        # get the index of the most similar item
-        similar_items_idx = [campo_obs_norm.index(i) for i in similar_items]
-        # get the most similar item in the original list
-        similar_items = [campo_obs[i] for i in similar_items_idx]
-        # If a similar item is found, add to the dictionary
-        if similar_items:
-            similar_dict[item] = similar_items[0]
-        else:
-            # If no similar item is found, set value as "no match"
-            similar_dict[item] = "no match"
-    similar_dict_df = pd.DataFrame.from_dict(similar_dict, orient='index').reset_index()
-    similar_dict_df.columns = ['gdf','obs']
-    # campo_obs = [str(c) for c in campo_obs]
-    # campo_obs.sort(key=str.lower)
-    campo_obs.insert(0, 'no match')
-    all_keys = similar_dict_df['gdf'].unique().tolist()
-    # Fields
-    lotes_gdf['campo_obs'] = lotes_gdf['campo'].map(similar_dict)
-    cutoff = 0.3
-    # for campo , df in lotes_gdf.groupby('campo'):
-    #     for i, row in df.iterrows():
-    #         c_obs = obs_df_2023.loc[obs_df_2023.Campo == row.campo_obs]
-    #         lote_obs = c_obs.Lote.unique()
-    #         # normalize the stings to lowercase and remove punctuation
-    #         lote_obs_norm = [str(c).lower() for c in lote_obs]
-    #         lote_obs_norm = [c.replace('.', ' ') for c in lote_obs_norm]
-    #         similar_items = get_close_matches(row.lote, lote_obs_norm, N, cutoff)
-    #         # get the index of the most similar item
-    #         similar_items_idx = [lote_obs_norm.index(i) for i in similar_items]
-    #         # get the most similar item in the original list
-    #         similar_items = [lote_obs[i] for i in similar_items_idx]
-    #         if similar_items:
-    #             lotes_gdf.loc[i, 'lote_obs'] = similar_items[0]
-    #         else:
-    #             lotes_gdf.loc[i, 'lote_obs'] = "no match"
-    def show_field(key):
-        key, selected_value = key
-        lote_obs = obs_df_2023[obs_df_2023['Campo'] == selected_value]['Lote'].unique().tolist()
-        lote_obs.insert(0, 'no match')
-        with col3:
-            # st.header(st.session_state['key'])
             st.header('Lote')
-            df_field = lotes_gdf[lotes_gdf['campo'] == key]
-            fields = df_field['lote'].unique().tolist()
-            with st.form("fields_form"):
-                for j,field in enumerate(fields):
-                    similar_items = get_close_matches(field, lote_obs, 3, 0.70)
-                    default = similar_items[0] if similar_items else 'no match'
-                    # selected_value = st.multiselect(f'{field} (.shp):', lote_obs, default=default, key='field'+str(j))
-                    selected_value = st.selectbox(f'{field} (.shp):', lote_obs, index = lote_obs.index(default)  , key='field'+str(j))
-                    # if selected_value:
-                    #     lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['lote'] == field), 'lote_obs'] = selected_value
-                    # else:
-                    lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['lote'] == field), 'lote_obs'] = default
-                if st.form_submit_button("Submit"):
-                    st.dataframe(lotes_gdf.drop(columns=['geometry','nombre','apellido','campo','campo_obs']))
-    def on_click(key):
-        def inner():
-            st.session_state['key'] = key
-            show_field(key)
-        return inner
-    with col2:
-        st.header('Campo')
-        for i, key in enumerate(all_keys):
-            selected_value = st.selectbox(f'{key}:', campo_obs, index=campo_obs.index(similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0]))
-            # selected_value = st.multiselect(f'{key} (.shp):', campo_obs, default=similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0], key=i)
-            lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = similar_dict_df[similar_dict_df['gdf'] == key]['obs'].values[0]
-            if selected_value:
-                similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = selected_value
-                value = selected_value
-                lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = selected_value
             else:
-                # similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = 'no match'
-                value = similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs']
-                lotes_gdf.loc[(lotes_gdf['campo'] == key) & (lotes_gdf['empresa'] == selected_company), 'campo_obs'] = value
-            st.button('Show Fields', key=key, on_click=on_click([key,value]))
-        # st.dataframe(similar_dict_df)
-        # if st.button('Show Fields'):
-        #     st.dataframe(similar_dict_df)
-    # add download button
-st.sidebar.download_button(
-        label="Download GeoJSON",
-        data=lotes_gdf.to_json().encode('utf-8'),
-        file_name=f'{selected_company}.geojson',
-        # mime='text/csv',
-        mime = 'application/json',
-    )

 # add Test button
 test = True #st.sidebar.button('Test')
+if 'key' not in st.session_state:
+    st.session_state['key'] = None
+if 'lote_gdf' not in st.session_state:
+    gdf = gpd.read_file('data/lotes espacio crea_empresa.shp', encoding='utf-8')
+    gdf['campo_obs'] = None
+    gdf['lote_obs'] = None
+    st.session_state['lote_gdf'] = gdf
+if 'show_field' not in st.session_state:
+    st.session_state['show_field'] = None
+if 'selected_company' not in st.session_state:
+    st.session_state['selected_company'] = None
+if 'selected_farm' not in st.session_state:
+    st.session_state['selected_farm'] = None
+if 'estado_farm' not in st.session_state:
+    st.session_state['estado_farm'] = []
+if 'estado_field' not in st.session_state:
+    st.session_state['estado_field'] = []
+def show_text():
+    """Return a callback that writes the 'recuerde guardar' reminder."""
+    def _remind():
+        st.write('recuerde guardar')
+    return _remind
 if test:
+    gdf = st.session_state['lote_gdf']
+    obs_df = pd.read_csv('data/obs_df_2023_12_1.csv')
+    obs_df.fillna('-', inplace=True)
+    obs_df.Campo = obs_df.Campo.astype('str')
+    ##### Columns Section #####
     col1, col2,col3 = st.columns(3)
     with col1:
         st.header('EMPRESA')
+        # comp_list = obs_df.EMPRESA.unique().tolist()
+        comp_list = gdf.empresa.unique().tolist()
+        selected_company = st.selectbox(f'Seleccione empresa', comp_list, index= 0)
+        st.session_state['selected_company'] = selected_company
+    with col2:
+            st.header('Campo')
+            selected_company = st.session_state['selected_company']
+            obs_df_comp = obs_df[obs_df['EMPRESA'] == selected_company]
+            farm_obs_names = obs_df_comp.Campo.unique().tolist()
+            farm_obs_names.insert(0, 'no match')
+            farm_gdf = gdf[gdf['empresa'] == selected_company]
+            farm_gdf_names = farm_gdf.campo.unique().tolist()
+            similar_dict = {}
+            # Normalise the observation names once (lowercase, '.' -> ' ');
+            # the list does not change between iterations.
+            farm_obs_norm = [str(c).lower().replace('.', ' ') for c in farm_obs_names]
+            for item in farm_gdf_names:
+                # Up to 3 candidates scoring at least 0.72, most similar first.
+                # NOTE(review): item is matched as-is here, while the lote loop
+                # lowercases it first - confirm which behaviour is intended.
+                similar_items = get_close_matches(item, farm_obs_norm, 3, 0.72)
+                if similar_items:
+                    # Map the best normalised hit back to its original spelling.
+                    similar_dict[item] = farm_obs_names[farm_obs_norm.index(similar_items[0])]
+                else:
+                    # No candidate reached the cutoff.
+                    similar_dict[item] = "no match"
+            similar_dict_df = pd.DataFrame.from_dict(similar_dict, orient='index').reset_index()
+            similar_dict_df.columns = ['gdf','obs']
+            with st.form(key='farm_name'):
+                # Selection per shapefile campo, gathered while the form renders.
+                sel_farm_name = {}
+                for farm in farm_gdf_names:
+                    cll_val = farm_gdf[farm_gdf['campo'] == farm]['campo_obs'].unique()[0]
+                    if pd.isna(cll_val):
+                        # Nothing stored yet (None/NaN): preselect the fuzzy-match suggestion.
+                        preselected = similar_dict_df[similar_dict_df['gdf'] == farm]['obs'].values[0]
+                    else:
+                        # Reuse the value already stored in the GeoDataFrame.
+                        preselected = cll_val
+                    index = farm_obs_names.index(preselected)
+                    selected_value = st.selectbox(f'{farm}:', farm_obs_names, index=index)
+                    sel_farm_name[farm] = selected_value
+                submitted = st.form_submit_button(label='Guardar')
+                if selected_company not in st.session_state['estado_farm']:
+                    st.write('sin guardar')
+                else:
+                    st.write('guardado')
+                if submitted:
+                    # Record each company only once, however often the form is submitted.
+                    if selected_company not in st.session_state['estado_farm']:
+                        st.session_state['estado_farm'].append(selected_company)
+                    st.write('guardado')
+                    for key, value in sel_farm_name.items():
+                        similar_dict_df.loc[similar_dict_df['gdf'] == key, 'obs'] = value
+                        # Restrict the update to the selected company; the mask needs an
+                        # explicit comparison, not the raw 'empresa' column.
+                        gdf.loc[(gdf['campo'] == key) & (gdf['empresa'] == selected_company), 'campo_obs'] = value
+                    st.session_state['lote_gdf'] = gdf
+                    st.session_state['show_field'] = True
+    with col3:
+        if st.session_state['show_field']:
             st.header('Lote')
+            gdf = st.session_state['lote_gdf']
+            selected_company = st.session_state['selected_company']
+            farm_list = gdf[gdf['empresa'] == selected_company]['campo'].unique().tolist()
+            selected_farm = st.selectbox(f'Seleccione campo', farm_list, index=0)
+            selected_obs_farm = gdf[(gdf['empresa'] == selected_company)&(gdf['campo'] == selected_farm)]['campo_obs'].unique()[0]
+            field_gdf_names = gdf[(gdf['empresa'] == selected_company)&(gdf['campo'] == selected_farm)]['lote'].unique().tolist()
+            field_obs_names = obs_df[(obs_df['EMPRESA'] == selected_company)&(obs_df['Campo'] == selected_obs_farm)]['Lote'].unique().tolist()
+            field_obs_names.insert(0, 'no match')
+            if selected_farm not in st.session_state['estado_field']:
+                st.write('sin guardar')
             else:
+                st.write('guardado')
+            similar_dict = {}
+            # Normalise the observation lote names once (lowercase, '.' -> ' ');
+            # the list does not change between iterations.
+            field_obs_norm = [str(c).lower().replace('.', ' ') for c in field_obs_names]
+            for item in field_gdf_names:
+                # Up to 3 candidates scoring at least 0.50, most similar first.
+                similar_items = get_close_matches(item.lower(), field_obs_norm, 3, 0.50)
+                if similar_items:
+                    # Map the best normalised hit back to its original spelling.
+                    similar_dict[item] = field_obs_names[field_obs_norm.index(similar_items[0])]
+                else:
+                    # No candidate reached the cutoff.
+                    similar_dict[item] = "no match"
+            field_similar_dict_df = pd.DataFrame.from_dict(similar_dict, orient='index').reset_index()
+            field_similar_dict_df.columns = ['gdf','obs']
+            with st.form(key='field_name'):
+                sel_field_name = {}
+                for i, field in enumerate(field_gdf_names):
+                    selected_field = st.selectbox(f'{field}:', \
+                            field_obs_names, index=field_obs_names.index(field_similar_dict_df[field_similar_dict_df['gdf'] == field]['obs'].values[0]))
+                    sel_field_name[field] = selected_field
+                submitted = st.form_submit_button(label='Submit')
+                if submitted:
+                    st.session_state['estado_field'].append(selected_farm)
+                    st.write('guardado')
+                    for key, value in sel_field_name.items():
+                        field_similar_dict_df.loc[field_similar_dict_df['gdf'] == key, 'obs'] = value
+                        # farm_gdf.loc[farm_gdf['campo'] == key, 'campo_obs'] = value
+                        gdf.loc[(gdf['empresa'] == selected_company) & (gdf['campo'] == selected_farm) & (gdf['lote'] == key), 'lote_obs'] = value
+                    st.session_state['lote_gdf'] = gdf
+                    st.session_state['show_field'] = True
+    ##### Download Section #####
+    st.sidebar.download_button(
+            label="Download GeoJSON",
+            data=gdf.to_json().encode('utf-8'),
+            file_name=f'{selected_company}.geojson',
+            # mime='text/csv',
+            mime = 'application/json',
+        )

data/lotes espacio crea_empresa.dbf CHANGED Viewed

Binary files a/data/lotes espacio crea_empresa.dbf and b/data/lotes espacio crea_empresa.dbf differ

data/obs_df_2023_12_1.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:898c3ec6db5aa30768c6dab3e816b084521fcac3bee8cb40a3760d96cbf04f9c
-size 12614158

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f032f24a2e7234e3217d1a0bbcadea6947d710b64169b89d705a2cb3353c7e4
+size 12155827