akhil-vaidya committed on
Commit
1662e26
·
1 Parent(s): 28aa6d6

feat: streamlit-app

Browse files
src/app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ######################################## IMPORTING REQUIRED LIBRARIES ####################################
2
+ import os
3
+ import sys
4
+ import pandas as pd
5
+ import streamlit as st
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+ data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
8
+ from utilities import get_data, input_filter, clean_data, autogenerate_labels
9
+
10
def data_sourcing(left_lat, left_lon, dist, loc_name):
    """Source map data for a location and persist the raw result as CSV.

    The coordinates are first passed through ``input_filter``; the filtered
    pair and ``dist`` are then handed to ``get_data``. The result is written
    to ``LOCATION_<loc_name>_DATA.csv`` inside ``data_folder`` without an
    index column.

    Args:
        left_lat: Latitude forwarded to ``input_filter``.
        left_lon: Longitude forwarded to ``input_filter``.
        dist: Distance value forwarded unchanged to ``get_data``.
        loc_name: Label embedded in the output file name.

    Returns:
        The object returned by ``get_data`` (it must support ``to_csv``;
        presumably a pandas DataFrame -- confirm against ``get_data``).
    """
    filtered_lat, filtered_lon = input_filter(lat=left_lat, lon=left_lon)
    raw_df = get_data(filtered_lat, filtered_lon, dist)

    output_path = f'{data_folder}/LOCATION_{loc_name}_DATA.csv'
    raw_df.to_csv(output_path, index=False)
    return raw_df
15
+
16
def data_clean_for_training(df):
    """Clean ``df`` with ``clean_data`` and save the result for training.

    The cleaned data is written to ``MMR_DATA_CLEAN.csv`` inside
    ``data_folder`` without an index column.

    Args:
        df: Raw data to clean; passed straight to ``clean_data``.

    Returns:
        The cleaned object returned by ``clean_data``.
    """
    cleaned_df = clean_data(df)

    output_path = f'{data_folder}/MMR_DATA_CLEAN.csv'
    cleaned_df.to_csv(output_path, index=False)
    return cleaned_df
20
+
21
# Streamlit front end for the ETL pipeline: collect the inputs, then on
# button press source the data, clean it, auto-label it and write the
# labelled and embedding tables to CSV files in ``data_folder``.
st.title("Map Data Analysis - ETL Pipeline")

left_lat = st.number_input("Enter the left latitude", value=18.889833)
left_lon = st.number_input("Enter the left longitude", value=72.779844)
print(left_lat, left_lon)  # console trace of the coordinates on each rerun
loc_name = st.text_input("Enter the location name", value="Mumbai")
dist = st.number_input("Enter the distance", value=35)

if st.button("Run ETL Pipeline"):
    # loc_name is free text that ends up inside output file names. Keep only
    # the final path component so separators typed by the user cannot
    # redirect the write outside data_folder.
    safe_loc_name = os.path.basename(loc_name)

    # data_sourcing requires the location name as its fourth argument;
    # omitting it raises TypeError.
    df = data_sourcing(left_lat, left_lon, dist, safe_loc_name)

    # A DataFrame has no unambiguous truth value (``if df:`` raises
    # ValueError), so test for "no data" explicitly.
    # NOTE(review): assumes get_data returns a pandas DataFrame (or None).
    if df is None or df.empty:
        st.error("No data was returned for the given inputs.")
    else:
        st.write("Data loaded successfully !!")

        df = clean_data(df)
        labelled_df, embeddings_df = autogenerate_labels(df)

        labelled_df.to_csv(
            f'{data_folder}/DATA_{safe_loc_name}_CLEAN_LABELLED.csv',
            index=False,
        )
        embeddings_df.to_csv(
            f'{data_folder}/DATA_{safe_loc_name}_CLEAN_EMBEDDINGS.csv',
            index=False,
        )

        st.write("ETL Pipeline executed successfully !!")
src/main.py CHANGED
@@ -9,11 +9,11 @@ from utilities import get_data, input_filter, clean_data, autogenerate_labels
9
 
10
  ################################################## INPUTS ################################################
11
 
12
- left_lat = 18.889833
13
- left_lon = 72.779844
14
- dist = 35
15
 
16
- def data_sourcing():
17
  lat, lon = input_filter(lat = left_lat, lon=left_lon)
18
  df = get_data(lat, lon, dist)
19
  df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
 
9
 
10
  ################################################## INPUTS ################################################
11
 
12
+ # left_lat = 18.889833
13
+ # left_lon = 72.779844
14
+ # dist = 35
15
 
16
+ def data_sourcing(left_lat, left_lon, dist):
17
  lat, lon = input_filter(lat = left_lat, lon=left_lon)
18
  df = get_data(lat, lon, dist)
19
  df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
utilities/__pycache__/data_loader.cpython-312.pyc CHANGED
Binary files a/utilities/__pycache__/data_loader.cpython-312.pyc and b/utilities/__pycache__/data_loader.cpython-312.pyc differ
 
utilities/data_loader.py CHANGED
@@ -168,9 +168,9 @@ def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, fl
168
 
169
  ## entire pipeline
170
 
171
- left_lat = 18.889833
172
- left_lon = 72.779844
173
- dist = 35
174
 
175
  def input_filter(lat=None, lon=None, string=None):
176
  if lat != None:
@@ -187,7 +187,7 @@ def get_data(bottom_left_lat, bottom_left_lon, dist):
187
 
188
  top_right_lat = result[1][0]
189
  top_right_lon = result[0][1]
190
- grid = create_map_grid((left_lat, left_lon), (top_right_lat, top_right_lon), dist, dist)
191
 
192
  grid_dataset = []
193
  for i, row in enumerate(grid):
 
168
 
169
  ## entire pipeline
170
 
171
+ # left_lat = 18.889833
172
+ # left_lon = 72.779844
173
+ # dist = 35
174
 
175
  def input_filter(lat=None, lon=None, string=None):
176
  if lat != None:
 
187
 
188
  top_right_lat = result[1][0]
189
  top_right_lon = result[0][1]
190
+ grid = create_map_grid((bottom_left_lat, bottom_left_lon), (top_right_lat, top_right_lon), dist, dist)
191
 
192
  grid_dataset = []
193
  for i, row in enumerate(grid):