Spaces:
Running
Running
Commit
·
1662e26
1
Parent(s):
28aa6d6
feat: streamlit-app
Browse files
- src/app.py +40 -0
- src/main.py +4 -4
- utilities/__pycache__/data_loader.cpython-312.pyc +0 -0
- utilities/data_loader.py +4 -4
src/app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
######################################## IMPORTING REQUIRED LIBRARIES ####################################
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
import pandas as pd
|
5 |
+
import streamlit as st
|
6 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
7 |
+
data_folder = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
|
8 |
+
from utilities import get_data, input_filter, clean_data, autogenerate_labels
|
9 |
+
|
10 |
+
def data_sourcing(left_lat, left_lon, dist, loc_name):
    """Fetch data for the requested area, save it as a CSV, and return it.

    The latitude/longitude pair is first passed through ``input_filter``;
    the filtered pair and ``dist`` are then handed to ``get_data``. The
    result is written (without its index) to
    ``LOCATION_<loc_name>_DATA.csv`` inside ``data_folder``.

    Args:
        left_lat: Latitude forwarded to ``input_filter``.
        left_lon: Longitude forwarded to ``input_filter``.
        dist: Distance forwarded to ``get_data``.
        loc_name: Label interpolated into the output file name.

    Returns:
        The object returned by ``get_data``.
    """
    filtered_lat, filtered_lon = input_filter(lat=left_lat, lon=left_lon)
    raw_df = get_data(filtered_lat, filtered_lon, dist)
    output_path = f'{data_folder}/LOCATION_{loc_name}_DATA.csv'
    raw_df.to_csv(output_path, index=False)
    return raw_df
|
15 |
+
|
16 |
+
def data_clean_for_training(df):
    """Clean *df* with ``clean_data``, save the result, and return it.

    The cleaned frame is written (without its index) to
    ``MMR_DATA_CLEAN.csv`` inside ``data_folder``.
    """
    cleaned = clean_data(df)
    target_path = f'{data_folder}/MMR_DATA_CLEAN.csv'
    cleaned.to_csv(target_path, index=False)
    return cleaned
|
20 |
+
|
21 |
+
# ---------------------------------------------------------------------------
# Streamlit UI: collect the inputs and run the ETL pipeline on demand.
# ---------------------------------------------------------------------------
st.title("Map Data Analysis - ETL Pipeline")

left_lat = st.number_input("Enter the left latitude", value=18.889833)
left_lon = st.number_input("Enter the left longitude", value=72.779844)
# Debug trace of the current inputs; goes to the server console, not the page.
print(left_lat, left_lon)
loc_name = st.text_input("Enter the location name", value="Mumbai")
dist = st.number_input("Enter the distance", value=35)

if st.button("Run ETL Pipeline"):
    # loc_name is free text that ends up inside output file names, so keep
    # only its final path component to prevent writing outside data_folder.
    safe_loc_name = os.path.basename(loc_name.strip())
    if not safe_loc_name:
        st.error("Location name must not be empty.")
        st.stop()

    # data_sourcing requires the location name as its fourth argument;
    # omitting it raises TypeError.
    df = data_sourcing(left_lat, left_lon, dist, safe_loc_name)

    # `if df:` raises ValueError on a pandas DataFrame (ambiguous truth
    # value), so test emptiness explicitly instead.
    if df.empty:
        st.warning("No data was returned for the given inputs.")
        st.stop()

    st.write("Data loaded successfully !!")

    df = clean_data(df)
    labelled_df, embeddings_df = autogenerate_labels(df)

    labelled_df.to_csv(f'{data_folder}/DATA_{safe_loc_name}_CLEAN_LABELLED.csv', index=False)
    embeddings_df.to_csv(f'{data_folder}/DATA_{safe_loc_name}_CLEAN_EMBEDDINGS.csv', index=False)

    st.write("ETL Pipeline executed successfully !!")
|
src/main.py
CHANGED
@@ -9,11 +9,11 @@ from utilities import get_data, input_filter, clean_data, autogenerate_labels
|
|
9 |
|
10 |
################################################## INPUTS ################################################
|
11 |
|
12 |
-
left_lat = 18.889833
|
13 |
-
left_lon = 72.779844
|
14 |
-
dist = 35
|
15 |
|
16 |
-
def data_sourcing():
|
17 |
lat, lon = input_filter(lat = left_lat, lon=left_lon)
|
18 |
df = get_data(lat, lon, dist)
|
19 |
df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
|
|
|
9 |
|
10 |
################################################## INPUTS ################################################
|
11 |
|
12 |
+
# left_lat = 18.889833
|
13 |
+
# left_lon = 72.779844
|
14 |
+
# dist = 35
|
15 |
|
16 |
+
def data_sourcing(left_lat, left_lon, dist):
|
17 |
lat, lon = input_filter(lat = left_lat, lon=left_lon)
|
18 |
df = get_data(lat, lon, dist)
|
19 |
df.to_csv(f'{data_folder}/MMR_DATA.csv', index=False)
|
utilities/__pycache__/data_loader.cpython-312.pyc
CHANGED
Binary files a/utilities/__pycache__/data_loader.cpython-312.pyc and b/utilities/__pycache__/data_loader.cpython-312.pyc differ
|
|
utilities/data_loader.py
CHANGED
@@ -168,9 +168,9 @@ def create_map_grid(bottom_left: Tuple[float, float], top_right: Tuple[float, fl
|
|
168 |
|
169 |
## entire pipeline
|
170 |
|
171 |
-
left_lat = 18.889833
|
172 |
-
left_lon = 72.779844
|
173 |
-
dist = 35
|
174 |
|
175 |
def input_filter(lat=None, lon=None, string=None):
|
176 |
if lat != None:
|
@@ -187,7 +187,7 @@ def get_data(bottom_left_lat, bottom_left_lon, dist):
|
|
187 |
|
188 |
top_right_lat = result[1][0]
|
189 |
top_right_lon = result[0][1]
|
190 |
-
grid = create_map_grid((
|
191 |
|
192 |
grid_dataset = []
|
193 |
for i, row in enumerate(grid):
|
|
|
168 |
|
169 |
## entire pipeline
|
170 |
|
171 |
+
# left_lat = 18.889833
|
172 |
+
# left_lon = 72.779844
|
173 |
+
# dist = 35
|
174 |
|
175 |
def input_filter(lat=None, lon=None, string=None):
|
176 |
if lat != None:
|
|
|
187 |
|
188 |
top_right_lat = result[1][0]
|
189 |
top_right_lon = result[0][1]
|
190 |
+
grid = create_map_grid((bottom_left_lat, bottom_left_lon), (top_right_lat, top_right_lon), dist, dist)
|
191 |
|
192 |
grid_dataset = []
|
193 |
for i, row in enumerate(grid):
|