viraj committed
Commit · 9e6917b
Parent(s): d15db98
Initial Commit

Changed files:
- Dockerfile +11 -0
- Excel/Noonfood_excel.py +173 -0
- Excel/Noonfood_excel_final.py +129 -0
- Excel/__init__.py +0 -0
- Excel/__pycache__/Noonfood_excel.cpython-311.pyc +0 -0
- Excel/__pycache__/Noonfood_excel.cpython-312.pyc +0 -0
- Excel/__pycache__/__init__.cpython-311.pyc +0 -0
- Excel/__pycache__/__init__.cpython-312.pyc +0 -0
- Excel/__pycache__/excel.cpython-311.pyc +0 -0
- Excel/__pycache__/excel_final.cpython-311.pyc +0 -0
- Excel/__pycache__/excel_final.cpython-312.pyc +0 -0
- Excel/excel.py +112 -0
- Excel/excel_final.py +119 -0
- Excel/links_excel.py +152 -0
- Excel/streamlit_excel.py +301 -0
- Mongo/Deliveroo_excel.py +5 -0
- Mongo/Noonfood_Mongo_URL_From_location.py +250 -0
- Mongo/Noonfood_location.py +153 -0
- Mongo/Noonfood_mongo_Single_URL.py +274 -0
- Mongo/Noonfood_multy.py +6 -0
- Mongo/__init__.py +0 -0
- Mongo/__pycache__/Deliveroo_excel.cpython-311.pyc +0 -0
- Mongo/__pycache__/Deliveroo_excel.cpython-312.pyc +0 -0
- Mongo/__pycache__/Noonfood_Mongo_URL_From_location.cpython-311.pyc +0 -0
- Mongo/__pycache__/Noonfood_Mongo_URL_From_location.cpython-312.pyc +0 -0
- Mongo/__pycache__/Noonfood_location.cpython-311.pyc +0 -0
- Mongo/__pycache__/Noonfood_location.cpython-312.pyc +0 -0
- Mongo/__pycache__/Noonfood_mongo_Single_URL.cpython-311.pyc +0 -0
- Mongo/__pycache__/Noonfood_mongo_Single_URL.cpython-312.pyc +0 -0
- Mongo/__pycache__/Noonfood_multy.cpython-311.pyc +0 -0
- Mongo/__pycache__/Noonfood_multy.cpython-312.pyc +0 -0
- Mongo/__pycache__/__init__.cpython-311.pyc +0 -0
- Mongo/__pycache__/__init__.cpython-312.pyc +0 -0
- Mongo/__pycache__/mongo_final.cpython-311.pyc +0 -0
- Mongo/__pycache__/mongo_final.cpython-312.pyc +0 -0
- Mongo/__pycache__/mongo_one.cpython-311.pyc +0 -0
- Mongo/__pycache__/mongo_one.cpython-312.pyc +0 -0
- Mongo/mongo_final.py +292 -0
- Mongo/mongo_one.py +243 -0
- app.py +151 -0
- requirements.txt +11 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
FROM python:3.10

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
Excel/Noonfood_excel.py
ADDED
@@ -0,0 +1,173 @@
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import StreamingResponse
import pandas as pd
import numpy as np
import requests
import os
import io
import zipfile
from urllib.parse import urljoin
from typing import List
from pydantic import BaseModel
from io import BytesIO

# app = FastAPI()

# Input model


# Extract items with categories
def extract_items_with_categories(menu):
    items_list = []
    categories_seen = set()  # Set to keep track of categories that have been added
    for category in menu['categories']:
        category_name = category['name']
        category_position = category['position'] if category['position'] != -1 else 0
        if category_name not in categories_seen:
            items_list.append({
                'category': category_name,
                'position': category_position,
                'Item': None,  # Placeholder for item
                'itemCode': None,
                'item_position': None,
                'img_url': None,
                'Description': None,
                'price_dine': None
            })
            categories_seen.add(category_name)
        for item_code in category['items']:
            item = next((item for item in menu['items'] if item['itemCode'] == item_code), None)
            if item:
                items_list.append({
                    'category': '',  # Empty string for subsequent items in the same category
                    'position': category['position'],
                    'Item': item['name'],
                    'itemCode': item.get("itemCode", ""),
                    'item_position': item['position'],
                    'img_url': "https://f.nooncdn.com/food_production/" + item['image'],
                    'Description': item['itemDesc'],
                    'price_dine': item['price']
                })
    return items_list

# Extract options
def extract_options(menu):
    options_dict = {}
    for item in menu['items']:
        if 'modifiers' in item:
            for modifier_code in item['modifiers']:
                modifier = next((modifier for modifier in menu['modifiers'] if modifier['modifierCode'] == modifier_code), None)
                if modifier:
                    if item['itemCode'] not in options_dict:
                        options_dict[item['itemCode']] = {}
                    if modifier['name'] not in options_dict[item['itemCode']]:
                        options_dict[item['itemCode']][modifier['name']] = {
                            'Min': modifier.get('minTotalOptions'),
                            'Max': modifier.get('maxTotalOptions'),
                            'Options': []
                        }
                    for option in modifier['options']:
                        option_item = next((i for i in menu['items'] if i['itemCode'] == option['itemCode']), None)
                        if option_item:
                            options_dict[item['itemCode']][modifier['name']]['Options'].append({
                                'Option name': option_item['name'],
                                'Option price': option['price']
                            })
    return options_dict

# Process data for a single URL
def process_url(url, latitude, longitude):
    outlet_code = url.split('/')[-2]

    # Make the request
    api_url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
    payload = {
        "addressLat": latitude,
        "addressLng": longitude,
        "deliveryType": "default",
        "outletCode": outlet_code
    }
    headers = {
        'Connection': 'keep-alive',
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
        "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
    }
    response = requests.post(api_url, headers=headers, json=payload)
    json_data = response.json()

    # Extract items and options
    items = extract_items_with_categories(json_data['data']['menu'])
    options = extract_options(json_data['data']['menu'])

    # Create a DataFrame for items
    items_df = pd.DataFrame(items)

    options_list = []
    for item_code, option_groups in options.items():
        for group_name, group_data in option_groups.items():
            row = {
                'itemCode': item_code,
                'Option Group Name': group_name,
                'Min': group_data.get('Min'),
                'Max': group_data.get('Max')
            }
            for i, option in enumerate(group_data['Options']):
                row[f'Option name {i+1}'] = option['Option name']
                row[f'Option price {i+1}'] = option['Option price']
            options_list.append(row)

    # Create DataFrame for options
    options_df = pd.DataFrame(options_list)

    # Merge DataFrames on 'itemCode'
    merged_df = items_df.merge(options_df, on='itemCode', how='left')

    merged_df['category'] = merged_df['category'].replace('', np.nan).ffill()
    merged_df['Item'] = merged_df['Item'].replace('', np.nan)
    merged_df.iloc[:, :7] = merged_df.iloc[:, :7].mask(merged_df.iloc[:, :7].duplicated(), '')
    merged_df = merged_df.dropna(how='all')
    non_cat_pos_columns = merged_df.columns.difference(['category', 'position'])
    mask = merged_df[non_cat_pos_columns].isna().all(axis=1) & merged_df[['category', 'position']].notna().all(axis=1)
    merged_df = merged_df[~mask]

    if 'Max' in merged_df.columns:
        max_column_index = merged_df.columns.get_loc('Max')
        for i in range(max_column_index + 1, len(merged_df.columns)):
            merged_df.rename(columns={merged_df.columns[i]: ''}, inplace=True)
    merged_df = merged_df.drop(columns=['itemCode'])

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        merged_df.to_excel(writer, index=False, sheet_name='Menu')
    output.seek(0)

    return output, f"{outlet_code}_menu.xlsx"

# @app.post("/generate_and_download", response_class=StreamingResponse)
# def generate_and_download(details: RestaurantDetailsRequest):
#     files = []
#     for url in details.urls:
#         output_filename = process_url(url, details.latitude, details.longitude)
#         files.append(output_filename)
#
#     zip_buffer = io.BytesIO()
#     with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
#         for file in files:
#             zip_file.write(file, os.path.basename(file))
#     zip_buffer.seek(0)
#
#     return StreamingResponse(zip_buffer, media_type="application/x-zip-compressed", headers={"Content-Disposition": "attachment;filename=output_files.zip"})

# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="127.0.0.1", port=8000)
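A minimal usage sketch for process_url, assuming a hypothetical outlet URL (the outlet code is read from the second-to-last path segment) and the integer-scaled coordinates used elsewhere in this commit; the returned BytesIO is written straight to disk:

# Usage sketch (hypothetical URL and coordinates).
from Excel.Noonfood_excel import process_url

output, filename = process_url(
    "https://food.noon.com/uae-en/o/OUTLET123/",  # hypothetical; split('/')[-2] -> "OUTLET123"
    244538840,   # addressLat, integer-scaled as in streamlit_excel.py
    543773438,   # addressLng, integer-scaled
)
with open(filename, "wb") as f:
    f.write(output.getvalue())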
Excel/Noonfood_excel_final.py
ADDED
@@ -0,0 +1,129 @@
import requests
import json
import pandas as pd
import numpy as np
from fastapi import FastAPI, Query

app = FastAPI()

latitude = int(input("Enter latitude:"))
longitude = int(input("Enter longitude:"))
url1 = input("Enter the restaurant url:")
outlet_code = url1.split('/')[-2]

def extract_items_with_categories(menu):
    items_list = []
    categories_seen = set()  # Set to keep track of categories that have been added
    for category in menu['categories']:
        category_name = category['name']
        category_position = category['position'] if category['position'] != -1 else 0
        if category_name not in categories_seen:
            items_list.append({
                'category': category_name,
                'item': None,  # Placeholder for item
                'itemCode': None,
                'item-position': None,
                'img-url': None,
                'price': None,
                'Description': None,
                'position': category_position
            })
            categories_seen.add(category_name)
        for item_code in category['items']:
            item = next((item for item in menu['items'] if item['itemCode'] == item_code), None)
            if item:
                items_list.append({
                    'category': '',  # Empty string for subsequent items in the same category
                    'item': item['name'],
                    'itemCode': item['itemCode'],
                    'item-position': item['position'],
                    'img-url': "https://f.nooncdn.com/food_production/" + item['image'],
                    'price': item['price'],
                    'Description': item['itemDesc'],
                    'position': category['position']
                })
    return items_list

def extract_options(menu):
    options_dict = {}
    for item in menu['items']:
        if 'modifiers' in item:
            for modifier_code in item['modifiers']:
                modifier = next((modifier for modifier in menu['modifiers'] if modifier['modifierCode'] == modifier_code), None)
                if modifier:
                    if item['itemCode'] not in options_dict:
                        options_dict[item['itemCode']] = {}
                    if modifier['name'] not in options_dict[item['itemCode']]:
                        options_dict[item['itemCode']][modifier['name']] = {
                            'Min': modifier.get('minTotalOptions'),
                            'Max': modifier.get('maxTotalOptions'),
                            'Options': []
                        }
                    for option in modifier['options']:
                        option_item = next((i for i in menu['items'] if i['itemCode'] == option['itemCode']), None)
                        if option_item:
                            options_dict[item['itemCode']][modifier['name']]['Options'].append({
                                'Option name': option_item['name'],
                                'Option price': option['price']
                            })
    return options_dict

# Make the request
url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
payload = {
    "addressLat": latitude,
    "addressLng": longitude,
    "deliveryType": "default",
    "outletCode": outlet_code
}
headers = {
    'Connection': 'keep-alive',
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
    "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
}
response = requests.post(url, headers=headers, json=payload)
json_data = response.json()

# Extract items and options
items = extract_items_with_categories(json_data['data']['menu'])
options = extract_options(json_data['data']['menu'])

# Create a DataFrame for items
items_df = pd.DataFrame(items)

options_list = []
for item_code, option_groups in options.items():
    for group_name, group_data in option_groups.items():
        row = {
            'itemCode': item_code,
            'Option Group Name': group_name,
            'Min': group_data.get('Min'),
            'Max': group_data.get('Max')
        }
        for i, option in enumerate(group_data['Options']):
            row[f'Option name {i+1}'] = option['Option name']
            row[f'Option price {i+1}'] = option['Option price']
        options_list.append(row)

# Create DataFrame for options
options_df = pd.DataFrame(options_list)

# Merge DataFrames on 'itemCode'
merged_df = items_df.merge(options_df, on='itemCode', how='left')

merged_df['category'] = merged_df['category'].replace('', np.nan).ffill()
merged_df['item'] = merged_df['item'].replace('', np.nan)
# merged_df.iloc[:, :7] = merged_df.groupby('category').apply(lambda x: x.ffill().mask(x.duplicated(), '')).reset_index(level=0, drop=True)
# merged_df['category'] = merged_df['category'].replace('', pd.NA).ffill()
merged_df.iloc[:, :7] = merged_df.iloc[:, :7].mask(merged_df.iloc[:, :7].duplicated(), '')
merged_df = merged_df.dropna(subset=['item', 'itemCode', 'item-position', 'img-url', 'price', 'Description'], how='all')

merged_df.to_excel("output3.xlsx", index=False)
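Note that latitude and longitude are read with int() here, while the hardcoded Noon payload in Excel/streamlit_excel.py passes coordinates such as 244538840, which looks like decimal degrees scaled by 1e7. Under that assumption, a decimal coordinate would be converted roughly like this:

# Hypothetical helper, assuming the API wants degrees * 1e7 as an integer
# (244538840 == 24.4538840 * 1e7 in the hardcoded payload).
def to_noon_coord(degrees: float) -> int:
    return int(round(degrees * 1e7))

print(to_noon_coord(24.4538840))  # 244538840
print(to_noon_coord(54.3773438))  # 543773438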
Excel/__init__.py
ADDED
File without changes
Excel/__pycache__/Noonfood_excel.cpython-311.pyc
ADDED
Binary file (7.76 kB)

Excel/__pycache__/Noonfood_excel.cpython-312.pyc
ADDED
Binary file (7.94 kB)

Excel/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (167 Bytes)

Excel/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (202 Bytes)

Excel/__pycache__/excel.cpython-311.pyc
ADDED
Binary file (10.1 kB)

Excel/__pycache__/excel_final.cpython-311.pyc
ADDED
Binary file (7.42 kB)

Excel/__pycache__/excel_final.cpython-312.pyc
ADDED
Binary file (6.13 kB)
Excel/excel.py
ADDED
@@ -0,0 +1,112 @@
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd

def Excel_final(url):
    def fetch_restaurant_data(url):
        headers = {
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        json_data = json.loads(script_tag.string)
        json_data = json_data['props']['initialState']['menuPage']['menu']['meta']
        items = json_data['items']
        categories = json_data['categories']
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups = json_data['modifierGroups']
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                continue
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            item_with_modifiers = {
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            }
            items_with_modifiers.append(item_with_modifiers)
        return items_with_modifiers

    def save_data_to_excel(data):
        writer = pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter')
        rows = []
        max_options = 0
        # Find the maximum number of options for any modifier group
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                if num_options > max_options:
                    max_options = num_options
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', '')
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else ''
                    ]
                # Fill in the remaining columns with empty strings if there are fewer options than max_options
                modifier_group_row += [''] * (max_options * 2 - len(options) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                rows.append(base_row + [''] * (max_options * 2 + 3))
        # Create column headers
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name', 'Description', 'Item Price', 'Image URL', 'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            for i in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[i]: ''}, inplace=True)
        df.to_excel(writer, sheet_name='Sheet1', index=False)
        writer.close()
        print("Data saved to restaurant_data.xlsx")

    data = fetch_restaurant_data(url)
    save_data_to_excel(data)
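Excel_final here takes a single Deliveroo menu URL, scrapes the embedded __NEXT_DATA__ payload, and writes restaurant_data.xlsx to the working directory; a minimal invocation (URL hypothetical) looks like:

from Excel.excel import Excel_final

# Hypothetical menu URL; any Deliveroo page exposing the __NEXT_DATA__
# script tag with the expected menuPage structure should work.
Excel_final("https://deliveroo.ae/menu/dubai/some-area/some-restaurant")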
Excel/excel_final.py
ADDED
@@ -0,0 +1,119 @@
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
from io import BytesIO

def Excel_final(url):
    def fetch_restaurant_data(url):
        headers = {
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        json_data = json.loads(script_tag.string)
        json_data = json_data['props']['initialState']['menuPage']['menu']['meta']
        items = json_data['items']
        categories = json_data['categories']
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups = json_data['modifierGroups']
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                continue
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            item_with_modifiers = {
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            }
            items_with_modifiers.append(item_with_modifiers)
        return items_with_modifiers

    def save_data_to_excel(data):
        rows = []
        max_options = 0
        # Find the maximum number of options for any modifier group
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                if num_options > max_options:
                    max_options = num_options
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', '')
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else ''
                    ]
                # Fill in the remaining columns with empty strings if there are fewer options than max_options
                modifier_group_row += [''] * (max_options * 2 - len(options) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                rows.append(base_row + [''] * (max_options * 2 + 3))
        # Create column headers
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name', 'Description', 'Item Price', 'Image URL', 'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            for i in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[i]: ''}, inplace=True)
        return df

    outlet_code = "Deliveroo_restro"
    data = fetch_restaurant_data(url)
    df = save_data_to_excel(data)

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Menu')
    output.seek(0)

    return output, f"{outlet_code}_menu.xlsx"
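Unlike Excel/excel.py, this variant returns the workbook as an in-memory BytesIO plus a filename, which suits the streaming FastAPI endpoints in this repo; a sketch of saving it locally (URL hypothetical):

from Excel.excel_final import Excel_final

output, filename = Excel_final("https://deliveroo.ae/menu/dubai/some-area/some-restaurant")  # hypothetical
with open(filename, "wb") as f:  # filename is always "Deliveroo_restro_menu.xlsx" here
    f.write(output.getvalue())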
Excel/links_excel.py
ADDED
@@ -0,0 +1,152 @@
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_restaurant_links(city, location):
    base_url = "https://deliveroo.ae"
    url = f"{base_url}/restaurants/{city}/{location}/?collection=restaurants"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Cookie': '__cf_bm=oakl46sJ3V9vwmnIIbfXWfkHbGmmC2pH56GyTI33b4U-1715931048-1.0.1.1-4XOcSGSThZV_INfpn3aptlo8jpZtLFbYoLsZxP9BpQ8LIjq3wBIe8CPlSf0AomuniXy4TZWyVlBQBTlrm.CPiSfI1jzx18y9zxwc9GX0fmo; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=2e989653-2776-4ede-a52e-b610f1ad64a2'
    }

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        if "We couldn't find" in soup.text or "No restaurants" in soup.text:
            print("No restaurants found for the specified location.")
            return []

        divs = soup.find_all('div', class_=["HomeFeedScrollTracker-bd9a6ffea8a4b4b7", "HomeFeedUICard-157f7be5d7b2fa7b"])

        hrefs = [a_tag['href'] for div in divs for a_tag in div.find_all('a', href=True)]
        hrefs = hrefs[:20]  # keep only the first 20 links
        return [f"{base_url}{href}" for href in hrefs]
    else:
        print(f"Request failed with status code {response.status_code}.")
        return []

def Excel_final(urls):
    def fetch_restaurant_data(url):
        headers = {
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        json_data = json.loads(script_tag.string)
        json_data = json_data['props']['initialState']['menuPage']['menu']['meta']
        items = json_data['items']
        categories = json_data['categories']
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups = json_data['modifierGroups']
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                continue
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            item_with_modifiers = {
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            }
            items_with_modifiers.append(item_with_modifiers)
        return items_with_modifiers

    def save_data_to_excel(data, sheet_name, writer):
        rows = []
        max_options = 0
        # Find the maximum number of options for any modifier group
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                if num_options > max_options:
                    max_options = num_options
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', '')
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else ''
                    ]
                # Fill in the remaining columns with empty strings if there are fewer options than max_options
                modifier_group_row += [''] * (max_options * 2 - len(options) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                rows.append(base_row + [''] * (max_options * 2 + 3))
        # Create column headers
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name', 'Description', 'Item Price', 'Image URL', 'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            for i in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[i]: ''}, inplace=True)
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
        for idx, url in enumerate(urls):
            data = fetch_restaurant_data(url)
            if data:
                save_data_to_excel(data, f'Sheet{idx+1}', writer)
    print("Data saved to restaurant_data.xlsx")

if __name__ == "__main__":
    city = input("Enter the city: ")
    location = input("Enter the location: ")

    urls = fetch_restaurant_links(city, location)

    if urls:
        Excel_final(urls)
    else:
        print("No restaurant links found or unable to fetch data.")
Excel/streamlit_excel.py
ADDED
@@ -0,0 +1,301 @@
import pandas as pd
import requests
import json
from bs4 import BeautifulSoup
import streamlit as st
import mongoengine as me
import certifi

# Streamlit app setup
st.title("Restaurant Menu Data Extraction")
url1 = st.text_input("Please enter the restaurant URL:")

if url1:
    # Extract outlet code from URL
    outlet_code = url1.split('/')[-2]
    st.write(f"Extracted outlet code: {outlet_code}")

    # API request setup
    url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
    payload = {
        "addressLat": 244538840,
        "addressLng": 543773438,
        "deliveryType": "default",
        "outletCode": outlet_code
    }
    headers = {
        'Connection': 'keep-alive',
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
        "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
    }

    response = requests.post(url, headers=headers, json=payload)

    if response.status_code == 200:
        json_data = response.json()
        with open(f'{outlet_code}.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4)
        st.success("JSON data has been written to json file.")
    else:
        st.error(f"Failed to retrieve content from the URL: {response.status_code}")

    # Extract items and options
    def extract_items_with_categories(menu):
        items_list = []
        for category in menu['categories']:
            category_name = category['name']
            category_position = category['position']
            for item_code in category['items']:
                item = next((item for item in menu['items'] if item['itemCode'] == item_code), None)
                if item:
                    items_list.append({
                        'category': category_name,
                        'item': item['name'],
                        'itemCode': item['itemCode'],
                        'item-position': item['position'],
                        'img-url': item['image'],
                        'price': item['price'],
                        'Description': item['itemDesc'],
                        'position': category_position,
                    })
        return items_list

    def extract_options(menu):
        options_list = []
        for item in menu['items']:
            if 'modifiers' in item:
                for modifier_code in item['modifiers']:
                    modifier = next((modifier for modifier in menu['modifiers'] if modifier['modifierCode'] == modifier_code), None)
                    if modifier:
                        for option in modifier['options']:
                            option_item = next((item for item in menu['items'] if item['itemCode'] == option['itemCode']), None)
                            if option_item:
                                options_list.append({
                                    'itemCode': item['itemCode'],
                                    'itemName': item['name'],
                                    'Option Group Name': modifier['name'],
                                    'Min': modifier.get('minTotalOptions'),
                                    'Max': modifier.get('maxTotalOptions'),
                                    'Option name': option_item['name'],
                                    'Option price': option['price']
                                })
        return options_list

    items_list = extract_items_with_categories(json_data['data']['menu'])
    options_list = extract_options(json_data['data']['menu'])

    def join_with_newline(x):
        return '\n'.join(str(value) for value in x)

    # Creating DataFrames with 'itemCode' column
    df_items = pd.DataFrame(items_list)
    df_options = pd.DataFrame(options_list)

    # Merge the DataFrames based on 'itemCode'
    final_df = pd.merge(df_items, df_options, on='itemCode', how='left')

    final_df = final_df.groupby(['item', 'category'], as_index=False).agg({
        'Option Group Name': join_with_newline,
        'Option name': join_with_newline,
        'Option price': join_with_newline,
        'Min': join_with_newline,
        'Max': join_with_newline,
        'item-position': 'first',
        'img-url': 'first',
        'price': 'first',
        'Description': 'first',
        'position': 'first'
    })

    final_df = final_df.drop_duplicates().reset_index(drop=True)

    def split_rows(df):
        rows = []
        for idx, row in df.iterrows():
            min_values = row['Min'].split('\n')
            max_values = row['Max'].split('\n')
            option_groups = row['Option Group Name'].split('\n')
            option_names = row['Option name'].split('\n')
            option_prices = row['Option price'].split('\n')

            for i in range(len(option_groups)):
                current_row = {
                    'category': row['category'],
                    'item': row['item'],
                    'item-position': row['item-position'],
                    'img-url': row['img-url'],
                    'price': row['price'],
                    'Description': row['Description'],
                    'position': row['position'],
                    'Min': min_values[i] if i < len(min_values) else '',
                    'Max': max_values[i] if i < len(max_values) else '',
                    'Option Group Name': option_groups[i] if i < len(option_groups) else '',
                    'Option name': option_names[i] if i < len(option_names) else '',
                    'Option price': option_prices[i] if i < len(option_prices) else ''
                }
                rows.append(current_row)
        return pd.DataFrame(rows)

    split_df = split_rows(final_df)
    split_df.fillna('', inplace=True)
    split_df = split_df.replace('nan', '')
    split_df.to_excel(f'{outlet_code}_1.xlsx', index=False)

    # Final processing to JSON
    df = pd.read_excel(f'{outlet_code}_1.xlsx')
    json_data = df.to_json(orient='records')
    with open(f'{outlet_code}_1.json', 'w') as f:
        f.write(json_data)

    # Process JSON data for final output
    with open(f'{outlet_code}_1.json', 'r') as file:
        data = json.load(file)

    def process_item(item):
        common_fields = {
            'category': item['category'],
            'category-position': item['position'],
            'item': item['item'],
            'item-position': item['item-position'],
            'Description': item['Description'],
            'img-url': item['img-url'],
            'price': item['price']
        }

        options = []
        for i in range(1, 38):
            option_group_name = f"Option Group {i} Name"
            option_name_key = f"Option {i} Name"
            option_price_key = f"Option {i} Price"
            min_values_key = f"Min{i}"
            max_values_key = f"Max{i}"

            if option_group_name in item and item[option_group_name]:
                options.append({
                    'Option Group Name': item[option_group_name],
                    'Option Name': item[option_name_key],
                    'Option Price': item[option_price_key],
                    'Min': item[min_values_key],
                    'Max': item[max_values_key]
                })

        return {**common_fields, 'Options': options}

    processed_items = [process_item(item) for item in data]
    with open(f'processed_items_{outlet_code}.json', 'w') as outfile:
        json.dump(processed_items, outfile, indent=4)

    # Extract options
    def extract_options(item):
        option_data = {}
        for option in item["Options"]:
            option_group_name = option["Option Group Name"]
            if option_group_name not in option_data:
                option_data[option_group_name] = {
                    "Names": [],
                    "Prices": [],
                    "Mins": [],
                    "Maxs": [],
                    "GroupMin": option["Min"],
                    "GroupMax": option["Max"]
                }
            option_data[option_group_name]["Names"].append(option["Option Name"])
            option_data[option_group_name]["Prices"].append(option["Option Price"])
            option_data[option_group_name]["Mins"].append(option["Min"])
            option_data[option_group_name]["Maxs"].append(option["Max"])
        return option_data

    with open(f'processed_items_{outlet_code}.json', 'r') as file:
        data = json.load(file)

    all_rows_data = []
    for item in data:
        item_info = {
            'category': item['category'],
            'category-position': item['category-position'],
            'item': item['item'],
            'item-position': item['item-position'],
            'Description': item['Description'],
            'img-url': item['img-url'],
            'price': item['price']
        }
        options = extract_options(item)
        max_options = 0
        for group in options.values():
            max_options = max(max_options, len(group["Names"]))

        for i in range(max_options):
            row = item_info.copy()
            for group_name, group_data in options.items():
                row[f"Option Group {group_name}"] = group_name
                row[f"Option {group_name} Name"] = group_data["Names"][i] if i < len(group_data["Names"]) else ""
                row[f"Option {group_name} Price"] = group_data["Prices"][i] if i < len(group_data["Prices"]) else ""
                row[f"Min {group_name}"] = group_data["Mins"][i] if i < len(group_data["Mins"]) else ""
                row[f"Max {group_name}"] = group_data["Maxs"][i] if i < len(group_data["Maxs"]) else ""
            all_rows_data.append(row)

    final_df = pd.DataFrame(all_rows_data)
    final_df.to_excel(f'{outlet_code}_final_output.xlsx', index=False)

    # MongoDB setup
    mongo_host = st.secrets["mongo"]["host"]
    mongo_username = st.secrets["mongo"]["username"]
    mongo_password = st.secrets["mongo"]["password"]

    # Connect to MongoDB
    connection_str = f"mongodb+srv://{mongo_username}:{mongo_password}@{mongo_host}/test?retryWrites=true&w=majority"
    me.connect(host=connection_str, tlsCAFile=certifi.where())

    class Item(me.Document):
        category = me.StringField()
        category_position = me.IntField()
        item = me.StringField()
        item_position = me.IntField()
        description = me.StringField()
        img_url = me.StringField()
        price = me.FloatField()
        options = me.ListField()

    items_collection = []

    for index, row in final_df.iterrows():
        options = []
        for i in range(1, 38):
            option_group = f"Option Group {i}"
            option_name_key = f"Option {i} Name"
            option_price_key = f"Option {i} Price"
            min_key = f"Min {i}"
            max_key = f"Max {i}"

            if pd.notna(row[option_group]) and row[option_group]:
                options.append({
                    "group_name": row[option_group],
                    "name": row[option_name_key],
                    "price": row[option_price_key],
                    "min": row[min_key],
                    "max": row[max_key]
                })

        item_doc = Item(
            category=row['category'],
            category_position=row['category-position'],
            item=row['item'],
            item_position=row['item-position'],
            description=row['Description'],
            img_url=row['img-url'],
            price=row['price'],
            options=options
        )
        items_collection.append(item_doc)

    if items_collection:
        Item.objects.insert(items_collection)
        st.success("Data has been saved to MongoDB.")
    else:
        st.warning("No data to save to MongoDB.")
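Because the MongoDB credentials are read from st.secrets["mongo"], running this app locally requires a Streamlit secrets file; a sketch of the expected .streamlit/secrets.toml, with placeholder values:

# .streamlit/secrets.toml (placeholder values)
[mongo]
host = "cluster0.example.mongodb.net"
username = "your-username"
password = "your-password"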
Mongo/Deliveroo_excel.py
ADDED
@@ -0,0 +1,5 @@
from Mongo.mongo_one import fetch_restaurant_data

def Deliveroo_excel_multy(urls, location):
    for url in urls:
        fetch_restaurant_data(url, location)
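A usage sketch with placeholder inputs; the exact meaning of the location argument is defined by fetch_restaurant_data in Mongo/mongo_one.py:

from Mongo.Deliveroo_excel import Deliveroo_excel_multy

# Placeholder URLs and location; each URL is fetched and stored in turn.
Deliveroo_excel_multy(
    ["https://deliveroo.ae/menu/dubai/some-area/restaurant-1",   # hypothetical
     "https://deliveroo.ae/menu/dubai/some-area/restaurant-2"],  # hypothetical
    "dubai",
)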
Mongo/Noonfood_Mongo_URL_From_location.py
ADDED
@@ -0,0 +1,250 @@
import pandas as pd
import requests
import json
import logging
import mongoengine as me
from mongoengine import connect
import certifi
from bson.objectid import ObjectId
from fastapi import FastAPI, Query
from pydantic import BaseModel

db_name = 'NoonFood'
host = 'ac-ut7ghe7-shard-00-00.299tz43.mongodb.net'
port = 27017
username = 'sahlhubnoonfood'
password = '5JiDKBl6IPyH9Ff4'

# Connect to the MongoDB database
connect(db_name, host=host, port=port, username=username, password=password, ssl=True, tlsCAFile=certifi.where())

class NoonFoodLink(me.Document):
    initial_response = me.DictField(required=True)
    url_list = me.ListField(me.StringField(), required=True)

class MenuOption(me.Document):
    name = me.StringField(required=True)
    price = me.FloatField(required=True)
    # location = me.StringField(required=True)
    # restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuOptionGroup(me.Document):
    group_name = me.StringField(required=True)
    options = me.ListField(me.ReferenceField(MenuOption))
    min_quantity = me.IntField(required=True)
    max_quantity = me.IntField(required=True)
    # restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuItem(me.Document):
    category_name = me.StringField(required=True)
    category_position = me.IntField(required=True)
    name = me.StringField(required=True)
    position = me.IntField(required=True)
    description = me.StringField()
    image_url = me.StringField()
    price = me.FloatField(required=True)
    option_groups = me.ListField(me.ReferenceField(MenuOptionGroup))
    # restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuCategory(me.Document):
    name = me.StringField(required=True)
    items = me.ListField(me.ReferenceField(MenuItem))
    # restro_ref_id = me.ReferenceField(NoonFoodLink)

# app = FastAPI()

class RestaurantDetailsRequest(BaseModel):
    latitude: float
    longitude: float
    restaurant_url: str

class StoreDataRequest(BaseModel):
    initial_response: dict
    url_list: list

def Mongo_location_URLS(url1, latitude, longitude):
    outlet_code = url1.split('/')[-2]
    def extract_items_with_categories(menu):
        items_list = []
        categories_seen = set()
        for category in menu['categories']:
            category_name = category['name']
            if category_name not in categories_seen:
                items_list.append({
                    'category': category_name,
                    'item': None,
                    'itemCode': None,
                    'item-position': None,
                    'img-url': None,
                    'price': None,
                    'Description': None,
                    'position': category['position']
                })
                categories_seen.add(category_name)
            for item_code in category['items']:
                item = next((item for item in menu['items'] if item['itemCode'] == item_code), None)
                if item:
                    items_list.append({
                        'category': category_name,
                        'item': item['name'],
                        'itemCode': item['itemCode'],
                        'item-position': item['position'],
                        'img-url': item.get('image', ''),
                        'price': item.get('price', 0.0),
                        'Description': item.get('itemDesc', ''),
                        'position': category['position']
                    })
        return items_list

    # Extract options with proper formatting
    def extract_options(menu):
        options_dict = {}
        for item in menu['items']:
            if 'modifiers' in item:
                for modifier_code in item['modifiers']:
                    modifier = next((modifier for modifier in menu['modifiers'] if modifier['modifierCode'] == modifier_code), None)
                    if modifier:
                        if item['itemCode'] not in options_dict:
                            options_dict[item['itemCode']] = {}
                        if modifier['name'] not in options_dict[item['itemCode']]:
                            options_dict[item['itemCode']][modifier['name']] = {
                                'Min': modifier.get('minTotalOptions'),
                                'Max': modifier.get('maxTotalOptions'),
                                'Options': []
                            }
                        for option in modifier['options']:
                            option_item = next((i for i in menu['items'] if i['itemCode'] == option['itemCode']), None)
                            if option_item:
                                options_dict[item['itemCode']][modifier['name']]['Options'].append({
                                    'Option name': option_item['name'],
                                    'Option price': option.get('price', 0.0)
                                })
        return options_dict

    def process_json(data, options, outlet_code):
        def process_item(item):
            common_fields = {
                'category': item['category'],
                'category-position': item['position'],
                'item': item['item'],
                'item-position': item['item-position'],
                'Description': item['Description'],
                'img-url': item['img-url'],
                'price': item['price']
            }
            item_options = options.get(item['itemCode'], {})
            option_groups = []
            for group_name, group_data in item_options.items():
                min_value = group_data['Min']
                max_value = group_data['Max']
                options_list = []
                for option in group_data['Options']:
                    options_list.append({
                        'name': option['Option name'],
                        'price': option['Option price'],
                    })
                option_groups.append({
                    'group_name': group_name,
                    'min_quantity': min_value,
                    'max_quantity': max_value,
                    'options': options_list,
                })
            return {
                'item': common_fields,
                'option_groups': option_groups
            }

        processed_items = []
        for item in data:
            processed_item = process_item(item)
            processed_items.append(processed_item)

        return processed_items

    def store_in_db(processed_items):
        categories_dict = {}

        for item in processed_items:
            item_common_fields = item['item']
            option_groups = item['option_groups']

            # Ensure required fields are present
            if not item_common_fields['item'] or not item_common_fields['item-position'] or not item_common_fields['price']:
                logging.error(f"Missing required fields in item: {item_common_fields}")
                continue

            # Create MenuItem
            item_document = MenuItem(
                category_name=item_common_fields['category'],
                category_position=item_common_fields['category-position'],
                name=item_common_fields['item'],
                position=item_common_fields['item-position'],
                description=item_common_fields['Description'],
                image_url=item_common_fields['img-url'],
                price=item_common_fields['price'],
                option_groups=[]
            )
            item_document.save()

            for group in option_groups:
                options_list = []
                for option_data in group['options']:
                    menu_option = MenuOption(
                        name=option_data['name'],
                        price=option_data['price'],
                    )
                    menu_option.save()
                    options_list.append(menu_option)

                option_group = MenuOptionGroup(
                    group_name=group['group_name'],
                    min_quantity=group['min_quantity'],
                    max_quantity=group['max_quantity'],
                    options=options_list,
                )
                option_group.save()
                item_document.option_groups.append(option_group)

            item_document.save()

            # Manage MenuCategory
            category_name = item_common_fields['category']
            if category_name not in categories_dict:
                categories_dict[category_name] = MenuCategory(name=category_name, items=[])
214 |
+
categories_dict[category_name].items.append(item_document)
|
215 |
+
|
216 |
+
for category in categories_dict.values():
|
217 |
+
category.save()
|
218 |
+
|
219 |
+
|
220 |
+
url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
|
221 |
+
payload = {
|
222 |
+
"addressLat": latitude,
|
223 |
+
"addressLng": longitude,
|
224 |
+
"deliveryType": "default",
|
225 |
+
"outletCode": outlet_code
|
226 |
+
}
|
227 |
+
headers = {
|
228 |
+
'Connection': 'keep-alive',
|
229 |
+
"Accept": "application/json, text/plain, */*",
|
230 |
+
"Accept-Encoding": "gzip, deflate, br, zstd",
|
231 |
+
"Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
|
232 |
+
"Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
|
233 |
+
"Content-Type": "application/json",
|
234 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
|
235 |
+
"Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
|
236 |
+
}
|
237 |
+
|
238 |
+
try:
|
239 |
+
response = requests.post(url, headers=headers, json=payload)
|
240 |
+
json_data = response.json()
|
241 |
+
items = extract_items_with_categories(json_data['data']['menu'])
|
242 |
+
options = extract_options(json_data['data']['menu'])
|
243 |
+
processed_items = process_json(items, options, outlet_code)
|
244 |
+
store_in_db(processed_items)
|
245 |
+
|
246 |
+
return {"message": "Restaurant details extracted and stored successfully."}
|
247 |
+
|
248 |
+
except Exception as e:
|
249 |
+
logging.error(f"An error occurred: {str(e)}")
|
250 |
+
return {"error": "An error occurred while processing the request."}
|
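As a quick orientation for this module, a minimal usage sketch follows. The outlet URL is a hypothetical example (real ones come from Noonfood_location.py below), and the trailing slash matters because the outlet code is taken from the second-to-last path segment:

from Mongo.Noonfood_Mongo_URL_From_location import Mongo_location_URLS

# Hypothetical NoonFood outlet URL; note the trailing "/".
outlet_url = "https://food.noon.com/uae-en/some-restaurant/ABC123/"
result = Mongo_location_URLS(outlet_url, 25.2048, 55.2708)
print(result)  # {"message": ...} on success, {"error": ...} on failure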
Mongo/Noonfood_location.py
ADDED
@@ -0,0 +1,153 @@
import requests
import json
from urllib.parse import urljoin
import pymongo
from pymongo import MongoClient
import certifi
from Mongo.Noonfood_Mongo_URL_From_location import Mongo_location_URLS


def mutiple_url_location(lat, lng):
    def get_initial_data(lat, lng):
        initial_url = "https://food.noon.com/_svc/customer-v1/customer/public-area-serviceable"
        initial_payload = {"lat": str(lat), "lng": str(lng)}
        initial_headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
            "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
            "Content-Type": "application/json",
            "Origin": "https://food.noon.com",
            "Referer": "https://food.noon.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            "X-Content": "desktop",
            "X-Experience": "food",
            "X-Locale": "en-ae",
            "X-Mp": "noon",
            "X-Platform": "web",
            "X-Visitor-Id": "1072f8c2-cfd6-4734-8795-d637de61fba1",
        }

        try:
            response = requests.post(initial_url, json=initial_payload, headers=initial_headers)
            response.raise_for_status()
            json_data = response.json()
            print(f"Initial Response JSON: {json_data}")

            new_lat = json_data.get('lat')
            new_lng = json_data.get('lng')

            if new_lat and new_lng:
                return json_data, new_lat, new_lng
            else:
                print("lat or lng not found in the initial response.")
                return None, None, None
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return None, None, None

    def get_urls_from_json(json_data):
        url_list = []
        try:
            results = json_data.get("results", [])
            if len(results) > 5:
                banners = results[5]['modules'][0].get("banners", [])
                for n in range(min(20, len(banners))):
                    try:
                        base_url = 'https://food.noon.com/'
                        want_data = banners[n].get("linkUrl")
                        if want_data:
                            full_url = urljoin(base_url, want_data)
                            url_list.append(full_url)
                    except Exception as e:
                        print(f"Error occurred while extracting URLs: {e}")
                        break
            else:
                print("Insufficient data in 'results'")
        except Exception as e:
            print(f"Error parsing JSON data: {e}")
        return url_list

    def store_data_in_mongo(json_data, url_list):
        try:
            # MongoDB connection settings
            client = MongoClient(
                "mongodb+srv://dipenigenerate:[email protected]",
                tlsCAFile=certifi.where()
            )
            db = client['Restaurants_in_dubai']
            collection = db['noonfood_link']

            # Create the document to insert
            document = {
                "initial_response": json_data,
                "url_list": url_list
            }

            # Insert the document into the collection
            result = collection.insert_one(document)
            print(f"Data inserted with id: {result.inserted_id}")
        except Exception as e:
            print(f"Failed to store data in MongoDB: {e}")

    def fetch_urls(lat, lng):
        json_data, new_lat, new_lng = get_initial_data(lat, lng)

        if new_lat and new_lng:
            new_url = "https://food.noon.com/_svc/mp-food-api-catalog/api/"
            new_headers = {
                "method": "GET",
                "Accept": "application/json, text/plain, */*",
                "Accept-Encoding": "gzip, deflate, br, zstd",
                "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
                "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
                "Content-Type": "application/json",
                "Origin": "https://food.noon.com",
                "Referer": "https://food.noon.com/",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
                "X-Content": "desktop",
                "X-Experience": "food",
                "X-Locale": "en-ae",
                "X-Mp": "noon",
                "X-Platform": "web",
                "X-lat": f"{new_lat}",
                "X-lng": f"{new_lng}"
            }

            try:
                new_response = requests.get(new_url, headers=new_headers)
                new_response.raise_for_status()
                new_json_data = new_response.json()

                # Extract URLs from the JSON data
                url_list = get_urls_from_json(new_json_data)

                # Store the initial JSON response and URLs in MongoDB
                store_data_in_mongo(json_data, url_list)

                return url_list

            except requests.exceptions.RequestException as e:
                print(f"Failed to retrieve new content from the URL: {e}")
                return []
        else:
            return []

    urls = fetch_urls(lat, lng)
    if urls:
        print("Fetched URLs:", urls)
        for url in urls:
            url = url + "/"
            Mongo_location_URLS(url, lat, lng)

# if __name__ == "__main__":
#     lat = int(input("Enter the latitude: "))
#     lng = int(input("Enter the longitude: "))

#     urls = fetch_urls(lat, lng)
#     if urls:
#         print("Fetched URLs:", urls)
#         for url in urls:
#             url = url + "/"
#             Mongo_location_URLS(url, lat, lng)
#     else:
#         print("No URLs fetched.")
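Driving this module directly mirrors the commented-out __main__ block above; a minimal sketch (Dubai-area coordinates chosen purely as an illustration):

from Mongo.Noonfood_location import mutiple_url_location

# Fetches up to 20 restaurant URLs for the area, stores them (plus the raw
# serviceability response) in MongoDB, then scrapes each outlet's menu.
mutiple_url_location(25.2048, 55.2708)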
Mongo/Noonfood_mongo_Single_URL.py
ADDED
@@ -0,0 +1,274 @@
import pandas as pd
import requests
import json
import logging
import mongoengine as me
from mongoengine import connect
import certifi
from bson.objectid import ObjectId
from fastapi import FastAPI, Query
from pydantic import BaseModel

# Define the connection settings
db_name = 'NoonFood'
host = 'ac-ut7ghe7-shard-00-00.299tz43.mongodb.net'
port = 27017
username = 'sahlhubnoonfood'
password = '5JiDKBl6IPyH9Ff4'
alias = 'default'

me.disconnect(alias)
me.connect(db_name, host=host, port=port, username=username, password=password, ssl=True, tlsCAFile=certifi.where())

class NoonFoodLink(me.Document):
    initial_response = me.DictField(required=True)
    url_list = me.ListField(me.StringField(), required=True)

class MenuOption(me.Document):
    name = me.StringField(required=True)
    price = me.FloatField(required=True)
    #location = me.StringField(required=True)
    #restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuOptionGroup(me.Document):
    group_name = me.StringField(required=True)
    options = me.ListField(me.ReferenceField(MenuOption))
    min_quantity = me.IntField(required=True)
    max_quantity = me.IntField(required=True)
    #restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuItem(me.Document):
    category_name = me.StringField(required=True)
    category_position = me.IntField(required=True)
    name = me.StringField(required=True)
    position = me.IntField(required=True)
    description = me.StringField()
    image_url = me.StringField()
    price = me.FloatField(required=True)
    option_groups = me.ListField(me.ReferenceField(MenuOptionGroup))
    #restro_ref_id = me.ReferenceField(NoonFoodLink)

class MenuCategory(me.Document):
    name = me.StringField(required=True)
    items = me.ListField(me.ReferenceField(MenuItem))
    #restro_ref_id = me.ReferenceField(NoonFoodLink)

# app = FastAPI()

class RestaurantDetailsRequest(BaseModel):
    latitude: float
    longitude: float
    restaurant_url: str

class StoreDataRequest(BaseModel):
    initial_response: dict
    url_list: list

# Extract items with categories
def extract_items_with_categories(menu):
    items_list = []
    categories_seen = set()
    for category in menu['categories']:
        category_name = category['name']
        if category_name not in categories_seen:
            # Placeholder row that carries only the category name and position.
            items_list.append({
                'category': category_name,
                'item': None,
                'itemCode': None,
                'item-position': None,
                'img-url': None,
                'price': None,
                'Description': None,
                'position': category['position']
            })
            categories_seen.add(category_name)
        for item_code in category['items']:
            item = next((item for item in menu['items'] if item['itemCode'] == item_code), None)
            if item:
                items_list.append({
                    'category': category_name,
                    'item': item['name'],
                    'itemCode': item['itemCode'],
                    'item-position': item['position'],
                    'img-url': item.get('image', ''),
                    'price': item.get('price', 0.0),
                    'Description': item.get('itemDesc', ''),
                    'position': category['position']
                })
    return items_list

# Extract options with proper formatting
def extract_options(menu):
    options_dict = {}
    for item in menu['items']:
        if 'modifiers' in item:
            for modifier_code in item['modifiers']:
                modifier = next((modifier for modifier in menu['modifiers'] if modifier['modifierCode'] == modifier_code), None)
                if modifier:
                    if item['itemCode'] not in options_dict:
                        options_dict[item['itemCode']] = {}
                    if modifier['name'] not in options_dict[item['itemCode']]:
                        options_dict[item['itemCode']][modifier['name']] = {
                            'Min': modifier.get('minTotalOptions'),
                            'Max': modifier.get('maxTotalOptions'),
                            'Options': []
                        }
                    for option in modifier['options']:
                        option_item = next((i for i in menu['items'] if i['itemCode'] == option['itemCode']), None)
                        if option_item:
                            options_dict[item['itemCode']][modifier['name']]['Options'].append({
                                'Option name': option_item['name'],
                                'Option price': option.get('price', 0.0)
                            })
    return options_dict

def process_json(data, options, outlet_code):
    def process_item(item):
        common_fields = {
            'category': item['category'],
            'category-position': item['position'],
            'item': item['item'],
            'item-position': item['item-position'],
            'Description': item['Description'],
            'img-url': item['img-url'],
            'price': item['price']
        }
        item_options = options.get(item['itemCode'], {})
        option_groups = []
        for group_name, group_data in item_options.items():
            min_value = group_data['Min']
            max_value = group_data['Max']
            options_list = []
            for option in group_data['Options']:
                options_list.append({
                    'name': option['Option name'],
                    'price': option['Option price'],
                })
            option_groups.append({
                'group_name': group_name,
                'min_quantity': min_value,
                'max_quantity': max_value,
                'options': options_list,
            })
        return {
            'item': common_fields,
            'option_groups': option_groups
        }

    processed_items = []
    for item in data:
        processed_item = process_item(item)
        processed_items.append(processed_item)

    return processed_items

def store_in_db(processed_items):
    categories_dict = {}

    for item in processed_items:
        item_common_fields = item['item']
        option_groups = item['option_groups']

        # Skip category placeholder rows. Compare against None rather than
        # truthiness so a legitimate position of 0 or price of 0.0 is kept.
        if item_common_fields['item'] is None or item_common_fields['item-position'] is None or item_common_fields['price'] is None:
            logging.error(f"Missing required fields in item: {item_common_fields}")
            continue

        # Create MenuItem
        item_document = MenuItem(
            category_name=item_common_fields['category'],
            category_position=item_common_fields['category-position'],
            name=item_common_fields['item'],
            position=item_common_fields['item-position'],
            description=item_common_fields['Description'],
            image_url=item_common_fields['img-url'],
            price=item_common_fields['price'],
            option_groups=[]
        )
        item_document.save()

        for group in option_groups:
            options_list = []
            for option_data in group['options']:
                menu_option = MenuOption(
                    name=option_data['name'],
                    price=option_data['price'],
                )
                menu_option.save()
                options_list.append(menu_option)

            option_group = MenuOptionGroup(
                group_name=group['group_name'],
                min_quantity=group['min_quantity'],
                max_quantity=group['max_quantity'],
                options=options_list,
            )
            option_group.save()
            item_document.option_groups.append(option_group)

        item_document.save()

        # Manage MenuCategory
        category_name = item_common_fields['category']
        if category_name not in categories_dict:
            categories_dict[category_name] = MenuCategory(name=category_name, items=[])
        categories_dict[category_name].items.append(item_document)

    for category in categories_dict.values():
        category.save()


def get_restaurant_details(latitude, longitude, url1):
    outlet_code = url1.split('/')[-2]

    # Make the request to fetch restaurant details
    url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
    payload = {
        "addressLat": latitude,
        "addressLng": longitude,
        "deliveryType": "default",
        "outletCode": outlet_code
    }
    headers = {
        'Connection': 'keep-alive',
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
        "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
    }

    try:
        response = requests.post(url, headers=headers, json=payload)
        json_data = response.json()
        items = extract_items_with_categories(json_data['data']['menu'])
        options = extract_options(json_data['data']['menu'])
        processed_items = process_json(items, options, outlet_code)
        store_in_db(processed_items)

        return {"message": "Restaurant details extracted and stored successfully."}

    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return {"error": "An error occurred while processing the request."}

# @app.post("/store_data")
# def store_data(details: StoreDataRequest):
#     try:
#         # Create a document from the request data
#         document = NoonFoodLink(
#             initial_response=details.initial_response,
#             url_list=details.url_list
#         )
#         document.save()
#         return {"message": "Data stored successfully."}
#     except Exception as e:
#         return {"error": str(e)}

# if __name__ == "__main__":
#     import uvicorn

#     # Run FastAPI application using Uvicorn
#     uvicorn.run(app, host="127.0.0.1", port=8000)
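To sanity-check what store_in_db wrote, the saved documents can be walked back through the same mongoengine models; a minimal sketch, assuming the connection block above has already run:

# Walk categories -> items -> option groups as stored by store_in_db.
# mongoengine dereferences the ReferenceFields automatically on access.
for category in MenuCategory.objects:
    print(category.name)
    for item in category.items:
        print(f"  {item.position:>3}. {item.name} ({item.price})")
        for group in item.option_groups:
            print(f"      [{group.group_name}] min={group.min_quantity}, max={group.max_quantity}")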
Mongo/Noonfood_multy.py
ADDED
@@ -0,0 +1,6 @@
from Mongo.Noonfood_mongo_Single_URL import get_restaurant_details

def Noonfood_multy_urls(latitude, longitude, urls):
    for url in urls:
        get_restaurant_details(latitude, longitude, url)
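Usage is the same as for the single-URL scraper, just over a list; the outlet URLs below are hypothetical examples:

from Mongo.Noonfood_multy import Noonfood_multy_urls

urls = [
    "https://food.noon.com/uae-en/restaurant-a/AAA111/",  # hypothetical
    "https://food.noon.com/uae-en/restaurant-b/BBB222/",  # hypothetical
]
Noonfood_multy_urls(25.2048, 55.2708, urls)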
Mongo/__init__.py
ADDED
File without changes
Mongo/__pycache__/Deliveroo_excel.cpython-311.pyc
ADDED
Binary file (495 Bytes)
Mongo/__pycache__/Deliveroo_excel.cpython-312.pyc
ADDED
Binary file (470 Bytes)
Mongo/__pycache__/Noonfood_Mongo_URL_From_location.cpython-311.pyc
ADDED
Binary file (13.6 kB)
Mongo/__pycache__/Noonfood_Mongo_URL_From_location.cpython-312.pyc
ADDED
Binary file (11.9 kB)
Mongo/__pycache__/Noonfood_location.cpython-311.pyc
ADDED
Binary file (6.85 kB)
Mongo/__pycache__/Noonfood_location.cpython-312.pyc
ADDED
Binary file (6.14 kB)
Mongo/__pycache__/Noonfood_mongo_Single_URL.cpython-311.pyc
ADDED
Binary file (13.4 kB)
Mongo/__pycache__/Noonfood_mongo_Single_URL.cpython-312.pyc
ADDED
Binary file (11.8 kB)
Mongo/__pycache__/Noonfood_multy.cpython-311.pyc
ADDED
Binary file (525 Bytes)
Mongo/__pycache__/Noonfood_multy.cpython-312.pyc
ADDED
Binary file (500 Bytes)
Mongo/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (167 Bytes)
Mongo/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (202 Bytes)
Mongo/__pycache__/mongo_final.cpython-311.pyc
ADDED
Binary file (13.8 kB)
Mongo/__pycache__/mongo_final.cpython-312.pyc
ADDED
Binary file (12 kB)
Mongo/__pycache__/mongo_one.cpython-311.pyc
ADDED
Binary file (11.7 kB)
Mongo/__pycache__/mongo_one.cpython-312.pyc
ADDED
Binary file (10.1 kB)
Mongo/mongo_final.py
ADDED
@@ -0,0 +1,292 @@
import json
import requests
from bs4 import BeautifulSoup
import mongoengine as me
import certifi
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from urllib.parse import urljoin
import uvicorn

# MongoDB connection settings
db_name = 'Deliveroo'
host = 'ac-xvwfer4-shard-00-00.xrv1il9.mongodb.net'
port = 27017
username = 'sahlhubdeliveroo'
password = 'vp5bLCjufKdarP1W'
alias = 'default'

me.disconnect(alias)
me.connect(db_name, host=host, port=port, username=username, password=password, ssl=True, tlsCAFile=certifi.where())

# Define the MongoDB document models
class DeliverooLink(me.Document):
    href = me.StringField(required=True)

class MenuOption(me.Document):
    name = me.StringField(required=True)
    price = me.StringField(required=True)
    option_groups = me.ListField(me.ReferenceField('MenuOptionGroup'))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuOptionGroup(me.Document):
    group_name = me.StringField(required=True)
    options = me.ListField(me.ReferenceField(MenuOption))
    min_quantity = me.IntField(required=True)
    max_quantity = me.IntField(required=True)
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuItem(me.Document):
    category_name = me.StringField(required=True)
    name = me.StringField(required=True)
    description = me.StringField()
    image_url = me.StringField()
    price = me.StringField(required=True)
    option_groups = me.ListField(me.ReferenceField(MenuOptionGroup))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuCategory(me.Document):
    name = me.StringField(required=True)
    items = me.ListField(me.ReferenceField(MenuItem))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

# FastAPI app
# app = FastAPI()

# Pydantic model for input validation
# class LocationRequest(BaseModel):
#     city: str
#     location: str

# @app.post("/fetch_and_process_data")
# async def fetch_and_process_data(request: LocationRequest):
#     city = request.city.lower()
#     location = request.location.lower()
#     print(f"Fetching restaurant links for {city}, {location}...")

#     links = fetch_restaurant_links(city, location)
#     if not links:
#         raise HTTPException(status_code=404, detail="No restaurants found for the specified location.")

#     print(f"Found {len(links)} links. Processing restaurant data...")
#     data = process_links(links, location)

#     print("Saving data to MongoDB...")
#     save_data_to_db(data, location)

#     print("Data has been processed and saved successfully.")
#     return {"message": "Data has been processed and saved successfully."}

def fetch_restaurant_links(city, location):
    base_url = "https://deliveroo.ae"
    url = f"{base_url}/restaurants/{city}/{location}/?collection=restaurants"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Cookie': '__cf_bm=oakl46sJ3V9vwmnIIbfXWfkHbGmmC2pH56GyTI33b4U-1715931048-1.0.1.1-4XOcSGSThZV_INfpn3aptlo8jpZtLFbYoLsZxP9BpQ8LIjq3wBIe8CPlSf0AomuniXy4TZWyVlBQBTlrm.CPiSfI1jzx18y9zxwc9GX0fmo; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=2e989653-2776-4ede-a52e-b610f1ad64a2'
    }

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')

        if "We couldn't find" in soup.text or "No restaurants" in soup.text:
            print("No restaurants found for the specified location.")
            return []

        divs = soup.find_all('div', class_=["HomeFeedScrollTracker-bd9a6ffea8a4b4b7", "HomeFeedUICard-157f7be5d7b2fa7b"])

        hrefs = [a_tag['href'] for div in divs for a_tag in div.find_all('a', href=True)]
        print(hrefs)
        hrefs = hrefs[:20]

        full_urls = []
        for href in hrefs:
            full_url = urljoin(base_url, href)
            link = DeliverooLink(href=full_url)
            link.save()
            full_urls.append(full_url)

        return full_urls
    else:
        print(f"Failed to fetch the listing page (status code {response.status_code}).")
        return []

def fetch_restaurant_data(url, location):
    headers = {
        'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
    }

    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to fetch the URL: {url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')

    if not script_tag:
        print("Script tag not found")
        return None

    json_data = json.loads(script_tag.string)
    json_data = json_data['props']['initialState']['menuPage']['menu']['meta']

    items = json_data['items']
    categories = json_data['categories']
    category_map = {category['id']: category['name'] for category in categories}
    modifier_groups = json_data['modifierGroups']

    modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}

    items_with_modifiers = []
    for item in items:
        category_id = item['categoryId']
        category_name = category_map.get(category_id, 'Unknown')

        # Skip items with the "Unknown" category
        if category_name == 'Unknown':
            continue

        item_with_modifiers = {
            "id": item['id'],
            "category_id": category_id,
            "category_name": category_name,
            "name": item['name'],
            "description": item.get('description', ''),
            "price": item['price']['formatted'],
            "img_url": item.get('image').get('url', '') if item.get('image') else '',
            "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            "location": location,
            "restro_ref_id": url
        }
        items_with_modifiers.append(item_with_modifiers)

    return items_with_modifiers

# def save_data_to_json(data, filename='restaurants.json'):
#     with open(filename, 'w') as f:
#         json.dump(data, f, indent=4)

def process_links(urls, location):
    all_data = []
    for url in urls:
        data = fetch_restaurant_data(url, location)
        if data:
            all_data.extend(data)
    # save_data_to_json(all_data)
    return all_data

def save_data_to_db(data, location):
    # Dictionary to hold categories and associated items
    category_dict = {}

    for item_data in data:
        category_name = item_data['category_name']
        name = item_data['name']
        description = item_data.get('description', '')
        image_url = item_data.get('img_url', '')
        price = item_data['price']
        option_groups_data = item_data.get('modifier_groups', [])
        restro_ref_id_url = item_data['restro_ref_id']

        restro_link = DeliverooLink.objects(href=restro_ref_id_url).first()
        if not restro_link:
            restro_link = DeliverooLink(href=restro_ref_id_url)
            restro_link.save()

        # Create or retrieve MenuOption instances
        option_groups = []
        for group_data in option_groups_data:
            group_name = group_data['name']
            min_quantity = group_data.get('minSelection', 0)
            max_quantity = group_data.get('maxSelection', 1)
            options_data = group_data.get('modifierOptions', [])

            option_group = MenuOptionGroup.objects(
                group_name=group_name, min_quantity=min_quantity, max_quantity=max_quantity, location=location, restro_ref_id=restro_link).first()

            if not option_group:
                option_group = MenuOptionGroup(
                    group_name=group_name,
                    min_quantity=min_quantity,
                    max_quantity=max_quantity,
                    location=location,
                    restro_ref_id=restro_link
                )
                option_group.save()

            options = []
            for option_data in options_data:
                option_name = option_data['name']
                option_price = option_data['price']['formatted']

                option = MenuOption.objects(name=option_name, price=option_price, location=location, restro_ref_id=restro_link).first()
                if not option:
                    option = MenuOption(name=option_name, price=option_price, location=location, restro_ref_id=restro_link)
                    option.save()

                if option not in option_group.options:
                    option_group.options.append(option)
                    option_group.save()

                if option_group not in option.option_groups:
                    option.option_groups.append(option_group)
                    option.save()

                options.append(option)

            option_groups.append(option_group)

        # Create or retrieve MenuItem instance
        menu_item = MenuItem.objects(
            category_name=category_name, name=name, location=location, restro_ref_id=restro_link).first()
        if not menu_item:
            menu_item = MenuItem(
                category_name=category_name,
                name=name,
                description=description,
                image_url=image_url,
                price=price,
                option_groups=option_groups,
                location=location,
                restro_ref_id=restro_link
            )
            menu_item.save()
        else:
            # Ensure option groups are updated for existing menu items
            for option_group in option_groups:
                if option_group not in menu_item.option_groups:
                    menu_item.option_groups.append(option_group)
            menu_item.save()

        # Store the menu item under its category
        if category_name not in category_dict:
            category_dict[category_name] = []
        category_dict[category_name].append(menu_item)

    # Create or retrieve MenuCategory instances and associate items.
    # Note: restro_link here still holds the reference from the last item
    # processed above, so all categories saved in this pass are tied to
    # that restaurant.
    for category_name, items in category_dict.items():
        menu_category = MenuCategory.objects(name=category_name, location=location, restro_ref_id=restro_link).first()
        if not menu_category:
            menu_category = MenuCategory(name=category_name, location=location, restro_ref_id=restro_link)
            menu_category.save()

        for menu_item in items:
            if menu_item not in menu_category.items:
                menu_category.items.append(menu_item)
        menu_category.save()

    print(f"Saved item '{name}' in category '{category_name}' for location '{location}'")

# if __name__ == "__main__":
#     uvicorn.run(app, host="0.0.0.0", port=8000)
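End to end, the module is meant to be driven roughly like this; city and area slugs are as they appear in Deliveroo URLs, with "dubai"/"jumeirah" as an illustrative pair:

from Mongo.mongo_final import fetch_restaurant_links, process_links, save_data_to_db

city, location = "dubai", "jumeirah"
links = fetch_restaurant_links(city, location)  # also saves DeliverooLink docs
if links:
    data = process_links(links, location)       # flat list of item dicts
    save_data_to_db(data, location)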
Mongo/mongo_one.py
ADDED
@@ -0,0 +1,243 @@
import json
import requests
from bs4 import BeautifulSoup
import mongoengine as me
import certifi
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from urllib.parse import urljoin
import uvicorn

# MongoDB connection settings
db_name = 'Deliveroo'
host = 'ac-xvwfer4-shard-00-00.xrv1il9.mongodb.net'
port = 27017
username = 'sahlhubdeliveroo'
password = 'vp5bLCjufKdarP1W'


me.disconnect()
# Connect to the MongoDB database
me.connect(db_name, host=host, port=port, username=username, password=password, ssl=True, tlsCAFile=certifi.where())

# Define the MongoDB document models
class DeliverooLink(me.Document):
    href = me.StringField(required=True)

class MenuOption(me.Document):
    name = me.StringField(required=True)
    price = me.StringField(required=True)
    option_groups = me.ListField(me.ReferenceField('MenuOptionGroup'))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuOptionGroup(me.Document):
    group_name = me.StringField(required=True)
    options = me.ListField(me.ReferenceField(MenuOption))
    min_quantity = me.IntField(required=True)
    max_quantity = me.IntField(required=True)
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuItem(me.Document):
    category_name = me.StringField(required=True)
    name = me.StringField(required=True)
    description = me.StringField()
    image_url = me.StringField()
    price = me.StringField(required=True)
    option_groups = me.ListField(me.ReferenceField(MenuOptionGroup))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

class MenuCategory(me.Document):
    name = me.StringField(required=True)
    items = me.ListField(me.ReferenceField(MenuItem))
    location = me.StringField(required=True)
    restro_ref_id = me.ReferenceField(DeliverooLink, required=True)

# FastAPI app

# Pydantic model for input validation
# class URLRequest(BaseModel):
#     city: str
#     location: str
#     url: str

# @app.post("/fetch_and_process_data")
# async def fetch_and_process_data(request: URLRequest):
#     city = request.city.lower()
#     location = request.location.lower()
#     url = request.url
#     print(f"Fetching restaurant data for {city}, {location}...")

#     print(f"Processing restaurant data from URL: {url}...")
#     data = fetch_restaurant_data(url, location)

#     if not data:
#         raise HTTPException(status_code=404, detail="No data found for the specified URL.")

#     print("Saving data to MongoDB...")
#     save_data_to_db(data, location)

#     print("Data has been processed and saved successfully.")
#     return {"message": "Data has been processed and saved successfully."}

def fetch_restaurant_data(url, location):
    headers = {
        'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
    }

    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        print(f"Failed to fetch the URL: {url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')

    if not script_tag:
        print("Script tag not found")
        return None

    json_data = json.loads(script_tag.string)
    json_data = json_data['props']['initialState']['menuPage']['menu']['meta']

    items = json_data['items']
    categories = json_data['categories']
    category_map = {category['id']: category['name'] for category in categories}
    modifier_groups = json_data['modifierGroups']

    modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}

    items_with_modifiers = []
    for item in items:
        category_id = item['categoryId']
        category_name = category_map.get(category_id, 'Unknown')

        # Skip items with the "Unknown" category
        if category_name == 'Unknown':
            continue

        item_with_modifiers = {
            "id": item['id'],
            "category_id": category_id,
            "category_name": category_name,
            "name": item['name'],
            "description": item.get('description', ''),
            "price": item['price']['formatted'],
            "img_url": item.get('image').get('url', '') if item.get('image') else '',
            "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            "location": location,
            "restro_ref_id": url
        }
        items_with_modifiers.append(item_with_modifiers)

    return items_with_modifiers

def save_data_to_json(data, filename='restaurants.json'):
    with open(filename, 'w') as f:
        json.dump(data, f, indent=4)

def save_data_to_db(data, location):
    # Dictionary to hold categories and associated items
    category_dict = {}

    for item_data in data:
        category_name = item_data['category_name']
        name = item_data['name']
        description = item_data.get('description', '')
        image_url = item_data.get('img_url', '')
        price = item_data['price']
        option_groups_data = item_data.get('modifier_groups', [])
        restro_ref_id_url = item_data['restro_ref_id']

        restro_link = DeliverooLink.objects(href=restro_ref_id_url).first()
        if not restro_link:
            restro_link = DeliverooLink(href=restro_ref_id_url)
            restro_link.save()

        # Create or retrieve MenuOption instances
        option_groups = []
        for group_data in option_groups_data:
            group_name = group_data['name']
            min_quantity = group_data.get('minSelection', 0)
            max_quantity = group_data.get('maxSelection', 1)
            options_data = group_data.get('modifierOptions', [])

            option_group = MenuOptionGroup.objects(
                group_name=group_name, min_quantity=min_quantity, max_quantity=max_quantity, location=location, restro_ref_id=restro_link).first()

            if not option_group:
                option_group = MenuOptionGroup(
                    group_name=group_name,
                    min_quantity=min_quantity,
                    max_quantity=max_quantity,
                    location=location,
                    restro_ref_id=restro_link
                )
                option_group.save()

            options = []
            for option_data in options_data:
                option_name = option_data['name']
                option_price = option_data['price']['formatted']

                option = MenuOption.objects(name=option_name, price=option_price, location=location, restro_ref_id=restro_link).first()
                if not option:
                    option = MenuOption(name=option_name, price=option_price, location=location, restro_ref_id=restro_link)
                    option.save()

                if option not in option_group.options:
                    option_group.options.append(option)
                    option_group.save()

                if option_group not in option.option_groups:
                    option.option_groups.append(option_group)
                    option.save()

                options.append(option)

            option_groups.append(option_group)

        # Create or retrieve MenuItem instance
        menu_item = MenuItem.objects(
            category_name=category_name, name=name, location=location, restro_ref_id=restro_link).first()
        if not menu_item:
            menu_item = MenuItem(
                category_name=category_name,
                name=name,
                description=description,
                image_url=image_url,
                price=price,
                option_groups=option_groups,
                location=location,
                restro_ref_id=restro_link
            )
            menu_item.save()
        else:
            # Ensure option groups are updated for existing menu items
            for option_group in option_groups:
                if option_group not in menu_item.option_groups:
                    menu_item.option_groups.append(option_group)
            menu_item.save()

        # Store the menu item under its category
        if category_name not in category_dict:
            category_dict[category_name] = []
        category_dict[category_name].append(menu_item)

    # Create or retrieve MenuCategory instances and associate items
    for category_name, items in category_dict.items():
        menu_category = MenuCategory.objects(name=category_name, location=location, restro_ref_id=restro_link).first()
        if not menu_category:
            menu_category = MenuCategory(name=category_name, location=location, restro_ref_id=restro_link)
            menu_category.save()

        for menu_item in items:
            if menu_item not in menu_category.items:
                menu_category.items.append(menu_item)
        menu_category.save()

    print(f"Saved item '{name}' in category '{category_name}' for location '{location}'")
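A minimal sketch for this single-restaurant path (the menu URL is a hypothetical example):

from Mongo.mongo_one import fetch_restaurant_data, save_data_to_db

url = "https://deliveroo.ae/menu/dubai/jumeirah/some-restaurant"  # hypothetical
data = fetch_restaurant_data(url, "jumeirah")
if data:
    save_data_to_db(data, "jumeirah")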
app.py
ADDED
@@ -0,0 +1,151 @@
from fastapi import FastAPI, HTTPException, Response
from pydantic import BaseModel
import uvicorn
# save_data_to_db exists in both mongo_one and mongo_final with the same
# signature; only the mongo_final version is imported here, since that is
# the one the endpoints below use, to avoid silent name shadowing.
from Mongo.mongo_one import fetch_restaurant_data
from Mongo.mongo_final import fetch_restaurant_links, save_data_to_db, process_links
from Excel.excel_final import Excel_final
from Mongo.Noonfood_mongo_Single_URL import get_restaurant_details
from Mongo.Noonfood_location import mutiple_url_location
from fastapi.responses import StreamingResponse
import zipfile
import os
from Excel.Noonfood_excel import process_url
import io
from Mongo.Deliveroo_excel import Deliveroo_excel_multy
from Mongo.Noonfood_multy import Noonfood_multy_urls
from starlette.responses import RedirectResponse

# FastAPI app
app = FastAPI()

# Pydantic models for input validation
class URLRequest(BaseModel):
    location: str
    url: list

class LocationRequest(BaseModel):
    city: str
    location: str

class Excel_From_URL(BaseModel):
    url: str


class FetchAndStoreRequest(BaseModel):
    # Floats rather than ints: integer fields would silently truncate the
    # coordinates before they reach the scraper.
    latitude: float
    longitude: float

class GenerateAndDownload(BaseModel):
    latitude: float
    longitude: float
    url: list

class RestaurantDetailsRequest(BaseModel):
    latitude: float
    longitude: float
    url: str


@app.get("/")
def Docsmain():
    return RedirectResponse(url="/docs")


@app.post("/Deliveroo_Excel_From_URL", tags=["Deliveroo"])
def Deliveroo_Excel_From_URL(request: Excel_From_URL):
    # Named differently from the Excel_From_URL model so the endpoint
    # function does not shadow the request class it takes as input.
    try:
        output, filename = Excel_final(request.url)
        headers = {
            'Content-Disposition': f'attachment; filename="{filename}"'
        }
        return Response(content=output.getvalue(), media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', headers=headers)
    except HTTPException as e:
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/MongoDB_Data_Store__From_One_URL", tags=["Deliveroo"])
def MongoDB_Data_Store__From_One_URL(request: URLRequest):
    location = request.location.lower()
    url = request.url

    print(f"Processing restaurant data from URL: {url}...")
    data = Deliveroo_excel_multy(url, location)

    if not data:
        raise HTTPException(status_code=404, detail="No data found for the specified URL.")

    print("Saving data to MongoDB...")
    save_data_to_db(data, location)

    print("Data has been processed and saved successfully.")
    return {"message": "Data has been processed and saved successfully."}

@app.post("/MongoDB_Data_Store__From_Location", tags=["Deliveroo"])
def MongoDB_Data_Store__From_Location(request: LocationRequest):
    city = request.city.lower()
    location = request.location.lower()
    print(f"Fetching restaurant links for {city}, {location}...")

    links = fetch_restaurant_links(city, location)
    if not links:
        raise HTTPException(status_code=404, detail="No restaurants found for the specified location.")

    print(f"Found {len(links)} links. Processing restaurant data...")
    data = process_links(links, location)

    print("Saving data to MongoDB...")
    save_data_to_db(data, location)

    print("Data has been processed and saved successfully.")
    return {"message": "Data has been processed and saved successfully."}


@app.post("/Mongo_From_Url", tags=["Noonfood"])
def Mongo_url_data(request: GenerateAndDownload):
    latitude = request.latitude
    longitude = request.longitude
    url = request.url
    try:
        data = Noonfood_multy_urls(latitude, longitude, url)
        return {"message": "Extraction process completed successfully", "data": data}
    except HTTPException as e:
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/MONGO_DB_FROM_LOCATION", tags=["Noonfood"])
def Mongo_DB_LOCATION(request: FetchAndStoreRequest):
    latitude = request.latitude
    longitude = request.longitude

    try:
        data = mutiple_url_location(latitude, longitude)
        return {"message": "Extraction process completed successfully", "data": data}
    except HTTPException as e:
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/Noon_Food_EXCEL", tags=["Noonfood"], response_class=StreamingResponse)
def Noon_Food_EXCEL(details: RestaurantDetailsRequest):
    try:
        output, filename = process_url(details.url, details.latitude, details.longitude)
        headers = {
            'Content-Disposition': f'attachment; filename="{filename}"'
        }
        return Response(content=output.getvalue(), media_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', headers=headers)
    except HTTPException as e:
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
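For a quick smoke test against a local run of the app (port 8000 as in the __main__ block above; the Deliveroo menu URL is a hypothetical example):

import requests

BASE = "http://localhost:8000"

# Scrape NoonFood menus for an area straight into MongoDB.
resp = requests.post(f"{BASE}/MONGO_DB_FROM_LOCATION",
                     json={"latitude": 25.2048, "longitude": 55.2708})
print(resp.json())

# Download one Deliveroo menu as an Excel workbook.
resp = requests.post(f"{BASE}/Deliveroo_Excel_From_URL",
                     json={"url": "https://deliveroo.ae/menu/dubai/jumeirah/some-restaurant"})
with open("menu.xlsx", "wb") as f:
    f.write(resp.content)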
requirements.txt
ADDED
@@ -0,0 +1,11 @@
mongoengine
requests
beautifulsoup4
certifi
fastapi
pandas
openpyxl
pydantic
uvicorn