# TE-Scrapper / Talabat_files / Talabat_excel_final.py
# Author: viraj — Initial Commit (e79fbb1)
def Talabat_excel_extract(url):
    """Scrape a Talabat restaurant page and return its menu as an Excel file.

    The restaurant page embeds its full menu as JSON inside the
    ``__NEXT_DATA__`` script tag; every item flagged ``hasChoices`` triggers
    one extra API call to fetch its option groups.

    Parameters:
        url: Restaurant URL whose path ends in
            ``/<restaurant_id>/<restaurant_name>``.

    Returns:
        A ``(BytesIO, filename)`` tuple holding the in-memory ``.xlsx``
        content and a suggested file name, or ``None`` when the page or the
        embedded menu JSON cannot be retrieved.
    """
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    # URL layout assumed: .../<restaurant_id>/<restaurant_name>
    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id, restaurant_id):
        # Fetch the option groups ("choices") for a single menu item from
        # Talabat's internal menu API.
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        # Fix: timeout added so a stalled connection cannot hang the scraper.
        response = requests.get(choice_url, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    # Fix: also guard against a present-but-empty script tag, whose
    # .string is None and would crash the .strip() call.
    if script_tag is None or script_tag.string is None:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return None

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

    category_count = 0        # running count of distinct categories seen so far
    category_name_list = []   # categories in order of first appearance
    menu_items_list = []

    for i, item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']

        # Each category gets a stable 1-based position assigned the first
        # time it appears; later items in the same category reuse it.
        if original_section not in category_name_list:
            category_name_list.append(original_section)
            category_count += 1

        menu_items_list.append({
            "Category": original_section,
            "Category_position": category_count,
            "Item_name": item['name'],
            "Item_position": i + 1,
            "Image": item['originalImage'],
            "description": item['description'],
            "price": item['price'],
            "id": item_id,
        })

        if item['hasChoices']:
            choice_data = extract_choices(item_id, restaurant_id)
            if choice_data:
                # Fix: the original indexed choiceForItem[0] unconditionally
                # and raised IndexError when the API returned an empty list.
                choice_for_item_list = choice_data["result"].get('choiceForItem', [])
                if choice_for_item_list:
                    choice_sections = choice_for_item_list[0].get('choiceSections', [])
                else:
                    choice_sections = []

                # One row per option group; options spread across
                # Option_<n>_Name / Option_<n>_Price columns.
                grouped_data = {}
                for option_group in choice_sections:
                    option_group_name = option_group.get('nm', '')
                    min_quantity = option_group.get('mnq', '')
                    max_quantity = option_group.get('mxq', '')
                    for option_index, option in enumerate(option_group.get('ich', []), start=1):
                        # setdefault stays inside the option loop on purpose:
                        # a group with zero options produces no row (original
                        # behavior preserved).
                        grouped_data.setdefault(option_group_name, {
                            "Option_group_name": option_group_name,
                            "Min_quantity": min_quantity,
                            "Max_quantity": max_quantity,
                        })
                        grouped_data[option_group_name][f"Option_{option_index}_Name"] = option.get('nm', '')
                        grouped_data[option_group_name][f"Option_{option_index}_Price"] = option.get('pr', '')
                menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)
    if 'Max_quantity' in df.columns:
        # Blank the auto-generated Option_<n>_* headers so the exported
        # sheet shows unlabeled option columns.
        max_column_index = df.columns.get_loc('Max_quantity')
        for col_idx in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[col_idx]: ''}, inplace=True)
        # Shift the option-group columns up one row so each group's data
        # lines up with the menu-item row it belongs to.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for col_idx in range(option_group_name_index, len(df.columns)):
            df.iloc[:, col_idx] = df.iloc[:, col_idx].shift(-1)

    df_cleaned = df.dropna(how='all')

    # Build the workbook in memory so callers can stream it (e.g. as an
    # HTTP download) without touching the filesystem.
    excel_file = BytesIO()
    df_cleaned.to_excel(excel_file, index=False)
    excel_file.seek(0)  # rewind so the caller reads from the start
    return excel_file, f"{restaurant_name}_menu.xlsx"