Spaces:

Viraj2307
/

TE-Scrapper

Sleeping

TE-Scrapper / Talabat_files /Talabat_excel_final.py

viraj

Initial Commit

e79fbb1 4 months ago

5.31 kB

	def Talabat_excel_extract(url):
	    """Scrape a Talabat restaurant page and return its menu as an Excel file.

	    The menu is read from the JSON embedded in the page's ``__NEXT_DATA__``
	    script tag. For every item flagged with ``hasChoices`` the choices
	    endpoint is queried and each option group becomes a row: the first
	    group shares the item's row, further groups follow on rows of their own.

	    Args:
	        url: Restaurant page URL whose path ends in
	            ``.../<restaurant_id>/<restaurant_name>`` (a trailing slash is
	            tolerated).

	    Returns:
	        ``(excel_file, file_name)`` where ``excel_file`` is a ``BytesIO``
	        rewound to position 0 and ``file_name`` is
	        ``"<restaurant_name>_menu.xlsx"``. ``None`` is returned (after
	        printing a message) when the page request does not answer 200 or
	        the ``__NEXT_DATA__`` script tag is missing or empty.

	    Raises:
	        ValueError: If the URL path has fewer than two segments.
	        KeyError: If the embedded JSON lacks the expected menu keys.
	    """
	    import requests
	    import json
	    import pandas as pd
	    from bs4 import BeautifulSoup
	    from urllib.parse import urlparse
	    from io import BytesIO

	    # Without a timeout a stalled connection would block the caller forever.
	    request_timeout = 30  # seconds

	    # Defined before the nested helper that relies on it.
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
	    }

	    def extract_choices(item_id, restaurant_id):
	        """Return the decoded choices payload for one item, or None on failure."""
	        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
	        choice_response = requests.get(
	            choice_url, headers=headers, timeout=request_timeout
	        )
	        if choice_response.status_code != 200:
	            print("Failed to retrieve choices for item ID:", item_id)
	            return None
	        return choice_response.json()

	    # Dropping empty segments makes ".../123/name" and ".../123/name/" equal.
	    path_segments = [
	        segment for segment in urlparse(url).path.split('/') if segment
	    ]
	    if len(path_segments) < 2:
	        raise ValueError(
	            f"Cannot read restaurant id and name from URL: {url!r}"
	        )
	    restaurant_id, restaurant_name = path_segments[-2], path_segments[-1]

	    response = requests.get(url, headers=headers, timeout=request_timeout)
	    if response.status_code != 200:
	        print(f"Failed to get menu items. Status code: {response.status_code}")
	        return None

	    soup = BeautifulSoup(response.text, 'html.parser')
	    script_tag = soup.find('script', id='__NEXT_DATA__')
	    # ``.string`` is None when the tag has no single text child.
	    if not script_tag or not script_tag.string:
	        print("Script tag with id '__NEXT_DATA__' not found.")
	        return None

	    json_content = json.loads(script_tag.string.strip())
	    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

	    rows = []
	    # Category name -> 1-based rank of first appearance, so an item of an
	    # already-seen category gets that category's own rank rather than the
	    # rank of whichever category was discovered last.
	    category_positions = {}

	    for item_position, item in enumerate(menu_data, start=1):
	        section = item['originalSection']
	        category_position = category_positions.setdefault(
	            section, len(category_positions) + 1
	        )

	        row = {
	            "Category": section,
	            "Category_position": category_position,
	            "Item_name": item['name'],
	            "Item_position": item_position,
	            "Image": item['originalImage'],
	            "description": item['description'],
	            "price": item['price'],
	            "id": item['id'],
	        }

	        option_rows = []
	        if item['hasChoices']:
	            choice_data = extract_choices(item['id'], restaurant_id)
	            if choice_data:
	                option_rows = _option_group_rows(choice_data)

	        # The first option group sits on the item's own row; building the
	        # rows this way needs no column shifting afterwards and also works
	        # when no item on the menu has choices at all.
	        if option_rows:
	            row.update(option_rows[0])
	        rows.append(row)
	        rows.extend(option_rows[1:])

	    df = pd.DataFrame(rows)

	    # The per-option columns (everything right of 'Max_quantity') are
	    # written with blank headers.
	    if 'Max_quantity' in df.columns:
	        first_option_column = df.columns.get_loc('Max_quantity') + 1
	        blank_headers = [''] * (len(df.columns) - first_option_column)
	        df.columns = list(df.columns[:first_option_column]) + blank_headers

	    excel_file = BytesIO()
	    df.to_excel(excel_file, index=False)
	    excel_file.seek(0)  # rewind so the caller can read from the start

	    return excel_file, f"{restaurant_name}_menu.xlsx"


	def _option_group_rows(choice_data):
	    """Flatten a choices payload into one dict per option group.

	    Only the first ``choiceForItem`` entry is read. Groups are keyed by
	    name, so sections sharing a name are merged into one row, and a
	    section without options produces no row. Returns an empty list when
	    the payload carries no ``choiceForItem`` entries.
	    """
	    choice_items = (choice_data.get("result") or {}).get('choiceForItem') or []
	    if not choice_items:
	        return []

	    grouped = {}
	    for section in choice_items[0].get('choiceSections') or []:
	        group_name = section.get('nm', '')
	        for option_index, option in enumerate(section.get('ich') or [], start=1):
	            group_row = grouped.setdefault(group_name, {
	                "Option_group_name": group_name,
	                "Min_quantity": section.get('mnq', ''),
	                "Max_quantity": section.get('mxq', ''),
	            })
	            group_row[f"Option_{option_index}_Name"] = option.get('nm', '')
	            group_row[f"Option_{option_index}_Price"] = option.get('pr', '')

	    return list(grouped.values())