import json
from io import BytesIO
from urllib.parse import urlparse

import pandas as pd
import requests
from bs4 import BeautifulSoup


def Talabat_excel_extract(url):
    """Scrape a Talabat restaurant menu page and return it as an in-memory Excel workbook.

    Returns a tuple of (BytesIO stream, suggested filename) on success, or None
    if the page or its embedded menu data cannot be retrieved.
    """

    def extract_choices(item_id, restaurant_id, headers):
        """Fetch the choice/option groups for one menu item from Talabat's menu API."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            return response.json()
        print(f"Failed to retrieve choices for item ID: {item_id}")
        return None

    # The restaurant id and name (slug) are taken from the last two path
    # segments of the URL, i.e. .../<restaurant_id>/<restaurant_name>.
    parsed_url = urlparse(url)
    path_segments = parsed_url.path.rstrip('/').split('/')

    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    # Send a desktop-browser User-Agent; requests without one may be rejected.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers)
    # Track category ordering: j counts the distinct categories seen so far.
    j = 0
    category_name_list = []

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
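        # Talabat pages are rendered with Next.js: the complete menu is embedded
        # as JSON inside the __NEXT_DATA__ script tag, so the data is parsed from
        # there rather than scraped from the rendered HTML.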
        script_tag = soup.find('script', id='__NEXT_DATA__')

        if script_tag:
            json_content = json.loads(script_tag.string.strip())

            menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

            menu_items_list = []

            # Flatten each menu item into one row; its option groups (if any) are
            # appended as extra rows right after it.
            for i, item in enumerate(menu_data):
                item_id = item['id']
                name = item['name']
                description = item['description']
                price = item['price']
                original_image = item['originalImage']
                original_section = item['originalSection']
                has_choices = item['hasChoices']

                # Assign a 1-based position to each category, in order of first appearance.
                if original_section not in category_name_list:
                    category_name_list.append(original_section)
                    j = j + 1
                Category_position = j

                menu_item = {
                    "Category": original_section,
                    "Category_position": Category_position,
                    "Item_name": name,
                    "Item_position": i+1,
                    "Image": original_image,
                    "description": description,
                    "price": price,
                    "id": item_id
                }

                menu_items_list.append(menu_item)

                if has_choices:
                    choice_data = extract_choices(item_id, restaurant_id, headers)
                    if choice_data:
                        # 'choiceForItem' is a list; guard against it being empty before indexing
                        choice_for_item_list = choice_data.get("result", {}).get('choiceForItem', [])
                        choice_for_item = choice_for_item_list[0] if choice_for_item_list else {}
                        choice_sections = choice_for_item.get('choiceSections', [])
                        grouped_data = {}
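                        # Collapse each choice section into one wide row keyed by group
                        # name: Option_1_Name / Option_1_Price, Option_2_Name / ..., plus
                        # the group's min and max quantities.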
                        for option_group in choice_sections:
                            option_group_name = option_group.get('nm', '')
                            min_quantity = option_group.get('mnq', '')
                            max_quantity = option_group.get('mxq', '')
                            options = option_group.get('ich', [])
                            for option_index, option in enumerate(options, start=1):
                                option_name = option.get('nm', '')
                                option_price = option.get('pr', '')
                                grouped_data.setdefault(option_group_name, {
                                    "Option_group_name": option_group_name,
                                    "Min_quantity": min_quantity,
                                    "Max_quantity": max_quantity
                                })
                                grouped_data[option_group_name][f"Option_{option_index}_Name"] = option_name
                                grouped_data[option_group_name][f"Option_{option_index}_Price"] = option_price

                        menu_items_list.extend(grouped_data.values())

            # One DataFrame holding both the item rows and their choice-group rows.
            df = pd.DataFrame(menu_items_list)

            if 'Max_quantity' in df.columns:
                # Blank out the headers of the per-option columns (everything after
                # 'Max_quantity') so they appear without titles in the spreadsheet.
                max_column_index = df.columns.get_loc('Max_quantity')
                for i in range(max_column_index + 1, len(df.columns)):
                    df.rename(columns={df.columns[i]: ''}, inplace=True)

                # Shift the option columns up one row, moving each choice-group
                # row's values onto the row above it.
                option_group_name_index = df.columns.get_loc('Option_group_name')
                for i in range(option_group_name_index, len(df.columns)):
                    df.iloc[:, i] = df.iloc[:, i].shift(-1)
                    
            # Drop any rows left completely empty after the shift.
            df_cleaned = df.dropna(how='all')

            # Build the Excel file in memory rather than writing it to disk.
            excel_file = BytesIO()
            df_cleaned.to_excel(excel_file, index=False)
            excel_file.seek(0)  # rewind so the caller reads from the beginning

            return excel_file, f"{restaurant_name}_menu.xlsx"
        else:
            print("Script tag with id '__NEXT_DATA__' not found.")
    else:
        print(f"Failed to get menu items. Status code: {response.status_code}")