Spaces:

Ahmadshahzad2
/

listing-gen

Sleeping

File size: 8,487 Bytes

e3d7308

import requests
import base64
from langchain_core.output_parsers import JsonOutputParser
import base64
from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain import globals
from langchain_core.runnables import chain
from langchain_core.pydantic_v1 import BaseModel, Field
import os
from openai import OpenAI
from dotenv import load_dotenv
import json

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Imgur and SERP API credentials
imgur_client_id = os.getenv('imgur_client_id')
serp_api_key = os.getenv('serp_api_key')
search_endpoint = 'https://serpapi.com/search'

# OpenAI API key, read from the `gpt_api_key` environment variable.
gpt_api_key = os.getenv('gpt_api_key')

# ChatOpenAI is constructed in this file without an explicit key, so the key is
# also exported as OPENAI_API_KEY. os.environ only accepts strings: assigning
# None raises TypeError at import time, so export only when the key is set and
# leave any pre-existing OPENAI_API_KEY untouched otherwise.
if gpt_api_key is not None:
    os.environ["OPENAI_API_KEY"] = gpt_api_key


def upload_image_to_imgur(image_path, timeout=30):
    """Upload a local image file to Imgur and return the link Imgur reports.

    Args:
        image_path: Path of the image file to upload.
        timeout: Seconds to wait for Imgur before the request is aborted.

    Returns:
        The ``data.link`` value from Imgur's JSON response.

    Raises:
        OSError: If the image file cannot be opened or read.
        requests.RequestException: On network failure or timeout.
        Exception: If Imgur does not report success (non-200 status,
            ``success`` false/missing, or a body that is not JSON).
    """
    headers = {'Authorization': f'Client-ID {imgur_client_id}'}

    # Context manager so the file handle is released even if reading fails.
    with open(image_path, 'rb') as image_file:
        data = {'image': image_file.read()}

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        'https://api.imgur.com/3/image',
        headers=headers,
        files=data,
        timeout=timeout,
    )

    try:
        response_data = response.json()
    except ValueError as err:
        # Gateways/proxies may answer with HTML; report the status instead of
        # leaking a JSON decoding error.
        raise Exception(
            f"Error uploading image to Imgur: HTTP {response.status_code} "
            "with a non-JSON response body"
        ) from err

    if not isinstance(response_data, dict):
        response_data = {}

    if response.status_code == 200 and response_data.get('success'):
        return response_data['data']['link']

    # The error payload is not guaranteed to carry data.error; fall back to
    # the HTTP status so the failure path never raises KeyError itself.
    details = response_data.get('data')
    error = details.get('error') if isinstance(details, dict) else None
    if error is None:
        error = f"HTTP {response.status_code}"
    raise Exception(f"Error uploading image to Imgur: {error}")

def reverse_image_search(image_url, timeout=30):
    """Run a Google reverse image search for ``image_url`` through SerpApi.

    Args:
        image_url: Publicly reachable URL of the image to look up.
        timeout: Seconds to wait for the search endpoint before aborting.

    Returns:
        The decoded JSON body of the response. The HTTP status is not checked
        here; callers inspect the payload (e.g. for an ``'error'`` key).

    Raises:
        requests.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    params = {
        'engine': 'google_reverse_image',
        'image_url': image_url,
        'api_key': serp_api_key,
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(search_endpoint, params=params, timeout=timeout)
    return response.json()

def extract_titles_and_descriptions(search_results, top_n=3):
    """Summarise the leading image results of a search response.

    Args:
        search_results: Mapping that may contain an ``'image_results'`` list
            of result mappings.
        top_n: Maximum number of results to keep, taken from the front.

    Returns:
        A list of ``{'title': ..., 'description': ...}`` dicts, where the
        description is the result's ``'snippet'``. Missing fields become ``''``.
    """
    leading_hits = search_results.get('image_results', [])[:top_n]
    return [
        {
            'title': hit.get('title', ''),
            'description': hit.get('snippet', ''),
        }
        for hit in leading_hits
    ]

def load_image(inputs: dict) -> dict:
    """Read the file at ``inputs["image_path"]`` and base64-encode its bytes.

    Returns:
        ``{"image": <base64 text>}`` with the encoded bytes decoded as UTF-8.
    """
    with open(inputs["image_path"], "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes).decode('utf-8')
    return {"image": encoded}

class ImageInformation(BaseModel):
    """Information about an image."""

    # Output schema passed to JsonOutputParser (module-level `parser`).
    # NOTE(review): the class docstring and the Field descriptions below
    # presumably end up in the format instructions sent to the model, so
    # rewording them would change the prompt — confirm before editing.

    # Product title the model should propose for the pictured item.
    Title: str = Field(description="Suitable title for the given product in image")
    # Short free-text description of the picture.
    image_description: str = Field(description="a short description of the image")
    # Disabled field, kept for reference:
    #  main_objects: list[str] = Field(description="list of the main objects on the picture")


# Set verbose
# globals.set_debug(True)

@chain
def image_model(inputs: dict) -> str | list[str] | dict:
    """Ask the vision model about an image and return the reply content.

    A single human message is sent with three parts, in order: the caller's
    prompt, the module-level ``parser``'s format instructions, and the image
    embedded as a ``data:image/jpeg;base64`` URL.

    Args:
        inputs: Mapping with ``"prompt"`` (instruction text) and ``"image"``
            (base64-encoded image data).

    Returns:
        The ``content`` attribute of the model's reply message.
    """
    model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)

    prompt_part = {"type": "text", "text": inputs["prompt"]}
    format_part = {"type": "text", "text": parser.get_format_instructions()}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"},
    }

    message = HumanMessage(content=[prompt_part, format_part, image_part])
    reply = model.invoke([message])
    return reply.content


# Chain step wrapping load_image: takes "image_path" and produces "image"
# (the base64 text returned by load_image).
load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image
)

# JSON output parser configured with the ImageInformation schema. It is used
# both for the format instructions in image_model and as the last step of the
# vision chain.
parser = JsonOutputParser(pydantic_object=ImageInformation)
def get_image_informations(image_path: str) -> dict:
    """Run the vision chain on one image and return the parsed result.

    The chain is ``load_image_chain | image_model | parser``: the image is
    loaded and base64-encoded, sent to the vision model together with the
    listing prompt below, and the reply is handed to the JSON parser.

    Args:
        image_path: Path of the product image to analyse.

    Returns:
        Whatever ``parser`` produces from the model's reply.
    """
    vision_prompt = """
    Given the image, the image is a commercial product. I want to get the information for listing this product on online store. provide the following information:
    - The extracted text written on the product.
    - Title of the product in image based on the extracted text
    """
    chain_input = {
        'image_path': f'{image_path}',
        'prompt': vision_prompt,
    }
    pipeline = load_image_chain | image_model | parser
    return pipeline.invoke(chain_input)

def parse_json_response(response):
    """Extract listing dictionaries from a model reply containing JSON.

    The reply may be bare JSON or wrapped in a Markdown code fence (with or
    without a ``json`` language tag), optionally surrounded by whitespace.
    The JSON is expected to be either an object in which some value is a list
    of objects (the first such value is used, whatever its key is called), or
    directly a list of objects.

    Args:
        response: Text returned by the model.

    Returns:
        A list of ``{"Title": ..., "Features": ...}`` dicts, one per listing.
        A missing ``Title`` becomes ``""`` and missing ``Features`` become
        ``[]``; any other keys of a listing are dropped.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or contains no list of objects.
    """
    text = response.strip()

    # Strip a Markdown fence. Models often add whitespace around the fence or
    # omit the language tag, so neither is required.
    if text.startswith("```"):
        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

    data = json.loads(text)

    def _is_listing_list(value):
        return isinstance(value, list) and all(isinstance(item, dict) for item in value)

    listings = None
    if isinstance(data, dict):
        # Match on the value, not the key name: the model does not always call
        # the list "Listings". Comparing against None (rather than testing the
        # key's truthiness) keeps an empty-string key valid.
        listings = next(
            (value for value in data.values() if _is_listing_list(value)),
            None,
        )
    elif _is_listing_list(data):
        # The model returned the array of listings without a wrapper object.
        listings = data

    if listings is None:
        raise ValueError("No valid listings key found in the response")

    return [
        {
            "Title": item.get("Title", ""),
            "Features": item.get("Features", []),
        }
        for item in listings
    ]

def main(image_path):
    """Produce e-commerce listing suggestions for a local product image.

    Steps: upload the image to Imgur, reverse-search the hosted URL through
    SerpApi, describe the image with the vision chain, then ask
    ``gpt-3.5-turbo`` to merge both sources into listings. Intermediate
    results are printed along the way.

    Args:
        image_path: Path of the product image on disk.

    Returns:
        The list of ``{"Title", "Features"}`` dicts produced by
        ``parse_json_response`` from the model's reply.
    """
    # Host the image so the reverse image search can be given a URL.
    hosted_url = upload_image_to_imgur(image_path)
    print(f"Image uploaded to Imgur: {hosted_url}")

    # A search error is reported but does not stop the run; extraction below
    # then works on whatever the payload contains.
    serp_payload = reverse_image_search(hosted_url)
    if 'error' in serp_payload:
        print("Error in Serp API:", serp_payload['error'])

    serp_results = extract_titles_and_descriptions(serp_payload)
    print("Serp Result: ",serp_results, "\n\n\n\n")

    gpt_vision_result = get_image_informations(image_path)
    print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")

    # Instruction for the text model; doubled braces are literal braces in the
    # f-string, single braces interpolate the two result objects.
    prompt = f'''
        You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives exact extracted texts and a suitable title for the product image.
        Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, using SERP API data ONLY if it is relevent to GPT Vision result. 
        #### SERP Results:
        {serp_results}

        #### GPT Vision Result:
        {gpt_vision_result}


        Generate a JSON for product listing (at Least THREE) based on the above results.

        #### Please provide in the form of a json. Following is the format of the json::

        
        {{
            "Listings": [
                {{
                    "Title": "Example Title",
                    "Features": [
                        "Feature 1",
                        "Feature 2",
                        "Feature 3",
                        .,
                        .,
                        .,
                        .,
                        .,
                        "feature N"
                    ]
                }},

                {{
                    "Title": "Example Title",
                    "Features": [
                        "Feature 1",
                        "Feature 2",
                        "Feature 3",
                        .,
                        .,
                        .,
                        .,
                        .,
                        "feature N"
                    ]
                }}


            ]
        }}

        '''

    client = OpenAI(api_key=gpt_api_key)
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    reply_text = completion.choices[0].message.content

    print("Generated Text: ",reply_text)
    return parse_json_response(reply_text)

# Script entry point: run the full pipeline on a sample image path. The
# returned listings are not used here; main() prints intermediate results.
if __name__ == "__main__":
    image_path = 'sampleImages/edited3.jpg'  # Replace with the path to your local image
    main(image_path)