# listing-gen / pipeline.py
# Author: Ahmad Shahzad
# (originally committed as "app added", e3d7308)
import requests
import base64
from langchain_core.output_parsers import JsonOutputParser
import base64
from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain import globals
from langchain_core.runnables import chain
from langchain_core.pydantic_v1 import BaseModel, Field
import os
from openai import OpenAI
from dotenv import load_dotenv
import json
load_dotenv()
# Imgur and SERP API credentials
imgur_client_id = os.getenv('imgur_client_id')
serp_api_key = os.getenv('serp_api_key')
search_endpoint = 'https://serpapi.com/search'
# Set up your OpenAI API key
os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
# Replace with your OpenAI API key
gpt_api_key = os.getenv('gpt_api_key')
def upload_image_to_imgur(image_path):
headers = {'Authorization': f'Client-ID {imgur_client_id}'}
data = {'image': open(image_path, 'rb').read()}
response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)
response_data = response.json()
if response.status_code == 200 and response_data['success']:
return response_data['data']['link']
else:
raise Exception(f"Error uploading image to Imgur: {response_data['data']['error']}")
def reverse_image_search(image_url):
params = {
'engine': 'google_reverse_image',
'image_url': image_url,
# "image_content": image_url,
'api_key': serp_api_key
}
response = requests.get(search_endpoint, params=params)
return response.json()
def extract_titles_and_descriptions(search_results, top_n=3):
titles_and_descriptions = []
for result in search_results.get('image_results', [])[:top_n]:
temp_dict = {}
title = result.get('title', '')
description = result.get('snippet', '')
temp_dict['title'] = title
temp_dict['description'] = description
titles_and_descriptions.append(temp_dict)
return titles_and_descriptions
def load_image(inputs: dict) -> dict:
"""Load image from file and encode it as base64."""
image_path = inputs["image_path"]
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
image_base64 = encode_image(image_path)
return {"image": image_base64}
class ImageInformation(BaseModel):
"""Information about an image."""
Title: str = Field(description="Suitable title for the given product in image")
image_description: str = Field(description="a short description of the image")
# main_objects: list[str] = Field(description="list of the main objects on the picture")
# Set verbose
# globals.set_debug(True)
@chain
def image_model(inputs: dict) -> str | list[str] | dict:
"""Invoke model with image and prompt."""
model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
msg = model.invoke(
[HumanMessage(
content=[
{"type": "text", "text": inputs["prompt"]},
{"type": "text", "text": parser.get_format_instructions()},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
])]
)
return msg.content
load_image_chain = TransformChain(
input_variables=["image_path"],
output_variables=["image"],
transform=load_image
)
parser = JsonOutputParser(pydantic_object=ImageInformation)
def get_image_informations(image_path: str) -> dict:
vision_prompt = """
Given the image, the image is a commercial product. I want to get the information for listing this product on online store. provide the following information:
- The extracted text written on the product.
- Title of the product in image based on the extracted text
"""
vision_chain = load_image_chain | image_model | parser
return vision_chain.invoke({'image_path': f'{image_path}',
'prompt': vision_prompt})
def parse_json_response(response):
# Remove the enclosing markers if present
if response.startswith("```json") and response.endswith("```"):
response = response[7:-3].strip()
# Load the response as a JSON object
data = json.loads(response)
# Find the key that contains the list of items
listings_key = None
for key, value in data.items():
if isinstance(value, list) and all(isinstance(item, dict) for item in value):
listings_key = key
break
if not listings_key:
raise ValueError("No valid listings key found in the response")
listings = data[listings_key]
# Create a list to store the parsed dictionaries
parsed_data = []
# Iterate through each item in the listings
for item in listings:
# Extract the title and features
title = item.get("Title", "")
features = item.get("Features", [])
# Create a dictionary for each item
item_dict = {
"Title": title,
"Features": features
}
# Append the dictionary to the list
parsed_data.append(item_dict)
return parsed_data
def main(image_path):
# try:
# Upload image to Imgur and get the URL
image_url = upload_image_to_imgur(image_path)
print(f"Image uploaded to Imgur: {image_url}")
# Perform reverse image search
search_results = reverse_image_search(image_url)
if 'error' in search_results:
print("Error in Serp API:", search_results['error'])
# Extract titles and descriptions
serp_results = extract_titles_and_descriptions(search_results)
print("Serp Result: ",serp_results, "\n\n\n\n")
gpt_vision_result = get_image_informations(image_path)
print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")
# Prompt to generate the JSON for the product listing
prompt = f'''
You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives exact extracted texts and a suitable title for the product image.
Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, using SERP API data ONLY if it is relevent to GPT Vision result.
#### SERP Results:
{serp_results}
#### GPT Vision Result:
{gpt_vision_result}
Generate a JSON for product listing (at Least THREE) based on the above results.
#### Please provide in the form of a json. Following is the format of the json::
{{
"Listings": [
{{
"Title": "Example Title",
"Features": [
"Feature 1",
"Feature 2",
"Feature 3",
.,
.,
.,
.,
.,
"feature N"
]
}},
{{
"Title": "Example Title",
"Features": [
"Feature 1",
"Feature 2",
"Feature 3",
.,
.,
.,
.,
.,
"feature N"
]
}}
]
}}
'''
gpt_model = OpenAI(api_key=gpt_api_key)
# Call the ChatGPT 3.5 model using the chat completion endpoint
response = gpt_model.chat.completions.create(model="gpt-3.5-turbo",temperature=0,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt}
])
# Extract the text from the response
generated_text = response.choices[0].message.content
print("Generated Text: ",generated_text)
parsed_data = parse_json_response(generated_text)
# Print the ChatGPT response
return parsed_data
if __name__ == "__main__":
image_path = 'sampleImages/edited3.jpg' # Replace with the path to your local image
main(image_path)