import base64
import json
import os

import requests
from dotenv import load_dotenv
from langchain.chains import TransformChain
from langchain import globals
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import chain
from langchain_openai import ChatOpenAI
from openai import OpenAI

load_dotenv()

# Imgur and SERP API credentials (read from the .env file)
imgur_client_id = os.getenv('imgur_client_id')
serp_api_key = os.getenv('serp_api_key')
search_endpoint = 'https://serpapi.com/search'

# Set up your OpenAI API key (read from the .env file)
os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
gpt_api_key = os.getenv('gpt_api_key')


def upload_image_to_imgur(image_path):
    """Upload a local image to Imgur and return its public URL."""
    headers = {'Authorization': f'Client-ID {imgur_client_id}'}
    with open(image_path, 'rb') as image_file:
        files = {'image': image_file}
        response = requests.post('https://api.imgur.com/3/image', headers=headers, files=files)
    response_data = response.json()
    if response.status_code == 200 and response_data.get('success'):
        return response_data['data']['link']
    raise Exception(f"Error uploading image to Imgur: {response_data.get('data', {}).get('error', response_data)}")


def reverse_image_search(image_url):
    """Run a Google reverse image search on the uploaded image via SerpApi."""
    params = {
        'engine': 'google_reverse_image',
        'image_url': image_url,
        # "image_content": image_url,
        'api_key': serp_api_key,
    }
    response = requests.get(search_endpoint, params=params)
    return response.json()


def extract_titles_and_descriptions(search_results, top_n=3):
    """Return title/description pairs for the top N reverse-image-search matches."""
    titles_and_descriptions = []
    for result in search_results.get('image_results', [])[:top_n]:
        titles_and_descriptions.append({
            'title': result.get('title', ''),
            'description': result.get('snippet', ''),
        })
    return titles_and_descriptions


def load_image(inputs: dict) -> dict:
    """Load image from file and encode it as base64."""
    image_path = inputs["image_path"]

    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    image_base64 = encode_image(image_path)
    return {"image": image_base64}


class ImageInformation(BaseModel):
    """Information about an image."""
    Title: str = Field(description="Suitable title for the given product in image")
    image_description: str = Field(description="a short description of the image")
    # main_objects: list[str] = Field(description="list of the main objects on the picture")


parser = JsonOutputParser(pydantic_object=ImageInformation)

# Set verbose
# globals.set_debug(True)


@chain
def image_model(inputs: dict) -> str | list[str] | dict:
    """Invoke the vision model with the image and prompt."""
    model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
    msg = model.invoke(
        [HumanMessage(
            content=[
                {"type": "text", "text": inputs["prompt"]},
                {"type": "text", "text": parser.get_format_instructions()},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
            ])]
    )
    return msg.content


load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image,
)


def get_image_informations(image_path: str) -> dict:
    """Run the vision chain on a local image and return structured product info."""
    vision_prompt = """
    The image shows a commercial product. I want the information needed to list this product on an online store. Provide the following information:
    - The extracted text written on the product.
    - A title for the product in the image, based on the extracted text.
    """
    vision_chain = load_image_chain | image_model | parser
    return vision_chain.invoke({'image_path': image_path, 'prompt': vision_prompt})


def parse_json_response(response):
    """Parse the model's JSON reply into a list of {"Title", "Features"} dicts."""
    # Remove the enclosing Markdown code-fence markers if present
    if response.startswith("```json") and response.endswith("```"):
        response = response[7:-3].strip()

    # Load the response as a JSON object
    data = json.loads(response)

    # Find the key that contains the list of listing dicts
    listings_key = None
    for key, value in data.items():
        if isinstance(value, list) and all(isinstance(item, dict) for item in value):
            listings_key = key
            break
    if not listings_key:
        raise ValueError("No valid listings key found in the response")
    listings = data[listings_key]

    # Normalize each listing to a {"Title", "Features"} dictionary
    parsed_data = []
    for item in listings:
        parsed_data.append({
            "Title": item.get("Title", ""),
            "Features": item.get("Features", []),
        })
    return parsed_data


def main(image_path):
    # Upload image to Imgur and get the URL
    image_url = upload_image_to_imgur(image_path)
    print(f"Image uploaded to Imgur: {image_url}")

    # Perform reverse image search
    search_results = reverse_image_search(image_url)
    if 'error' in search_results:
        print("Error in SERP API:", search_results['error'])

    # Extract titles and descriptions
    serp_results = extract_titles_and_descriptions(search_results)
    print("SERP Result: ", serp_results, "\n\n\n\n")

    gpt_vision_result = get_image_informations(image_path)
    print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")

    # Prompt to generate the JSON for the product listing
    prompt = f'''
You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives the exact extracted text and a suitable title for the product image. Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, and use SERP API data ONLY if it is relevant to the GPT Vision result.

#### SERP Results:
{serp_results}

#### GPT Vision Result:
{gpt_vision_result}

Generate JSON for the product listing (at least THREE listings) based on the above results.

#### Respond only with JSON in the following format:
{{
    "Listings": [
        {{
            "Title": "Example Title",
            "Features": ["Feature 1", "Feature 2", "Feature 3", "...", "Feature N"]
        }},
        {{
            "Title": "Example Title",
            "Features": ["Feature 1", "Feature 2", "Feature 3", "...", "Feature N"]
        }}
    ]
}}
'''

    gpt_model = OpenAI(api_key=gpt_api_key)

    # Call the GPT-3.5 model using the chat completions endpoint
    response = gpt_model.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )

    # Extract the text from the response and parse it into listings
    generated_text = response.choices[0].message.content
    print("Generated Text: ", generated_text)
    parsed_data = parse_json_response(generated_text)
    return parsed_data


if __name__ == "__main__":
    image_path = 'sampleImages/edited3.jpg'  # Replace with the path to your local image
    main(image_path)
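
# A minimal sketch of the .env file this script assumes, based on the os.getenv()
# calls above -- the variable names come from the script, the values are placeholders:
#
#   imgur_client_id=YOUR_IMGUR_CLIENT_ID
#   serp_api_key=YOUR_SERPAPI_KEY
#   gpt_api_key=YOUR_OPENAI_API_KEY
#
# main() returns the parsed listings in the shape produced by parse_json_response,
# for example (illustrative values only):
#
#   [{"Title": "Example Title", "Features": ["Feature 1", "Feature 2"]}, ...]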