File size: 8,487 Bytes
e3d7308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import requests
import base64
from langchain_core.output_parsers import JsonOutputParser
import base64
from langchain.chains import TransformChain
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langchain import globals
from langchain_core.runnables import chain
from langchain_core.pydantic_v1 import BaseModel, Field
import os
from openai import OpenAI
from dotenv import load_dotenv
import json

load_dotenv()

# Imgur and SERP API credentials, read from the environment after load_dotenv().
imgur_client_id = os.getenv('imgur_client_id')
serp_api_key = os.getenv('serp_api_key')
search_endpoint = 'https://serpapi.com/search'

# OpenAI API key: read it once and mirror it into OPENAI_API_KEY, presumably so
# that ChatOpenAI (constructed below without an explicit key) can find it.
# os.environ only accepts strings, so assigning a missing (None) value would
# raise TypeError at import time; skip the assignment when the key is unset and
# leave any pre-existing OPENAI_API_KEY untouched.
gpt_api_key = os.getenv('gpt_api_key')
if gpt_api_key is not None:
    os.environ["OPENAI_API_KEY"] = gpt_api_key


def upload_image_to_imgur(image_path):
    """Upload a local image to Imgur and return its public link.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        The ``link`` field of the ``data`` object in Imgur's JSON response.

    Raises:
        Exception: If the response is not JSON, the HTTP status is not 200,
            or the response does not report success.
        OSError: If the image file cannot be opened or read.
    """
    headers = {'Authorization': f'Client-ID {imgur_client_id}'}

    # Read the bytes inside a context manager so the file handle is closed
    # even if reading fails.
    with open(image_path, 'rb') as image_file:
        data = {'image': image_file.read()}

    response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)

    # A non-JSON body (e.g. an HTML error page) makes .json() raise a
    # ValueError subclass; report it as an upload failure instead.
    try:
        response_data = response.json()
    except ValueError as err:
        raise Exception(
            f"Error uploading image to Imgur: HTTP {response.status_code} with a non-JSON response body"
        ) from err

    payload = response_data if isinstance(response_data, dict) else {}
    if response.status_code == 200 and payload.get('success'):
        return payload['data']['link']

    # The error body is not guaranteed to have data.error; fall back to the
    # HTTP status so the failure path never dies with a KeyError of its own.
    details = payload.get('data')
    error = details.get('error') if isinstance(details, dict) else None
    if error is None:
        error = f"HTTP {response.status_code}"
    raise Exception(f"Error uploading image to Imgur: {error}")

def reverse_image_search(image_url):
    """Run a reverse image search for ``image_url`` against the search endpoint.

    Sends a GET request using the ``google_reverse_image`` engine and the
    module-level API key, and returns the decoded JSON body of the response.
    """
    query = dict(
        engine='google_reverse_image',
        image_url=image_url,
        api_key=serp_api_key,
    )
    return requests.get(search_endpoint, params=query).json()

def extract_titles_and_descriptions(search_results, top_n=3):
    """Collect title/description pairs from the first ``top_n`` image results.

    Each entry of ``search_results['image_results']`` (an empty list when the
    key is absent) becomes ``{'title': ..., 'description': ...}``, where the
    description comes from the entry's ``snippet`` field. Missing fields
    default to an empty string.
    """
    leading_hits = search_results.get('image_results', [])[:top_n]
    return [
        {
            'title': hit.get('title', ''),
            'description': hit.get('snippet', ''),
        }
        for hit in leading_hits
    ]

def load_image(inputs: dict) -> dict:
    """Read the file at ``inputs["image_path"]`` and return it base64-encoded.

    The result is ``{"image": <base64 text>}``, with the encoded bytes decoded
    as UTF-8 text.
    """
    with open(inputs["image_path"], "rb") as source:
        raw_bytes = source.read()
    encoded_text = base64.b64encode(raw_bytes).decode('utf-8')
    return {"image": encoded_text}

class ImageInformation(BaseModel):
    """Information about an image."""

    # Schema passed as `pydantic_object` to the module-level JsonOutputParser.
    # NOTE(review): the class docstring and the Field descriptions below are
    # presumably included in the parser's format instructions, i.e. they are
    # part of the prompt sent to the model -- confirm before rewording them.
    Title: str = Field(description="Suitable title for the given product in image")
    image_description: str = Field(description="a short description of the image")
    # Disabled field, kept for reference:
    #  main_objects: list[str] = Field(description="list of the main objects on the picture")


# Set verbose
# globals.set_debug(True)

@chain
def image_model(inputs: dict) -> str | list[str] | dict:
    """Ask the vision model about an image and return the reply's content.

    Reads ``inputs["prompt"]`` and ``inputs["image"]`` (base64 text, embedded
    in a ``data:image/jpeg`` URL). A single human message is sent that carries
    the prompt, the module-level parser's format instructions and the image.
    """
    # NOTE(review): confirm "gpt-4-vision-preview" is still served by the API.
    vision_llm = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)

    prompt_part = {"type": "text", "text": inputs["prompt"]}
    format_part = {"type": "text", "text": parser.get_format_instructions()}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"},
    }

    request = HumanMessage(content=[prompt_part, format_part, image_part])
    reply = vision_llm.invoke([request])
    return reply.content


# Chain step that maps {"image_path": ...} to {"image": ...} by calling load_image.
load_image_chain = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image
)

# JSON output parser built from the ImageInformation schema. image_model embeds
# its format instructions in the request, and get_image_informations uses it as
# the final step of the vision chain.
parser = JsonOutputParser(pydantic_object=ImageInformation)
def get_image_informations(image_path: str) -> dict:
    """Run the vision chain on a local image and return the parsed result.

    The chain loads the image, sends it to the vision model together with a
    product-listing prompt, and parses the model's reply with the module-level
    JSON parser.
    """
    vision_prompt = """
    Given the image, the image is a commercial product. I want to get the information for listing this product on online store. provide the following information:
    - The extracted text written on the product.
    - Title of the product in image based on the extracted text
    """
    pipeline = load_image_chain | image_model | parser
    chain_inputs = {
        'image_path': f'{image_path}',
        'prompt': vision_prompt,
    }
    return pipeline.invoke(chain_inputs)

def parse_json_response(response):
    """Parse a model reply into a list of ``{"Title", "Features"}`` dicts.

    The reply may be raw JSON or JSON wrapped in a Markdown code fence
    (```` ```json ... ``` ```` or a bare ```` ``` ... ``` ````), optionally
    surrounded by whitespace. The listings are taken from the first value of
    the top-level object that is a list of dicts; a top-level JSON list of
    dicts is accepted as the listings directly.

    Args:
        response: The text returned by the model.

    Returns:
        One dict per listing with the keys ``"Title"`` (default ``""``) and
        ``"Features"`` (default ``[]``).

    Raises:
        ValueError: If ``response`` is not a string, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or contains no
            list of listing objects.
    """
    if not isinstance(response, str):
        raise ValueError("Response must be a string containing JSON")

    # Strip surrounding whitespace first so a fenced reply with a trailing
    # newline is still recognised.
    text = response.strip()
    if text.startswith("```"):
        text = text[3:]
        # Optional language tag right after the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

    data = json.loads(text)

    def _is_listing_list(value):
        return isinstance(value, list) and all(isinstance(item, dict) for item in value)

    if isinstance(data, dict):
        # Use the first value that looks like a list of listings, whatever
        # its key is called (including an empty-string key).
        listings = next((value for value in data.values() if _is_listing_list(value)), None)
    elif _is_listing_list(data):
        listings = data
    else:
        listings = None

    if listings is None:
        raise ValueError("No valid listings key found in the response")

    return [
        {
            "Title": item.get("Title", ""),
            "Features": item.get("Features", []),
        }
        for item in listings
    ]

def main(image_path):
        """Generate e-commerce listing suggestions for a local product image.

        Steps, in order:
        1. Upload the image to Imgur to obtain a URL.
        2. Run a reverse image search on that URL and keep the top titles
           and snippets.
        3. Ask the vision chain for a title/description of the image.
        4. Ask gpt-3.5-turbo to combine both results into JSON listings and
           parse that reply with parse_json_response.

        Intermediate results are printed to stdout. A search error is only
        printed; processing continues with whatever results were returned.

        Args:
            image_path: Path of the local image file.

        Returns:
            The list of dicts produced by parse_json_response, each with the
            keys "Title" and "Features".
        """
    # try:
        # Upload image to Imgur and get the URL
        image_url = upload_image_to_imgur(image_path)
        print(f"Image uploaded to Imgur: {image_url}")

        # Perform reverse image search
        search_results = reverse_image_search(image_url)
        if 'error' in search_results:
            # Report the error but keep going; extraction below falls back to
            # an empty list when 'image_results' is absent.
            print("Error in Serp API:", search_results['error'])
        

        # Extract titles and descriptions
        serp_results = extract_titles_and_descriptions(search_results)
        print("Serp Result: ",serp_results, "\n\n\n\n")

        # Title/description derived from the image itself by the vision chain
        gpt_vision_result = get_image_informations(image_path)
        print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")


        # Prompt to generate the JSON for the product listing.
        # Both results are interpolated as their Python string representations;
        # doubled braces are literal braces in the f-string.
        prompt = f'''
        You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives exact extracted texts and a suitable title for the product image.
        Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, using SERP API data ONLY if it is relevent to GPT Vision result. 
        #### SERP Results:
        {serp_results}

        #### GPT Vision Result:
        {gpt_vision_result}


        Generate a JSON for product listing (at Least THREE) based on the above results.

        #### Please provide in the form of a json. Following is the format of the json::

        
        {{
            "Listings": [
                {{
                    "Title": "Example Title",
                    "Features": [
                        "Feature 1",
                        "Feature 2",
                        "Feature 3",
                        .,
                        .,
                        .,
                        .,
                        .,
                        "feature N"
                    ]
                }},

                {{
                    "Title": "Example Title",
                    "Features": [
                        "Feature 1",
                        "Feature 2",
                        "Feature 3",
                        .,
                        .,
                        .,
                        .,
                        .,
                        "feature N"
                    ]
                }}


            ]
        }}

        '''

        gpt_model = OpenAI(api_key=gpt_api_key)
        # Call the gpt-3.5-turbo model using the chat completion endpoint
        # (temperature=0)
        response = gpt_model.chat.completions.create(model="gpt-3.5-turbo",temperature=0,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ])
        # Extract the text from the response
        generated_text = response.choices[0].message.content

        print("Generated Text: ",generated_text)
        # Convert the model's JSON reply into a list of Title/Features dicts
        parsed_data = parse_json_response(generated_text)

        return parsed_data

if __name__ == "__main__":
    # Script entry point: run the pipeline on a sample image. The returned
    # listings are not used here; main() prints its intermediate results.
    image_path = 'sampleImages/edited3.jpg'  # Replace with the path to your local image
    main(image_path)