Ahmad Shahzad committed on
Commit e3d7308 · 1 Parent(s): 1051746
.DS_Store ADDED
Binary file (8.2 kB).
 
SampleImages/edited.jpg ADDED
SampleImages/edited2.jpg ADDED
SampleImages/edited3.jpg ADDED
app.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ import tempfile
+ from PIL import Image
+ from pipeline import main
+
+ # Function to format the pipeline output as Markdown blocks
+ def format_output(data):
+     formatted_data = []
+     for item in data:
+         block = f"**{item['Title']}**\n\n" + "\n".join([f"- {feature}" for feature in item['Features']])
+         formatted_data.append(block)
+     return formatted_data
+
+ # Function to handle image input, save it temporarily, and display formatted output
+ def process_image(image):
+     # Save the uploaded image to a temporary file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
+         image.save(temp_file.name)
+         temp_file_path = temp_file.name
+
+     # Process the image using the pipeline's main function
+     data = main(temp_file_path)
+     formatted_data = format_output(data)
+     return tuple(formatted_data)  # Returning a tuple for Gradio's multiple outputs
+
+ # Create Gradio blocks for each listing
+ with gr.Blocks() as demo:
+     with gr.Row():
+         input_image = gr.Image(type="pil", label="Input Image", image_mode="RGB", height=512, width=512)
+
+     with gr.Row():
+         output1 = gr.Markdown(label="Block 1")
+         output2 = gr.Markdown(label="Block 2")
+         output3 = gr.Markdown(label="Block 3")
+
+     # Button to trigger the processing function
+     button = gr.Button("Process Image")
+     button.click(process_image, inputs=input_image, outputs=[output1, output2, output3])
+
+ demo.launch()
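
For context, `format_output` above only touches the "Title" and "Features" keys, so `main()` from pipeline.py must return a list of dicts in that shape. A minimal sketch with invented values:

    # Sketch of the structure format_output consumes.
    # The keys ("Title", "Features") come from the code above; the values are invented.
    data = [
        {"Title": "Product A", "Features": ["Feature 1", "Feature 2"]},
        {"Title": "Product B", "Features": ["Feature 1", "Feature 2"]},
        {"Title": "Product C", "Features": ["Feature 1", "Feature 2"]},
    ]
    # format_output(data) yields one Markdown block per listing, which
    # process_image returns as a tuple for the three gr.Markdown outputs.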
gpt_vision.py ADDED
@@ -0,0 +1,79 @@
+ from langchain_core.output_parsers import JsonOutputParser
+ import base64
+ from langchain.chains import TransformChain
+ from langchain_core.messages import HumanMessage
+ from langchain_openai import ChatOpenAI
+ from langchain import globals
+ from langchain_core.runnables import chain
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Set up the OpenAI API key from the environment
+ os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
+
+ def load_image(inputs: dict) -> dict:
+     """Load image from file and encode it as base64."""
+     image_path = inputs["image_path"]
+
+     def encode_image(image_path):
+         with open(image_path, "rb") as image_file:
+             return base64.b64encode(image_file.read()).decode('utf-8')
+
+     image_base64 = encode_image(image_path)
+     return {"image": image_base64}
+
+
+ load_image_chain = TransformChain(
+     input_variables=["image_path"],
+     output_variables=["image"],
+     transform=load_image
+ )
+
+ class ImageInformation(BaseModel):
+     """Information about an image."""
+
+     Title: str = Field(description="Suitable title for the given product in image")
+     image_description: str = Field(description="a short description of the image")
+     # main_objects: list[str] = Field(description="list of the main objects on the picture")
+
+
+ # Set verbose
+ # globals.set_debug(True)
+
+ @chain
+ def image_model(inputs: dict) -> str | list[str] | dict:
+     """Invoke the vision model with the image and prompt."""
+     model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
+     msg = model.invoke(
+         [HumanMessage(
+             content=[
+                 {"type": "text", "text": inputs["prompt"]},
+                 {"type": "text", "text": parser.get_format_instructions()},
+                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
+             ])]
+     )
+     return msg.content
+
+
+ parser = JsonOutputParser(pydantic_object=ImageInformation)
+
+ def get_image_informations(image_path: str) -> dict:
+     vision_prompt = """
+     Given the image, provide the following information:
+     - Title of the product in image
+     - A description of the product in image based on the text written in image
+     """
+     vision_chain = load_image_chain | image_model | parser
+     return vision_chain.invoke({'image_path': f'{image_path}',
+                                 'prompt': vision_prompt})
+
+
+ gpt_vision_result = get_image_informations("SampleImages/edited3.jpg")  # path casing matches the SampleImages/ directory added in this commit
+ print(gpt_vision_result)
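
Since the `JsonOutputParser` is built from the `ImageInformation` schema, a successful run parses the model reply into a plain dict with those two fields. A sketch of the expected shape (values invented for illustration):

    # Sketch of the parsed result; field names come from ImageInformation,
    # the values here are invented.
    gpt_vision_result = {
        "Title": "Sample Product Name",
        "image_description": "Short description based on the text visible in the image.",
    }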
pipeline.py ADDED
@@ -0,0 +1,244 @@
+ import requests
+ import base64
+ from langchain_core.output_parsers import JsonOutputParser
+ from langchain.chains import TransformChain
+ from langchain_core.messages import HumanMessage
+ from langchain_openai import ChatOpenAI
+ from langchain import globals
+ from langchain_core.runnables import chain
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ import os
+ from openai import OpenAI
+ from dotenv import load_dotenv
+ import json
+
+ load_dotenv()
+ # Imgur and SERP API credentials
+ imgur_client_id = os.getenv('imgur_client_id')
+ serp_api_key = os.getenv('serp_api_key')
+ search_endpoint = 'https://serpapi.com/search'
+
+ # Set up the OpenAI API key from the environment
+ os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
+ # Keep a copy of the key for the OpenAI client used below
+ gpt_api_key = os.getenv('gpt_api_key')
+
+
+ def upload_image_to_imgur(image_path):
+     headers = {'Authorization': f'Client-ID {imgur_client_id}'}
+     data = {'image': open(image_path, 'rb').read()}
+     response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)
+     response_data = response.json()
+     if response.status_code == 200 and response_data['success']:
+         return response_data['data']['link']
+     else:
+         raise Exception(f"Error uploading image to Imgur: {response_data['data']['error']}")
+
+ def reverse_image_search(image_url):
+     params = {
+         'engine': 'google_reverse_image',
+         'image_url': image_url,
+         # "image_content": image_url,
+         'api_key': serp_api_key
+     }
+     response = requests.get(search_endpoint, params=params)
+     return response.json()
+
+ def extract_titles_and_descriptions(search_results, top_n=3):
+     titles_and_descriptions = []
+     for result in search_results.get('image_results', [])[:top_n]:
+         temp_dict = {}
+         title = result.get('title', '')
+         description = result.get('snippet', '')
+         temp_dict['title'] = title
+         temp_dict['description'] = description
+         titles_and_descriptions.append(temp_dict)
+     return titles_and_descriptions
+
+ def load_image(inputs: dict) -> dict:
+     """Load image from file and encode it as base64."""
+     image_path = inputs["image_path"]
+
+     def encode_image(image_path):
+         with open(image_path, "rb") as image_file:
+             return base64.b64encode(image_file.read()).decode('utf-8')
+
+     image_base64 = encode_image(image_path)
+     return {"image": image_base64}
+
+ class ImageInformation(BaseModel):
+     """Information about an image."""
+
+     Title: str = Field(description="Suitable title for the given product in image")
+     image_description: str = Field(description="a short description of the image")
+     # main_objects: list[str] = Field(description="list of the main objects on the picture")
+
+
+ # Set verbose
+ # globals.set_debug(True)
+
+ @chain
+ def image_model(inputs: dict) -> str | list[str] | dict:
+     """Invoke the vision model with the image and prompt."""
+     model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
+     msg = model.invoke(
+         [HumanMessage(
+             content=[
+                 {"type": "text", "text": inputs["prompt"]},
+                 {"type": "text", "text": parser.get_format_instructions()},
+                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
+             ])]
+     )
+     return msg.content
+
+
+ load_image_chain = TransformChain(
+     input_variables=["image_path"],
+     output_variables=["image"],
+     transform=load_image
+ )
+
+ parser = JsonOutputParser(pydantic_object=ImageInformation)
+
+ def get_image_informations(image_path: str) -> dict:
+     vision_prompt = """
+     The image shows a commercial product; I want information for listing this product on an online store. Provide the following information:
+     - The extracted text written on the product.
+     - Title of the product in image based on the extracted text
+     """
+     vision_chain = load_image_chain | image_model | parser
+     return vision_chain.invoke({'image_path': f'{image_path}',
+                                 'prompt': vision_prompt})
+
+ def parse_json_response(response):
+     # Remove the enclosing ```json markers if present
+     if response.startswith("```json") and response.endswith("```"):
+         response = response[7:-3].strip()
+
+     # Load the response as a JSON object
+     data = json.loads(response)
+
+     # Find the key that contains the list of items
+     listings_key = None
+     for key, value in data.items():
+         if isinstance(value, list) and all(isinstance(item, dict) for item in value):
+             listings_key = key
+             break
+
+     if not listings_key:
+         raise ValueError("No valid listings key found in the response")
+
+     listings = data[listings_key]
+
+     # Create a list to store the parsed dictionaries
+     parsed_data = []
+
+     # Iterate through each item in the listings
+     for item in listings:
+         # Extract the title and features
+         title = item.get("Title", "")
+         features = item.get("Features", [])
+
+         # Create a dictionary for each item
+         item_dict = {
+             "Title": title,
+             "Features": features
+         }
+
+         # Append the dictionary to the list
+         parsed_data.append(item_dict)
+
+     return parsed_data
+
+ def main(image_path):
+     # try:
+     # Upload the image to Imgur and get a public URL
+     image_url = upload_image_to_imgur(image_path)
+     print(f"Image uploaded to Imgur: {image_url}")
+
+     # Perform reverse image search
+     search_results = reverse_image_search(image_url)
+     if 'error' in search_results:
+         print("Error in Serp API:", search_results['error'])
+
+     # Extract titles and descriptions
+     serp_results = extract_titles_and_descriptions(search_results)
+     print("Serp Result: ", serp_results, "\n\n\n\n")
+
+     gpt_vision_result = get_image_informations(image_path)
+     print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")
+
+     # Prompt to generate the JSON for the product listing
+     prompt = f'''
+     You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives exact extracted texts and a suitable title for the product image.
+     Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, using SERP API data ONLY if it is relevant to the GPT Vision result.
+     #### SERP Results:
+     {serp_results}
+
+     #### GPT Vision Result:
+     {gpt_vision_result}
+
+     Generate JSON for product listings (at least THREE) based on the above results.
+
+     #### Please provide the output as JSON, in the following format:
+
+     {{
+         "Listings": [
+             {{
+                 "Title": "Example Title",
+                 "Features": [
+                     "Feature 1",
+                     "Feature 2",
+                     "Feature 3",
+                     ...
+                     "Feature N"
+                 ]
+             }},
+             {{
+                 "Title": "Example Title",
+                 "Features": [
+                     "Feature 1",
+                     "Feature 2",
+                     "Feature 3",
+                     ...
+                     "Feature N"
+                 ]
+             }}
+         ]
+     }}
+     '''
+
+     gpt_model = OpenAI(api_key=gpt_api_key)
+     # Call GPT-3.5 via the chat completions endpoint
+     response = gpt_model.chat.completions.create(
+         model="gpt-3.5-turbo",
+         temperature=0,
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant."},
+             {"role": "user", "content": prompt}
+         ])
+     # Extract the text from the response
+     generated_text = response.choices[0].message.content
+
+     print("Generated Text: ", generated_text)
+     parsed_data = parse_json_response(generated_text)
+
+     return parsed_data
+
+ if __name__ == "__main__":
+     image_path = 'SampleImages/edited3.jpg'  # Replace with the path to your local image
+     main(image_path)
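
All of these scripts load credentials with `load_dotenv()`. A minimal `.env` sketch using the three key names read via `os.getenv` above (values are placeholders, not real credentials):

    # .env (placeholders only)
    gpt_api_key=YOUR_OPENAI_API_KEY
    imgur_client_id=YOUR_IMGUR_CLIENT_ID
    serp_api_key=YOUR_SERPAPI_KEY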
requirements.txt ADDED
@@ -0,0 +1,96 @@
+ aiofiles==23.2.1
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ altair==5.3.0
+ annotated-types==0.7.0
+ anyio==4.4.0
+ async-timeout==4.0.3
+ attrs==23.2.0
+ certifi==2024.6.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ contourpy==1.2.1
+ cycler==0.12.1
+ dataclasses-json==0.6.7
+ distro==1.9.0
+ dnspython==2.6.1
+ email_validator==2.2.0
+ exceptiongroup==1.2.1
+ fastapi==0.111.0
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ fonttools==4.53.0
+ frozenlist==1.4.1
+ fsspec==2024.6.1
+ gradio==4.31.5
+ gradio_client==0.16.4
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.4
+ idna==3.7
+ importlib_resources==6.4.0
+ Jinja2==3.1.4
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ jsonschema==4.22.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ langchain==0.2.0
+ langchain-core==0.2.11
+ langchain-openai==0.1.7
+ langchain-text-splitters==0.2.2
+ langsmith==0.1.83
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.21.3
+ matplotlib==3.9.0
+ mdurl==0.1.2
+ multidict==6.0.5
+ mypy-extensions==1.0.0
+ numpy==1.26.4
+ openai==1.35.9
+ orjson==3.10.6
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.3.0
+ pydantic==2.8.0
+ pydantic_core==2.20.0
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.18.1
+ ruff==0.5.0
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.31
+ starlette==0.37.2
+ tenacity==8.4.2
+ tiktoken==0.7.0
+ tomlkit==0.12.0
+ toolz==0.12.1
+ tqdm==4.66.4
+ typer==0.12.3
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ ujson==5.10.0
+ urllib3==2.2.2
+ uvicorn==0.30.1
+ uvloop==0.19.0
+ watchfiles==0.22.0
+ websockets==11.0.3
+ yarl==1.9.4
serp_imgur.py ADDED
@@ -0,0 +1,70 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+
+
+ load_dotenv()
+ # Imgur and SERP API credentials
+ imgur_client_id = os.getenv('imgur_client_id')
+ serp_api_key = os.getenv('serp_api_key')
+ search_endpoint = 'https://serpapi.com/search'
+
+
+ def upload_image_to_imgur(image_path):
+     headers = {'Authorization': f'Client-ID {imgur_client_id}'}
+     data = {'image': open(image_path, 'rb').read()}
+     response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)
+     response_data = response.json()
+     if response.status_code == 200 and response_data['success']:
+         return response_data['data']['link']
+     else:
+         raise Exception(f"Error uploading image to Imgur: {response_data['data']['error']}")
+
+ def reverse_image_search(image_url):
+     params = {
+         'engine': 'google_reverse_image',
+         'image_url': image_url,
+         # "image_content": image_url,
+         'api_key': serp_api_key
+     }
+     response = requests.get(search_endpoint, params=params)
+     return response.json()
+
+ def extract_titles_and_descriptions(search_results, top_n=3):
+     titles_and_descriptions = []
+     for result in search_results.get('image_results', [])[:top_n]:
+         temp_dict = {}
+         title = result.get('title', '')
+         description = result.get('snippet', '')
+         temp_dict['title'] = title
+         temp_dict['description'] = description
+         titles_and_descriptions.append(temp_dict)
+     return titles_and_descriptions
+
+ def main(image_path):
+     # try:
+     # Upload the image to Imgur and get a public URL
+     image_url = upload_image_to_imgur(image_path)
+     print(f"Image uploaded to Imgur: {image_url}")
+
+     # Perform reverse image search
+     search_results = reverse_image_search(image_url)
+     if 'error' in search_results:
+         print("Error:", search_results['error'])
+         return
+
+     # Extract titles and descriptions
+     titles_and_descriptions = extract_titles_and_descriptions(search_results)
+     print(titles_and_descriptions)
+     # Print results
+     # for idx, (title, description) in enumerate(titles_and_descriptions):
+     #     print(f"Result {idx+1}:")
+     #     print("Title:", title)
+     #     print("Description:", description)
+     #     print("-" * 50)
+     # except Exception as e:
+     #     print(f"An error occurred: {e}")
+
+ if __name__ == "__main__":
+     image_path = 'SampleImages/edited3.jpg'  # Replace with the path to your local image
+     main(image_path)
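
Run standalone, this script prints the list built by `extract_titles_and_descriptions`. A sketch of that output shape (the 'title'/'description' keys come from the function above; the values are invented):

    # Hypothetical printed result for a successful reverse image search
    [
        {'title': 'Matching product page title', 'description': 'Snippet text from the result.'},
        {'title': 'Another match', 'description': 'Another snippet.'},
        {'title': 'Third match', 'description': 'Third snippet.'},
    ]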