Ahmad Shahzad committed on
Commit e3d7308 · 1 Parent(s): 1051746
.DS_Store ADDED
Binary file (8.2 kB).
 
SampleImages/edited.jpg ADDED
SampleImages/edited2.jpg ADDED
SampleImages/edited3.jpg ADDED
app.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ import tempfile
+ from PIL import Image
+ from pipeline import main
+
+ # Function to format the pipeline output as Markdown blocks
+ def format_output(data):
+     formatted_data = []
+     for item in data:
+         block = f"**{item['Title']}**\n\n" + "\n".join([f"- {feature}" for feature in item['Features']])
+         formatted_data.append(block)
+     return formatted_data
+
+ # Function to handle image input, save it temporarily, and display formatted output
+ def process_image(image):
+     # Save the uploaded image to a temporary file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
+         image.save(temp_file.name)
+         temp_file_path = temp_file.name
+
+     # Process the image using the pipeline's main function
+     data = main(temp_file_path)
+     formatted_data = format_output(data)
+     return tuple(formatted_data)  # Returning a tuple for Gradio's multiple outputs
+
+ # Create Gradio blocks for each listing
+ with gr.Blocks() as demo:
+     with gr.Row():
+         input_image = gr.Image(type="pil", label="Input Image", image_mode="RGB", height=512, width=512)
+
+     with gr.Row():
+         output1 = gr.Markdown(label="Block 1")
+         output2 = gr.Markdown(label="Block 2")
+         output3 = gr.Markdown(label="Block 3")
+
+     # Button to trigger the processing function
+     button = gr.Button("Process Image")
+     button.click(process_image, inputs=input_image, outputs=[output1, output2, output3])
+
+ demo.launch()
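
For context, `format_output` above only touches the "Title" and "Features" keys, so `main()` from pipeline.py must return a list of dicts in that shape. A minimal sketch with invented values:

    # Sketch of the structure format_output consumes.
    # The keys ("Title", "Features") come from the code above; the values are invented.
    data = [
        {"Title": "Product A", "Features": ["Feature 1", "Feature 2"]},
        {"Title": "Product B", "Features": ["Feature 1", "Feature 2"]},
        {"Title": "Product C", "Features": ["Feature 1", "Feature 2"]},
    ]
    # format_output(data) yields one Markdown block per listing, which
    # process_image returns as a tuple for the three gr.Markdown outputs.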
gpt_vision.py ADDED
@@ -0,0 +1,79 @@
+ from langchain_core.output_parsers import JsonOutputParser
+ import base64
+ from langchain.chains import TransformChain
+ from langchain_core.messages import HumanMessage
+ from langchain_openai import ChatOpenAI
+ from langchain import globals
+ from langchain_core.runnables import chain
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # Set up the OpenAI API key from the environment
+ os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
+
+ def load_image(inputs: dict) -> dict:
+     """Load image from file and encode it as base64."""
+     image_path = inputs["image_path"]
+
+     def encode_image(image_path):
+         with open(image_path, "rb") as image_file:
+             return base64.b64encode(image_file.read()).decode('utf-8')
+
+     image_base64 = encode_image(image_path)
+     return {"image": image_base64}
+
+
+ load_image_chain = TransformChain(
+     input_variables=["image_path"],
+     output_variables=["image"],
+     transform=load_image
+ )
+
+ class ImageInformation(BaseModel):
+     """Information about an image."""
+
+     Title: str = Field(description="Suitable title for the given product in image")
+     image_description: str = Field(description="a short description of the image")
+     # main_objects: list[str] = Field(description="list of the main objects on the picture")
+
+
+ # Set verbose
+ # globals.set_debug(True)
+
+ @chain
+ def image_model(inputs: dict) -> str | list[str] | dict:
+     """Invoke the vision model with the image and prompt."""
+     model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
+     msg = model.invoke(
+         [HumanMessage(
+             content=[
+                 {"type": "text", "text": inputs["prompt"]},
+                 {"type": "text", "text": parser.get_format_instructions()},
+                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
+             ])]
+     )
+     return msg.content
+
+
+ parser = JsonOutputParser(pydantic_object=ImageInformation)
+
+ def get_image_informations(image_path: str) -> dict:
+     vision_prompt = """
+     Given the image, provide the following information:
+     - Title of the product in image
+     - A description of the product in image based on the text written in image
+     """
+     vision_chain = load_image_chain | image_model | parser
+     return vision_chain.invoke({'image_path': f'{image_path}',
+                                 'prompt': vision_prompt})
+
+
+ gpt_vision_result = get_image_informations("SampleImages/edited3.jpg")  # path casing matches the SampleImages/ directory added in this commit
+ print(gpt_vision_result)
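
Since the `JsonOutputParser` is built from the `ImageInformation` schema, a successful run parses the model reply into a plain dict with those two fields. A sketch of the expected shape (values invented for illustration):

    # Sketch of the parsed result; field names come from ImageInformation,
    # the values here are invented.
    gpt_vision_result = {
        "Title": "Sample Product Name",
        "image_description": "Short description based on the text visible in the image.",
    }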
pipeline.py ADDED
@@ -0,0 +1,244 @@
+ import requests
+ import base64
+ from langchain_core.output_parsers import JsonOutputParser
+ from langchain.chains import TransformChain
+ from langchain_core.messages import HumanMessage
+ from langchain_openai import ChatOpenAI
+ from langchain import globals
+ from langchain_core.runnables import chain
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ import os
+ from openai import OpenAI
+ from dotenv import load_dotenv
+ import json
+
+ load_dotenv()
+ # Imgur and SERP API credentials
+ imgur_client_id = os.getenv('imgur_client_id')
+ serp_api_key = os.getenv('serp_api_key')
+ search_endpoint = 'https://serpapi.com/search'
+
+ # Set up the OpenAI API key from the environment
+ os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key')
+ # Keep a copy of the key for the OpenAI client used below
+ gpt_api_key = os.getenv('gpt_api_key')
+
+
+ def upload_image_to_imgur(image_path):
+     headers = {'Authorization': f'Client-ID {imgur_client_id}'}
+     data = {'image': open(image_path, 'rb').read()}
+     response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)
+     response_data = response.json()
+     if response.status_code == 200 and response_data['success']:
+         return response_data['data']['link']
+     else:
+         raise Exception(f"Error uploading image to Imgur: {response_data['data']['error']}")
+
+ def reverse_image_search(image_url):
+     params = {
+         'engine': 'google_reverse_image',
+         'image_url': image_url,
+         # "image_content": image_url,
+         'api_key': serp_api_key
+     }
+     response = requests.get(search_endpoint, params=params)
+     return response.json()
+
+ def extract_titles_and_descriptions(search_results, top_n=3):
+     titles_and_descriptions = []
+     for result in search_results.get('image_results', [])[:top_n]:
+         temp_dict = {}
+         title = result.get('title', '')
+         description = result.get('snippet', '')
+         temp_dict['title'] = title
+         temp_dict['description'] = description
+         titles_and_descriptions.append(temp_dict)
+     return titles_and_descriptions
+
+ def load_image(inputs: dict) -> dict:
+     """Load image from file and encode it as base64."""
+     image_path = inputs["image_path"]
+
+     def encode_image(image_path):
+         with open(image_path, "rb") as image_file:
+             return base64.b64encode(image_file.read()).decode('utf-8')
+
+     image_base64 = encode_image(image_path)
+     return {"image": image_base64}
+
+ class ImageInformation(BaseModel):
+     """Information about an image."""
+
+     Title: str = Field(description="Suitable title for the given product in image")
+     image_description: str = Field(description="a short description of the image")
+     # main_objects: list[str] = Field(description="list of the main objects on the picture")
+
+
+ # Set verbose
+ # globals.set_debug(True)
+
+ @chain
+ def image_model(inputs: dict) -> str | list[str] | dict:
+     """Invoke the vision model with the image and prompt."""
+     model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024)
+     msg = model.invoke(
+         [HumanMessage(
+             content=[
+                 {"type": "text", "text": inputs["prompt"]},
+                 {"type": "text", "text": parser.get_format_instructions()},
+                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}},
+             ])]
+     )
+     return msg.content
+
+
+ load_image_chain = TransformChain(
+     input_variables=["image_path"],
+     output_variables=["image"],
+     transform=load_image
+ )
+
+ parser = JsonOutputParser(pydantic_object=ImageInformation)
+
+ def get_image_informations(image_path: str) -> dict:
+     vision_prompt = """
+     The image shows a commercial product; I want information for listing this product on an online store. Provide the following information:
+     - The extracted text written on the product.
+     - Title of the product in image based on the extracted text
+     """
+     vision_chain = load_image_chain | image_model | parser
+     return vision_chain.invoke({'image_path': f'{image_path}',
+                                 'prompt': vision_prompt})
+
+ def parse_json_response(response):
+     # Remove the enclosing ```json markers if present
+     if response.startswith("```json") and response.endswith("```"):
+         response = response[7:-3].strip()
+
+     # Load the response as a JSON object
+     data = json.loads(response)
+
+     # Find the key that contains the list of items
+     listings_key = None
+     for key, value in data.items():
+         if isinstance(value, list) and all(isinstance(item, dict) for item in value):
+             listings_key = key
+             break
+
+     if not listings_key:
+         raise ValueError("No valid listings key found in the response")
+
+     listings = data[listings_key]
+
+     # Create a list to store the parsed dictionaries
+     parsed_data = []
+
+     # Iterate through each item in the listings
+     for item in listings:
+         # Extract the title and features
+         title = item.get("Title", "")
+         features = item.get("Features", [])
+
+         # Create a dictionary for each item
+         item_dict = {
+             "Title": title,
+             "Features": features
+         }
+
+         # Append the dictionary to the list
+         parsed_data.append(item_dict)
+
+     return parsed_data
+
+ def main(image_path):
+     # try:
+     # Upload the image to Imgur and get a public URL
+     image_url = upload_image_to_imgur(image_path)
+     print(f"Image uploaded to Imgur: {image_url}")
+
+     # Perform reverse image search
+     search_results = reverse_image_search(image_url)
+     if 'error' in search_results:
+         print("Error in Serp API:", search_results['error'])
+
+     # Extract titles and descriptions
+     serp_results = extract_titles_and_descriptions(search_results)
+     print("Serp Result: ", serp_results, "\n\n\n\n")
+
+     gpt_vision_result = get_image_informations(image_path)
+     print("GPT Vision Result: ", gpt_vision_result, "\n\n\n\n")
+
+     # Prompt to generate the JSON for the product listing
+     prompt = f'''
+     You have results from a SERP API and GPT Vision. The SERP API provides related product information, while GPT Vision gives exact extracted texts and a suitable title for the product image.
+     Your task is to generate titles and feature lists for an e-commerce listing in JSON format. Prioritize the accurate GPT Vision data, using SERP API data ONLY if it is relevant to the GPT Vision result.
+     #### SERP Results:
+     {serp_results}
+
+     #### GPT Vision Result:
+     {gpt_vision_result}
+
+     Generate JSON for product listings (at least THREE) based on the above results.
+
+     #### Please provide the output as JSON, in the following format:
+
+     {{
+         "Listings": [
+             {{
+                 "Title": "Example Title",
+                 "Features": [
+                     "Feature 1",
+                     "Feature 2",
+                     "Feature 3",
+                     ...
+                     "Feature N"
+                 ]
+             }},
+             {{
+                 "Title": "Example Title",
+                 "Features": [
+                     "Feature 1",
+                     "Feature 2",
+                     "Feature 3",
+                     ...
+                     "Feature N"
+                 ]
+             }}
+         ]
+     }}
+     '''
+
+     gpt_model = OpenAI(api_key=gpt_api_key)
+     # Call GPT-3.5 via the chat completions endpoint
+     response = gpt_model.chat.completions.create(
+         model="gpt-3.5-turbo",
+         temperature=0,
+         messages=[
+             {"role": "system", "content": "You are a helpful assistant."},
+             {"role": "user", "content": prompt}
+         ])
+     # Extract the text from the response
+     generated_text = response.choices[0].message.content
+
+     print("Generated Text: ", generated_text)
+     parsed_data = parse_json_response(generated_text)
+
+     return parsed_data
+
+ if __name__ == "__main__":
+     image_path = 'SampleImages/edited3.jpg'  # Replace with the path to your local image
+     main(image_path)
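
All of these scripts load credentials with `load_dotenv()`. A minimal `.env` sketch using the three key names read via `os.getenv` above (values are placeholders, not real credentials):

    # .env (placeholders only)
    gpt_api_key=YOUR_OPENAI_API_KEY
    imgur_client_id=YOUR_IMGUR_CLIENT_ID
    serp_api_key=YOUR_SERPAPI_KEY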
requirements.txt ADDED
@@ -0,0 +1,96 @@
+ aiofiles==23.2.1
+ aiohttp==3.9.5
+ aiosignal==1.3.1
+ altair==5.3.0
+ annotated-types==0.7.0
+ anyio==4.4.0
+ async-timeout==4.0.3
+ attrs==23.2.0
+ certifi==2024.6.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ contourpy==1.2.1
+ cycler==0.12.1
+ dataclasses-json==0.6.7
+ distro==1.9.0
+ dnspython==2.6.1
+ email_validator==2.2.0
+ exceptiongroup==1.2.1
+ fastapi==0.111.0
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ fonttools==4.53.0
+ frozenlist==1.4.1
+ fsspec==2024.6.1
+ gradio==4.31.5
+ gradio_client==0.16.4
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.4
+ idna==3.7
+ importlib_resources==6.4.0
+ Jinja2==3.1.4
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ jsonschema==4.22.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ langchain==0.2.0
+ langchain-core==0.2.11
+ langchain-openai==0.1.7
+ langchain-text-splitters==0.2.2
+ langsmith==0.1.83
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ marshmallow==3.21.3
+ matplotlib==3.9.0
+ mdurl==0.1.2
+ multidict==6.0.5
+ mypy-extensions==1.0.0
+ numpy==1.26.4
+ openai==1.35.9
+ orjson==3.10.6
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.3.0
+ pydantic==2.8.0
+ pydantic_core==2.20.0
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.18.1
+ ruff==0.5.0
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.31
+ starlette==0.37.2
+ tenacity==8.4.2
+ tiktoken==0.7.0
+ tomlkit==0.12.0
+ toolz==0.12.1
+ tqdm==4.66.4
+ typer==0.12.3
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ ujson==5.10.0
+ urllib3==2.2.2
+ uvicorn==0.30.1
+ uvloop==0.19.0
+ watchfiles==0.22.0
+ websockets==11.0.3
+ yarl==1.9.4
serp_imgur.py ADDED
@@ -0,0 +1,70 @@
+ import requests
+ import os
+ from dotenv import load_dotenv
+
+
+ load_dotenv()
+ # Imgur and SERP API credentials
+ imgur_client_id = os.getenv('imgur_client_id')
+ serp_api_key = os.getenv('serp_api_key')
+ search_endpoint = 'https://serpapi.com/search'
+
+
+ def upload_image_to_imgur(image_path):
+     headers = {'Authorization': f'Client-ID {imgur_client_id}'}
+     data = {'image': open(image_path, 'rb').read()}
+     response = requests.post('https://api.imgur.com/3/image', headers=headers, files=data)
+     response_data = response.json()
+     if response.status_code == 200 and response_data['success']:
+         return response_data['data']['link']
+     else:
+         raise Exception(f"Error uploading image to Imgur: {response_data['data']['error']}")
+
+ def reverse_image_search(image_url):
+     params = {
+         'engine': 'google_reverse_image',
+         'image_url': image_url,
+         # "image_content": image_url,
+         'api_key': serp_api_key
+     }
+     response = requests.get(search_endpoint, params=params)
+     return response.json()
+
+ def extract_titles_and_descriptions(search_results, top_n=3):
+     titles_and_descriptions = []
+     for result in search_results.get('image_results', [])[:top_n]:
+         temp_dict = {}
+         title = result.get('title', '')
+         description = result.get('snippet', '')
+         temp_dict['title'] = title
+         temp_dict['description'] = description
+         titles_and_descriptions.append(temp_dict)
+     return titles_and_descriptions
+
+ def main(image_path):
+     # try:
+     # Upload the image to Imgur and get a public URL
+     image_url = upload_image_to_imgur(image_path)
+     print(f"Image uploaded to Imgur: {image_url}")
+
+     # Perform reverse image search
+     search_results = reverse_image_search(image_url)
+     if 'error' in search_results:
+         print("Error:", search_results['error'])
+         return
+
+     # Extract titles and descriptions
+     titles_and_descriptions = extract_titles_and_descriptions(search_results)
+     print(titles_and_descriptions)
+     # Print results
+     # for idx, (title, description) in enumerate(titles_and_descriptions):
+     #     print(f"Result {idx+1}:")
+     #     print("Title:", title)
+     #     print("Description:", description)
+     #     print("-" * 50)
+     # except Exception as e:
+     #     print(f"An error occurred: {e}")
+
+ if __name__ == "__main__":
+     image_path = 'SampleImages/edited3.jpg'  # Replace with the path to your local image
+     main(image_path)
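
Run standalone, this script prints the list built by `extract_titles_and_descriptions`. A sketch of that output shape (the 'title'/'description' keys come from the function above; the values are invented):

    # Hypothetical printed result for a successful reverse image search
    [
        {'title': 'Matching product page title', 'description': 'Snippet text from the result.'},
        {'title': 'Another match', 'description': 'Another snippet.'},
        {'title': 'Third match', 'description': 'Third snippet.'},
    ]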