Spaces:
Sleeping
Sleeping
from langchain_core.output_parsers import JsonOutputParser | |
import base64 | |
from langchain.chains import TransformChain | |
from langchain_core.messages import HumanMessage | |
from langchain_openai import ChatOpenAI | |
from langchain import globals | |
from langchain_core.runnables import chain | |
from langchain_core.pydantic_v1 import BaseModel, Field | |
import os | |
from dotenv import load_dotenv | |
load_dotenv() | |
# Set up your OpenAI API key | |
os.environ["OPENAI_API_KEY"] = os.getenv('gpt_api_key') | |
def load_image(inputs: dict) -> dict: | |
"""Load image from file and encode it as base64.""" | |
image_path = inputs["image_path"] | |
def encode_image(image_path): | |
with open(image_path, "rb") as image_file: | |
return base64.b64encode(image_file.read()).decode('utf-8') | |
image_base64 = encode_image(image_path) | |
return {"image": image_base64} | |
load_image_chain = TransformChain( | |
input_variables=["image_path"], | |
output_variables=["image"], | |
transform=load_image | |
) | |
class ImageInformation(BaseModel): | |
"""Information about an image.""" | |
Title: str = Field(description="Suitable title for the given product in image") | |
image_description: str = Field(description="a short description of the image") | |
# main_objects: list[str] = Field(description="list of the main objects on the picture") | |
# Set verbose | |
# globals.set_debug(True) | |
def image_model(inputs: dict) -> str | list[str] | dict: | |
"""Invoke model with image and prompt.""" | |
model = ChatOpenAI(temperature=0.5, model="gpt-4-vision-preview", max_tokens=1024) | |
msg = model.invoke( | |
[HumanMessage( | |
content=[ | |
{"type": "text", "text": inputs["prompt"]}, | |
{"type": "text", "text": parser.get_format_instructions()}, | |
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{inputs['image']}"}}, | |
])] | |
) | |
return msg.content | |
parser = JsonOutputParser(pydantic_object=ImageInformation) | |
def get_image_informations(image_path: str) -> dict: | |
vision_prompt = """ | |
Given the image, provide the following information: | |
- Title of the product in image | |
- A description of the product in image based on the text written in image | |
""" | |
vision_chain = load_image_chain | image_model | parser | |
return vision_chain.invoke({'image_path': f'{image_path}', | |
'prompt': vision_prompt}) | |
gpt_vision_result = get_image_informations("sampleImages/edited3.jpg") | |
print(gpt_vision_result) | |