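# Gradio demo (Hugging Face Space) for Visual Preference Inference (VPI) using
# the Chain-of-Visual-Residuals (CoVR) prompting method.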
#%%
import sys

import gradio as gr

sys.path.append('./')
from gpt_helper import GPT4VisionClass  # repo-local wrapper around the OpenAI vision API
# Module-level placeholders for the model handle and its status message.
model = None
model_status = "Model is not initialized."

def initialize_model(api_key):
    """Create the GPT-4 vision client once, using the user-supplied API key."""
    global model, model_status
    if model is None:
        model = GPT4VisionClass(
            key=api_key, max_tokens=1024, temperature=0.9,
            gpt_model="gpt-4-vision-preview",
            role_msg="You are a helpful agent with vision capabilities; "
                     "do not respond to objects not depicted in images.")
        model_status = "Model initialized successfully with the provided API key."
    else:
        model_status = "Model has already been initialized."
    return model_status
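# NOTE: `model` is process-wide state, so in a shared Space every visitor uses
# the same client (and the same API key) once someone has initialized it.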
def add_text(state, query_text, image_paths=None, images=None):
    """Forward the user query (and optional images) to the model and update the chat history."""
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    # Pass both image arguments through so callers can supply either file paths
    # or in-memory images.
    model.chat(query_text=query_text, image_paths=image_paths, images=images,
               PRINT_USER_MSG=False, PRINT_GPT_OUTPUT=False,
               RESET_CHAT=False, RETURN_RESPONSE=True, VISUALIZE=False, DETAIL='high')
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state
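# Example call (hypothetical query; assumes the model is initialized and the
# demo image below ships with the app):
#   history, chat = add_text([], "Which arrangement do I prefer?",
#                            image_paths=["./images/semantic/color/4.webp"])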
def scenario_button_clicked(scenario_name):
    # Stub handler: for now it only logs which preference scenario was selected.
    print(f"Scenario clicked: {scenario_name}")
    return f"Scenario clicked: {scenario_name}"
if __name__ == "__main__":
    # Image paths for each subcategory under the two main preference categories.
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }
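    # These files must ship with the Space; depending on the Gradio version,
    # serving local files may also require launch(allowed_paths=["./images"]).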
    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
            <div style='text-align: center;'>
            <span style='font-size: 32px; font-weight: bold;'>[Running Examples] <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals</span>
            </div>
            """)
            gr.Markdown("""
            In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, which we name **V**isual **P**reference **I**nference (**VPI**).
            To facilitate visual reasoning in the context of manipulation, we introduce the <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals (<span style='color: #FF9300;'>CoVR</span>) method, which employs a prompting mechanism.
            """)
        with gr.Row():
            for category, scenarios in image_paths.items():
                with gr.Column():
                    gr.Markdown(f"## {category}")
                    with gr.Row():
                        for scenario, img_path in scenarios.items():
                            with gr.Column(scale=2):
                                # Pass the local file path directly; Gradio serves the file itself.
                                gr.Image(value=img_path, visible=True)
                                scenario_button = gr.Button(scenario)
                                # Bind `scenario` as a lambda default so each button reports
                                # its own name rather than the loop's final value.
                                scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[])
        ######## Input the OpenAI API key and display the initialization result
        with gr.Row():
            # API key input
            with gr.Column():
                openai_gpt4_key = gr.Textbox(label="OpenAI GPT-4 Key", type="password", placeholder="sk-...",
                                             info="You must provide your own GPT-4 API key for this app to function properly.")
                initialize_button = gr.Button("Initialize Model")
            # Initialization result display
            with gr.Column():
                model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.")
        initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text])
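        # Note: the key is handed straight to GPT4VisionClass; this app itself is
        # assumed to keep it only in process memory and never writes it to disk.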
        ######## Chatbot
        chatbot = gr.Chatbot(elem_id="chatbot")
        state = gr.State([])  # running chat history: a list of (user, bot) pairs
        with gr.Row():
            query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image")
            query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot])
            query_text.submit(lambda: "", inputs=None, outputs=query_text)  # clear the box after submission
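            # The placeholder mentions image upload, but no upload widget is wired up
            # yet. A minimal sketch, commented out, assuming Gradio's gr.UploadButton
            # (the label and lambda below are illustrative, not part of the original app):
            # upload_button = gr.UploadButton("Upload image(s)", file_types=["image"], file_count="multiple")
            # upload_button.upload(
            #     fn=lambda state, files, text: add_text(state, text, image_paths=[f.name for f in files]),
            #     inputs=[state, upload_button, query_text],
            #     outputs=[state, chatbot])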
    demo.launch(share=True)