#%%
import os
import openai
import gradio as gr
from PIL import Image
import sys
sys.path.append('./')
from gpt_helper import GPT4VisionClass, response_to_json
# Module-level handles for the model and its initialization status message
model = None
model_status = "Model is not initialized."


def initialize_model(api_key):
    global model, model_status
    if model is None:
        model = GPT4VisionClass(key=api_key, max_tokens=1024, temperature=0.9,
                                gpt_model="gpt-4-vision-preview",
                                role_msg="You are a helpful agent with vision capabilities; do not respond to objects not depicted in images.")
        model_status = "Model initialized successfully with the provided API key."
    else:
        model_status = "Model has already been initialized."
    return model_status
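
# Hedged convenience sketch (assumption, not part of the original flow): lets the
# app start pre-initialized when an OPENAI_API_KEY environment variable is set.
# Nothing below calls this helper; it is illustration only.
def initialize_model_from_env():
    key = os.environ.get("OPENAI_API_KEY")  # hypothetical opt-in via env var
    return initialize_model(key) if key else model_status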

def add_text(state, query_text, image_paths=None, images=None):
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    # Prefer explicit file paths over pre-loaded image objects when both are given.
    images = None if image_paths is not None else images
    # chat() appends this turn to the model's internal history; the reply is read
    # back through the class's response accessor below.
    model.chat(query_text=query_text, image_paths=image_paths, images=images,
               PRINT_USER_MSG=False, PRINT_GPT_OUTPUT=False,
               RESET_CHAT=False, RETURN_RESPONSE=True, VISUALIZE=False, DETAIL='high')
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state
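
# Hedged sketch (assumption): a thin wrapper that forwards one uploaded image file
# path into add_text, intended for a gr.Image(type="filepath") component. No such
# component exists in the original layout; see the commented wiring near the chatbot.
def add_text_with_image(state, query_text, img_path):
    # A single-element list matches the `image_paths` argument add_text expects.
    return add_text(state, query_text, image_paths=[img_path] if img_path else None)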

def scenario_button_clicked(scenario_name):
    # Log the selected scenario; the same string is returned for any wired output.
    print(f"Scenario clicked: {scenario_name}")
    return f"Scenario clicked: {scenario_name}"

if __name__ == "__main__":
    # Define image paths for each subcategory under the main categories
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }
    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
            <div style='text-align: center;'>
            <span style='font-size: 32px; font-weight: bold;'>[Running Examples] <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals</span>
            </div>
            """)
            gr.Markdown("""
            In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, a task we name **V**isual **P**reference **I**nference (**VPI**).
            To facilitate visual reasoning in the context of manipulation, we introduce the <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals (<span style='color: #FF9300;'>CoVR</span>) method. <span style='color: #FF9300;'>CoVR</span> employs a prompting mechanism.
            """)
        with gr.Row():
            for category, scenarios in image_paths.items():
                with gr.Column():
                    gr.Markdown(f"## {category}")
                    with gr.Row():
                        for scenario, img_path in scenarios.items():
                            with gr.Column(scale=2):
                                # Serve the scenario image through Gradio's /file= route.
                                gr.Image(f"/file={img_path}", visible=True)
                                scenario_button = gr.Button(scenario)
                                # Bind the current scenario name via a default argument so
                                # each button reports its own label, not the loop's last value.
                                # (A hedged sketch for showing the result on screen follows this block.)
                                scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[])
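        # Hedged sketch (assumption, not in the original UI): to surface the click
        # result on screen, a textbox could be declared above the loop and passed
        # as the click output, e.g.:
        #
        #     output_text = gr.Textbox(label="Selected scenario")  # hypothetical
        #     scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x),
        #                           inputs=[], outputs=[output_text])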
        ######## Input OpenAI API Key and display initialization result
        with gr.Row():
            # API key input
            with gr.Column():
                openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", type="password", placeholder="sk..",
                                             info="You have to provide your own GPT-4 key for this app to function properly.")
                initialize_button = gr.Button("Initialize Model")
            # Initialization result display
            with gr.Column():
                model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.")
        initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text])
        ######## Chatbot
        chatbot = gr.Chatbot(elem_id="chatbot")
        state = gr.State([])
        with gr.Row():
            query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image")
            query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot])
            # Clear the textbox after each submission.
            query_text.submit(lambda: "", inputs=None, outputs=query_text)
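        # Hedged sketch (assumption, not in the original UI): an upload widget wired
        # through the add_text_with_image helper sketched above, e.g.:
        #
        #     query_image = gr.Image(type="filepath", label="Optional image")  # hypothetical
        #     query_text.submit(add_text_with_image,
        #                       inputs=[state, query_text, query_image],
        #                       outputs=[state, chatbot])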
    demo.launch(share=True)