#%%
import os
import openai
import gradio as gr
from PIL import Image
import sys
sys.path.append('./')
from gpt_helper import GPT4VisionClass, response_to_json
# Module-level handles for the model and its initialization status message
model = None
model_status = "Model is not initialized."


def initialize_model(api_key):
    global model, model_status
    if model is None:
        model = GPT4VisionClass(key=api_key, max_tokens=1024, temperature=0.9,
                                gpt_model="gpt-4-vision-preview",
                                role_msg="You are a helpful agent with vision capabilities; do not respond to objects not depicted in images.")
        model_status = "Model initialized successfully with the provided API key."
    else:
        model_status = "Model has already been initialized."
    return model_status
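
# Hedged convenience sketch (assumption, not part of the original flow): lets the
# app start pre-initialized when an OPENAI_API_KEY environment variable is set.
# Nothing below calls this helper; it is illustration only.
def initialize_model_from_env():
    key = os.environ.get("OPENAI_API_KEY")  # hypothetical opt-in via env var
    return initialize_model(key) if key else model_status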

def add_text(state, query_text, image_paths=None, images=None):
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    # Prefer explicit file paths over pre-loaded image objects when both are given.
    images = None if image_paths is not None else images
    # chat() appends this turn to the model's internal history; the reply is read
    # back through the class's response accessor below.
    model.chat(query_text=query_text, image_paths=image_paths, images=images,
               PRINT_USER_MSG=False, PRINT_GPT_OUTPUT=False,
               RESET_CHAT=False, RETURN_RESPONSE=True, VISUALIZE=False, DETAIL='high')
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state
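
# Hedged sketch (assumption): a thin wrapper that forwards one uploaded image file
# path into add_text, intended for a gr.Image(type="filepath") component. No such
# component exists in the original layout; see the commented wiring near the chatbot.
def add_text_with_image(state, query_text, img_path):
    # A single-element list matches the `image_paths` argument add_text expects.
    return add_text(state, query_text, image_paths=[img_path] if img_path else None)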

def scenario_button_clicked(scenario_name):
    # Log the selected scenario; the same string is returned for any wired output.
    print(f"Scenario clicked: {scenario_name}")
    return f"Scenario clicked: {scenario_name}"

if __name__ == "__main__":
    # Define image paths for each subcategory under the main categories
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }
    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
            <div style='text-align: center;'>
            <span style='font-size: 32px; font-weight: bold;'>[Running Examples] <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals</span>
            </div>
            """)
            gr.Markdown("""
            In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, a task we name **V**isual **P**reference **I**nference (**VPI**).
            To facilitate visual reasoning in the context of manipulation, we introduce the <span style='color: #FF9300;'>C</span>hain-<span style='color: #FF9300;'>o</span>f-<span style='color: #FF9300;'>V</span>isual-<span style='color: #FF9300;'>R</span>esiduals (<span style='color: #FF9300;'>CoVR</span>) method. <span style='color: #FF9300;'>CoVR</span> employs a prompting mechanism.
            """)
        with gr.Row():
            for category, scenarios in image_paths.items():
                with gr.Column():
                    gr.Markdown(f"## {category}")
                    with gr.Row():
                        for scenario, img_path in scenarios.items():
                            with gr.Column(scale=2):
                                # Serve the scenario image through Gradio's /file= route.
                                gr.Image(f"/file={img_path}", visible=True)
                                scenario_button = gr.Button(scenario)
                                # Bind the current scenario name via a default argument so
                                # each button reports its own label, not the loop's last value.
                                # (A hedged sketch for showing the result on screen follows this block.)
                                scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[])
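        # Hedged sketch (assumption, not in the original UI): to surface the click
        # result on screen, a textbox could be declared above the loop and passed
        # as the click output, e.g.:
        #
        #     output_text = gr.Textbox(label="Selected scenario")  # hypothetical
        #     scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x),
        #                           inputs=[], outputs=[output_text])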
        ######## Input OpenAI API Key and display initialization result
        with gr.Row():
            # API key input
            with gr.Column():
                openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", type="password", placeholder="sk..",
                                             info="You have to provide your own GPT-4 key for this app to function properly.")
                initialize_button = gr.Button("Initialize Model")
            # Initialization result display
            with gr.Column():
                model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.")
        initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text])
        ######## Chatbot
        chatbot = gr.Chatbot(elem_id="chatbot")
        state = gr.State([])
        with gr.Row():
            query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image")
            query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot])
            # Clear the textbox after each submission.
            query_text.submit(lambda: "", inputs=None, outputs=query_text)
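        # Hedged sketch (assumption, not in the original UI): an upload widget wired
        # through the add_text_with_image helper sketched above, e.g.:
        #
        #     query_image = gr.Image(type="filepath", label="Optional image")  # hypothetical
        #     query_text.submit(add_text_with_image,
        #                       inputs=[state, query_text, query_image],
        #                       outputs=[state, chatbot])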
    demo.launch(share=True)