#%%
import sys

import gradio as gr

sys.path.append('./')
from gpt_helper import GPT4VisionClass

# Placeholder for the model instance and its status message
model = None
model_status = "Model is not initialized."


def initialize_model(api_key):
    """Create the GPT-4 Vision client once, using the user-provided API key."""
    global model, model_status
    if model is None:
        model = GPT4VisionClass(
            key=api_key,
            max_tokens=1024,
            temperature=0.9,
            gpt_model="gpt-4-vision-preview",
            role_msg=(
                "You are a helpful agent with vision capabilities; "
                "do not respond to objects not depicted in images."
            ),
        )
        model_status = "Model initialized successfully with the provided API key."
    else:
        model_status = "Model has already been initialized."
    return model_status


def add_text(state, query_text, image_paths=None, images=None):
    """Send the user query (and optional images) to the model and append the reply to the chat state."""
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    model.chat(
        query_text=query_text,
        image_paths=image_paths,
        images=images,
        PRINT_USER_MSG=False,
        PRINT_GPT_OUTPUT=False,
        RESET_CHAT=False,
        RETURN_RESPONSE=True,
        VISUALIZE=False,
        DETAIL='high',
    )
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state


def scenario_button_clicked(scenario_name):
    print(f"Scenario clicked: {scenario_name}")
    return f"Scenario clicked: {scenario_name}"


if __name__ == "__main__":
    # Image paths for each subcategory under the two main preference categories
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }

    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
# [Running Examples] Chain-of-Visual-Residuals
""") gr.Markdown(""" In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, named **V**isual **P**reference **I**nference (**VPI**). To facilitate visual reasoning in the context of manipulation, we introduce the Chain-of-Visual-Residuals (CoVR) method. CoVR employs a prompting mechanism """) with gr.Row(): for category, scenarios in image_paths.items(): with gr.Column(): gr.Markdown(f"## {category}") with gr.Row(): for scenario, img_path in scenarios.items(): with gr.Column(scale=2): # img = Image.open(img_path) # gr.Image(value=img, visible=True) # gr.Image(value=img, visible=True, type="pil") gr.Image(f"/file={img_path}", visible=True) scenario_button = gr.Button(scenario) scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[]) # scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[], outputs=[output_text]) ######## Input OpenAI API Key and display initialization result with gr.Row(): # API Key Input with gr.Column(): openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", type="password", placeholder="sk..", info="You have to provide your own GPT4 keys for this app to function properly") initialize_button = gr.Button("Initialize Model") # Initialization Button and Result Display with gr.Column(): model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.") initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text]) ######## Chatbot chatbot = gr.Chatbot(elem_id="chatbot") state = gr.State([]) with gr.Row(): query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image") query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot]) query_text.submit(lambda: "", inputs=None, outputs=query_text) demo.launch(share=True)