#%%
import sys

import gradio as gr

sys.path.append('./')
from gpt_helper import GPT4VisionClass

# Placeholder for the model instance and its status message
model = None
model_status = "Model is not initialized."


def initialize_model(api_key):
    """Create the GPT-4 Vision client once, using the user-provided API key."""
    global model, model_status
    if model is None:
        model = GPT4VisionClass(
            key=api_key,
            max_tokens=1024,
            temperature=0.9,
            gpt_model="gpt-4-vision-preview",
            role_msg=(
                "You are a helpful agent with vision capabilities; "
                "do not respond to objects not depicted in images."
            ),
        )
        model_status = "Model initialized successfully with the provided API key."
    else:
        model_status = "Model has already been initialized."
    return model_status


def add_text(state, query_text, image_paths=None, images=None):
    """Send the user query (and optional images) to the model and append the reply to the chat state."""
    if model is None:
        return state, [("Error", "Model is not initialized. Please enter your OpenAI API Key.")]
    model.chat(
        query_text=query_text,
        image_paths=image_paths,
        images=images,
        PRINT_USER_MSG=False,
        PRINT_GPT_OUTPUT=False,
        RESET_CHAT=False,
        RETURN_RESPONSE=True,
        VISUALIZE=False,
        DETAIL='high',
    )
    result = model._get_response_content()
    state.append((query_text, result))
    return state, state


def scenario_button_clicked(scenario_name):
    print(f"Scenario clicked: {scenario_name}")
    return f"Scenario clicked: {scenario_name}"


if __name__ == "__main__":
    # Image paths for each subcategory under the two main preference categories
    image_paths = {
        "Semantic Preference": {
            "Color Preference": "./images/semantic/color/4.webp",
            "Shape Preference": "./images/semantic/shape/5.webp",
            "Category Preference: Fruits and Beverages": "./images/semantic/category/1/5.webp",
            "Category Preference: Beverages and Snacks": "./images/semantic/category/2/5.webp",
        },
        "Spatial Pattern Preference": {
            "Vertical Line": "./images/spatial-pattern/vertical/5.webp",
            "Horizontal Line": "./images/spatial-pattern/horizontal/5.webp",
            "Diagonal Line": "./images/spatial-pattern/diagonal/4.webp",
            "Quadrants": "./images/spatial-pattern/quadrant/5.webp",
        },
    }

    with gr.Blocks() as demo:
        ######## Introduction for the demo
        with gr.Column():
            gr.Markdown("""
# [Running Examples] Chain-of-Visual-Residuals
""") gr.Markdown(""" In this paper, we focus on the problem of inferring underlying human preferences from a sequence of raw visual observations in tabletop manipulation environments with a variety of object types, named **V**isual **P**reference **I**nference (**VPI**). To facilitate visual reasoning in the context of manipulation, we introduce the Chain-of-Visual-Residuals (CoVR) method. CoVR employs a prompting mechanism """) with gr.Row(): for category, scenarios in image_paths.items(): with gr.Column(): gr.Markdown(f"## {category}") with gr.Row(): for scenario, img_path in scenarios.items(): with gr.Column(scale=2): # img = Image.open(img_path) # gr.Image(value=img, visible=True) # gr.Image(value=img, visible=True, type="pil") gr.Image(f"/file={img_path}", visible=True) scenario_button = gr.Button(scenario) scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[]) # scenario_button.click(fn=lambda x=scenario: scenario_button_clicked(x), inputs=[], outputs=[output_text]) ######## Input OpenAI API Key and display initialization result with gr.Row(): # API Key Input with gr.Column(): openai_gpt4_key = gr.Textbox(label="OpenAI GPT4 Key", type="password", placeholder="sk..", info="You have to provide your own GPT4 keys for this app to function properly") initialize_button = gr.Button("Initialize Model") # Initialization Button and Result Display with gr.Column(): model_status_text = gr.Text(label="Initialize API Result", info="The result of the model initialization will be displayed here.") initialize_button.click(initialize_model, inputs=[openai_gpt4_key], outputs=[model_status_text]) ######## Chatbot chatbot = gr.Chatbot(elem_id="chatbot") state = gr.State([]) with gr.Row(): query_text = gr.Textbox(show_label=False, placeholder="Enter text and press enter, or upload an image") query_text.submit(add_text, inputs=[state, query_text], outputs=[state, chatbot]) query_text.submit(lambda: "", inputs=None, outputs=query_text) demo.launch(share=True)