import os
import gradio as gr
from gradio import ChatMessage
from typing import Iterator
import google.generativeai as genai

# Read the Gemini API key from the environment and hand it to the SDK.
# The value is None when the variable is unset.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Shared model handle: Gemini 2.0 Flash with "Thinking" capabilities
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")


def format_chat_history(messages: list) -> list:
    """
    Formats the chat history into a structure Gemini can understand.

    Args:
        messages: Chat history entries. Each entry is a dict (or an
            attribute-style object) exposing "role", "content" and,
            optionally, "metadata".

    Returns:
        A new list of ``{"role": ..., "parts": [content]}`` dicts in the
        original order, where role is "user" or "model". Thinking messages
        are left out so the model is not fed its own scratch work.
    """
    def _field(item, key, default=None):
        # Entries may be plain dicts or attribute-style objects; read both
        # the same way.
        if isinstance(item, dict):
            return item.get(key, default)
        return getattr(item, key, default)

    formatted_history = []
    for message in messages:
        role = _field(message, "role")
        metadata = _field(message, "metadata")

        # This file marks thoughts by giving the assistant message a metadata
        # title. A missing, empty, or title-less metadata field does NOT make
        # a message a thought, so ordinary replies are kept.
        is_thought = (
            role == "assistant"
            and bool(metadata)
            and bool(_field(metadata, "title"))
        )
        if is_thought:
            continue

        formatted_history.append({
            # Gemini's chat history names the non-user side "model".
            "role": "user" if role == "user" else "model",
            "parts": [_field(message, "content", "")],
        })
    return formatted_history

def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Streams thoughts and response with conversation history support.

    Args:
        user_message: The text of the turn to send to the model.
        messages: The chat history shown in the UI. It is mutated in place:
            a "thinking" message and then a response message are appended
            and refreshed as chunks arrive.

    Yields:
        The same ``messages`` list after every update, so the UI can
        re-render the partial thought / partial answer.

    Any exception raised while talking to the model is reported to the user
    as an extra assistant message instead of propagating.
    """
    # Title shared by every refresh of the "thinking" bubble.
    thinking_title = "⚙️ Thinking: *The thoughts produced by the model are experimental"

    try:
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Format chat history for Gemini
        chat_history = format_chat_history(messages)

        # When the caller has already appended the current user turn to
        # `messages` (as the submit chain in this file does), drop it from
        # the history: it is sent separately below and would otherwise reach
        # the model twice.
        if chat_history:
            last_turn = chat_history[-1]
            if last_turn.get("role") == "user" and last_turn.get("parts") == [user_message]:
                chat_history.pop()

        # Initialize Gemini chat
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(user_message, stream=True)

        # Initialize buffers and flags
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add initial (empty) thinking message; it is replaced as chunks arrive
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": thinking_title}
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            if not parts:
                # Nothing to render in this chunk; indexing parts[0] would raise.
                continue
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # A two-part chunk carries the end of the thought followed by
                # the start of the answer.
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )
                yield messages

                # Start response in a new message so the thought stays visible
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )
                thinking_complete = True

            elif thinking_complete:
                # Stream response
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )

            else:
                # Stream thinking
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )

            yield messages

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # UI boundary: log the failure and show it in the chat rather than
        # letting the generator die.
        error_text = str(e)
        print(f"\n=== Error ===\n{error_text}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {error_text}"
            )
        )
        yield messages

def user_message(msg: str, history: list) -> tuple[str, list]:
    """
    Record the user's turn in the chat history.

    The given history list is extended in place with a user ChatMessage and
    handed back together with an empty string.
    """
    new_turn = ChatMessage(role="user", content=msg)
    history.extend([new_turn])
    return "", history


# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="slate", neutral_hue="neutral")) as demo: # Using Soft theme with adjusted hues for a refined look
    gr.Markdown("# Gemini 2.0 Flash 'Thinking' Chatbot 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None,"https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4
        )

        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    # The submitted text is parked in msg_store because the input box is
    # cleared in the first step, before the later steps need the text.
    msg_store = gr.State("")  # Store for preserving user message

    input_box.submit(
        lambda msg: (msg, ""),  # Store message and clear input
        inputs=[input_box],
        outputs=[msg_store, input_box],  # each component listed once
        queue=False
    ).then(
        user_message,  # Add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False
    ).then(
        stream_gemini_response,  # Generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot
    )

    # Reset the conversation, the input box and the stored message together
    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False
    )

    gr.Markdown(  # Description moved to the bottom
        """
        <br><br><br>  <!-- Add some vertical space -->
        ---
        ### About this Chatbot

        This chatbot demonstrates the experimental 'thinking' capability of the **Gemini 2.0 Flash** model.
        You can observe the model's thought process as it generates responses, displayed with the "⚙️ Thinking" prefix.

        **Key Features:**

        *   Powered by Google's **Gemini 2.0 Flash** model.
        *   Shows the model's **thoughts** before the final answer (experimental feature).
        *   Supports **conversation history** for multi-turn chats.
        *   Uses **streaming** for a more interactive experience.

        **Instructions:**

        1.  Type your message in the input box above.
        2.  Press Enter to send.
        3.  Observe the chatbot's "Thinking" process followed by the final response.
        4.  Use the "Clear Chat" button to start a new conversation.

        *Please note*: The 'thinking' feature is experimental and the quality of thoughts may vary.
        """
    )


# Launch the interface only when run as a script, not when imported
if __name__ == "__main__":
    demo.launch(debug=True)