import gradio as gr
from functools import lru_cache

# Memoize loaded models so each model is only set up once per process
@lru_cache(maxsize=3)
def load_hf_model(model_name: str):
    """Load a DeepSeek model from the Hugging Face Hub through ``gr.load``.

    Args:
        model_name: Repository name under the ``deepseek-ai`` organization,
            for example ``"DeepSeek-R1"``.

    Returns:
        The object returned by ``gr.load`` for that repository.

    Results are cached per ``model_name`` (up to 3 entries), so repeated
    calls with the same name reuse the already loaded object.
    """
    # `gr.load` only splits a source prefix off `name` when `src` is omitted.
    # Because `src` is given explicitly, `name` must be the bare repo id;
    # a leading "huggingface/" would become part of the repository path.
    return gr.load(
        name=f"deepseek-ai/{model_name}",
        src="models",
        api_name="chat",
    )

# Eagerly load every selectable model once, when the module is imported.
# Keys are the display names offered in the UI; values are the loaded models.
MODELS = {
    model_name: load_hf_model(model_name)
    for model_name in (
        "DeepSeek-R1-Distill-Qwen-32B",
        "DeepSeek-R1",
        "DeepSeek-R1-Zero",
    )
}

# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    """Send one user message to the selected model and update the conversation.

    Args:
        input_text: Message typed by the user.
        history: Conversation so far (a list of turns), or ``None``/empty for
            a fresh conversation. The passed list is never mutated.
        model_choice: Key into ``MODELS`` selecting which model to query.
        system_message: System prompt forwarded to the model.
        max_new_tokens: Generation length limit forwarded to the model.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Returns:
        A 3-tuple ``(history, history, "")``: the conversation for the chat
        display, the same conversation for the session state, and an empty
        string that clears the input textbox.

    Any exception raised while selecting or calling the model is reported as
    an ``"Error: ..."`` assistant turn instead of propagating.
    """
    # Work on a copy so the caller's list is never modified in place.
    history = list(history or [])

    # Nothing to send: leave the conversation untouched and clear the box.
    if not input_text or not input_text.strip():
        return history, history, ""

    payload = [
        history,  # the entire conversation so far
        input_text,
        system_message,
        max_new_tokens,
        temperature,
        top_p,
    ]

    try:
        # Looked up inside the try so an unknown model is shown in the chat
        # rather than crashing the event handler.
        model_component = MODELS[model_choice]

        # Loaded Gradio apps take their inputs as separate positional
        # arguments, so the payload is unpacked rather than passed as a list.
        # NOTE(review): assumes the remote endpoint accepts these six inputs
        # in this order and returns (updated_history, reply) - confirm
        # against the loaded model's API.
        response = model_component(*payload)
        updated_history, _reply = response[0], response[1]
        history = updated_history
    except Exception as e:
        # UI boundary: surface the failure to the user as a chat turn.
        history.append((input_text, f"Error: {str(e)}"))

    return history, history, ""

# --- Gradio Interface ---
# Build the page: chat area on top, model/generation options below.
with gr.Blocks(theme=gr.themes.Soft(), title="DeepSeek Chatbot") as demo:
    gr.Markdown(
        """
        # DeepSeek Chatbot
        Created by [ruslanmv.com](https://ruslanmv.com/)
        This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
        You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
        """
    )

    with gr.Row():
        with gr.Column():
            chatbot_output = gr.Chatbot(label="DeepSeek Chatbot", height=500)
            msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                # Clears the visible textbox and chat display.
                clear_btn = gr.ClearButton([msg, chatbot_output])

    with gr.Row():
        with gr.Accordion("Options", open=True):
            model_choice = gr.Radio(
                choices=list(MODELS.keys()),
                label="Choose a Model",
                value="DeepSeek-R1"
            )
            with gr.Accordion("Optional Parameters", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a friendly Chatbot created by ruslanmv.com",
                    lines=2,
                )
                max_new_tokens = gr.Slider(
                    minimum=1, maximum=4000, value=200, label="Max New Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
                )

    # Conversation history kept in session state between events.
    chat_history = gr.State([])

    # The clear button must also reset the stored history; otherwise the next
    # reply would re-render the conversation the user just cleared.
    clear_btn.click(lambda: [], None, chat_history)

    # Event handling: the button and pressing Enter share the same wiring.
    chat_inputs = [msg, chat_history, model_choice, system_message, max_new_tokens, temperature, top_p]
    chat_outputs = [chatbot_output, chat_history, msg]
    submit_btn.click(chatbot, chat_inputs, chat_outputs)
    msg.submit(chatbot, chat_inputs, chat_outputs)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()