"""Gradio chatbot front-end for the Hugging Face Inference API.

Streams chat completions from a user-selected hosted model. The HF API
token is read from the ``HF_TOKEN`` environment variable by default and
can be overridden in the UI.
"""

import os
from typing import Generator, List, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

# Models offered in the UI dropdown.
AVAILABLE_MODELS = [
    "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
    "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
    "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
    "VIDraft/Gemma-3-R1984-12B",
]


def initialize_client(token: str, model: str) -> InferenceClient:
    """Create an ``InferenceClient`` for *model* authenticated with *token*.

    Args:
        token: Hugging Face API token.
        model: Model repo id to run inference against.

    Returns:
        A ready-to-use ``InferenceClient``.

    Raises:
        gr.Error: If client construction fails (surfaced in the Gradio UI).
    """
    try:
        # Pass the token directly instead of calling login(): login()
        # mutates global auth state on every request, while a per-client
        # token keeps the credential scoped to this client instance.
        return InferenceClient(model=model, token=token)
    except Exception as e:
        # Raise (not return) the error so callers don't need an
        # isinstance check; chain the cause for debuggability.
        raise gr.Error(f"Failed to initialize client: {e}") from e


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model: str,
    token: str,
) -> Generator[str, None, None]:
    """Stream a chat completion for *message* given the conversation *history*.

    Yields the accumulated response text after each streamed chunk so the
    Gradio UI can render it incrementally.

    Args:
        message: The new user message.
        history: Prior (user, assistant) turn pairs from ``gr.ChatInterface``.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        model: Model repo id selected in the UI.
        token: Hugging Face API token.

    Yields:
        The response text accumulated so far.

    Raises:
        gr.Error: On missing token, empty input, client setup failure,
            or any inference-time error.
    """
    if not token:
        raise gr.Error("Please provide a valid Hugging Face API token.")
    if not message.strip():
        raise gr.Error("Input message cannot be empty.")

    # initialize_client raises gr.Error itself on failure.
    client = initialize_client(token, model)

    # Rebuild the full message list: system prompt, prior turns, new message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Use a distinct name for the streamed text delta; the original
            # code shadowed the `token` (API credential) parameter here.
            delta = chunk.choices[0].delta.content or ""
            response += delta
            yield response
    except Exception as e:
        raise gr.Error(f"Error during inference: {e}") from e


# Load token from environment variable for security.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Create Gradio interface.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful Chatbot.",
            label="System Message",
            placeholder="Enter the system prompt here...",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Controls the maximum length of the generated response.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness (higher = more creative, lower = more deterministic).",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Controls diversity via nucleus sampling.",
        ),
        gr.Dropdown(
            choices=AVAILABLE_MODELS,
            value=AVAILABLE_MODELS[0],
            label="Model Selection",
            info="Select the model to use for inference.",
        ),
        gr.Textbox(
            value=HF_TOKEN,
            label="Hugging Face API Token",
            type="password",
            placeholder="Enter your HF API token (or set HF_TOKEN env variable)",
        ),
    ],
    title="Chatbot with Hugging Face Inference API",
    description="Interact with a chatbot powered by Hugging Face models. Provide your API token and customize settings.",
    theme="base",
)

if __name__ == "__main__":
    demo.launch()