import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

# ---------------- CONFIG ----------------
# Hugging Face Hub ids: the chat model and the translation model.
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"

# Initial text of the editable "System Prompt" box in the UI.
SYSTEM_PROMPT_DEFAULT = " ".join(
    (
        "You are Zephyr, a concise and polite AI assistant.",
        "Always respond formally and answer appropriately depending on the selected explanation style.",
    )
)

# Initial positions of the generation-setting sliders.
MAX_NEW_TOKENS_DEFAULT = 300
TEMP_DEFAULT = 0.3
TOP_P_DEFAULT = 0.9

# Module-level handles, created once when the file is imported.
client = InferenceClient(model=MODEL_REPO)
translator = pipeline(task="translation", model=TRANSLATOR_MODEL)

# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Heuristically decide whether *message* should be sent to the translator.

    A message qualifies when it contains one of the trigger words (matched as
    a case-insensitive substring), or when more than 40% of its characters
    lie outside the ASCII range.
    """
    lowered = message.lower()
    for keyword in ("translate", "traduce", "ترجم", "traduire", "übersetze"):
        if keyword in lowered:
            return True

    # The max() guard keeps the ratio defined for an empty message.
    total_chars = max(len(message), 1)
    non_ascii_chars = len([ch for ch in message if ord(ch) > 127])
    return non_ascii_chars / total_chars > 0.4

# ---------------- CHAT FUNCTION ----------------
# System-prompt sentence appended for each "Response Style" choice.
_STYLE_INSTRUCTIONS = {
    "No explanation": " Only provide the direct answer with no explanation.",
    "Short explanation": " Provide a concise answer with a one-sentence explanation.",
    "Detailed explanation": " Provide a thorough and detailed answer with reasoning and examples.",
}


def _as_api_messages(history):
    """Return *history* reduced to the ``role`` and ``content`` of each entry."""
    return [{"role": entry["role"], "content": entry["content"]} for entry in history]


def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    """Handle one user turn and stream the updated conversation to the UI.

    This is a generator. Every yielded value is a ``(textbox_value, history)``
    pair: the textbox value is always ``""`` (clearing the input box) and
    ``history`` is the conversation as a list of ``{"role", "content"}``
    dicts including the new turn.

    Args:
        message: Text the user submitted. Empty or whitespace-only input is
            ignored: the existing history is yielded back unchanged.
        chat_history: Previous conversation entries, or ``None``. The list is
            copied, so the caller's object is never modified.
        system_message: Base system prompt; a sentence matching
            ``response_style`` is appended to it.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.
        response_style: One of the keys of ``_STYLE_INSTRUCTIONS``; any other
            value leaves the system prompt as given.

    Yields:
        ``("", history)`` after the translation result, after each streamed
        chunk of the model's answer, and once more when generation finishes
        or fails. Failures are reported as an assistant message rather than
        raised.
    """
    # Work on a copy so the caller's list stays untouched; also tolerates None.
    history = list(chat_history or [])

    # Nothing to answer: do not send an empty prompt to either model.
    if not message or not message.strip():
        yield "", history
        return

    if is_translation_request(message):
        try:
            # NOTE(review): M2M100 normally expects a concrete source-language
            # code; "auto" does not look like a supported value. Confirm,
            # otherwise this branch always ends in the failure message below.
            reply = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
        except Exception as e:
            reply = f"⚠️ Translation failed: {str(e)}"
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": reply})
        yield "", history
        return

    # Apply response style
    system_message = system_message + _STYLE_INSTRUCTIONS.get(response_style, "")

    # Only role/content are forwarded to the inference API. History entries
    # coming from the UI may carry additional keys (assumption: e.g. display
    # metadata) that the endpoint has no use for.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(_as_api_messages(history))
    messages.append({"role": "user", "content": message})

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})  # placeholder filled while streaming
    response = ""

    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # A chunk without choices carries no text; skipping it avoids an
            # IndexError that would replace the partial answer with an error.
            if not chunk.choices:
                continue
            response += chunk.choices[0].delta.content or ""
            history[-1]["content"] = response
            yield "", history
    except Exception as e:
        history[-1]["content"] = f"⚠️ Error generating response: {str(e)}"

    # Final state: the complete answer, or the error message set above.
    yield "", history

# ---------------- UI ----------------
app_theme = gr.themes.Soft(primary_hue="violet", secondary_hue="pink")

with gr.Blocks(theme=app_theme) as demo:
    gr.Markdown(value="# 🤖 Zephyr-7B Chat + 🌍 Translator")

    # Conversation display, using role/content message dicts.
    chatbot = gr.Chatbot(label="Chat Assistant", type="messages", height=500, show_copy_button=True)

    # Input row: message box followed by the send and clear buttons.
    with gr.Row():
        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
        send_btn = gr.Button(value="🚀 Send", variant="primary", scale=1)
        clear_btn = gr.Button(value="🧹 Clear Chat", scale=1)

    # Collapsed panel with the prompt and generation settings.
    with gr.Accordion(label="⚙️ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            choices=["No explanation", "Short explanation", "Detailed explanation"],
            value="Detailed explanation",
            label="Response Style",
        )
        temperature = gr.Slider(minimum=0.1, maximum=1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(minimum=32, maximum=2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events: the send button and pressing Enter in the textbox run the same
    # handler with the same inputs and outputs.
    chat_inputs = [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style]
    chat_outputs = [msg, chatbot]
    for trigger in (send_btn.click, msg.submit):
        trigger(fn=stream_response, inputs=chat_inputs, outputs=chat_outputs)

    # Clearing replaces the chatbot value with an empty history.
    clear_btn.click(fn=lambda: [], inputs=None, outputs=chatbot, queue=False)

    gr.Markdown(value="---")
    gr.Markdown(
        value="🔗 Built with ❤️ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M)."
    )

if __name__ == "__main__":
    demo.launch()