File size: 5,417 Bytes
f92b0d5
 
b4f77ad
d02b539
f92b0d5
5214a6c
 
b4f77ad
 
5f4efa7
 
 
 
f92b0d5
5f4efa7
5214a6c
 
f92b0d5
b4f77ad
5214a6c
b4f77ad
 
4e13938
b4f77ad
 
 
 
 
 
 
 
 
 
 
 
f92b0d5
d02b539
 
 
 
 
 
 
 
 
 
5214a6c
5f4efa7
b4f77ad
 
d02b539
 
 
 
 
b4f77ad
 
0a2169b
5f4efa7
0a2169b
5f4efa7
0a2169b
5f4efa7
 
08ea239
0a2169b
f92b0d5
2d6f15d
 
f92b0d5
2d6f15d
 
5214a6c
f92b0d5
 
 
 
 
 
5214a6c
f92b0d5
2d6f15d
 
5214a6c
2d6f15d
 
5214a6c
d02b539
5214a6c
 
 
 
4e13938
 
 
 
 
 
 
5214a6c
 
 
4e13938
 
 
 
 
 
f92b0d5
5214a6c
4e13938
 
 
 
 
 
 
f92b0d5
4e13938
b4f77ad
0a2169b
b4f77ad
0a2169b
5214a6c
 
 
f92b0d5
b4f77ad
5214a6c
0a2169b
 
 
5214a6c
 
0a2169b
 
 
5214a6c
08ea239
f92b0d5
4e13938
 
 
f92b0d5
d02b539
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline
from langdetect import detect  # ๐Ÿ”ฅ new for auto language detection

# ---------------- CONFIG ----------------
# Hugging Face repo id of the chat model, served remotely via the Inference API.
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"  # multilingual translator

# Default system prompt; editable by the user in the Advanced Settings panel.
SYSTEM_PROMPT_DEFAULT = (
    "You are Zephyr, a concise and polite AI assistant. "
    "Answer briefly unless the user specifically asks for detail."
)

# Generation defaults surfaced as sliders in the UI.
MAX_NEW_TOKENS_DEFAULT = 128
TEMP_DEFAULT = 0.7
TOP_P_DEFAULT = 0.95

# Clients
# NOTE: both are created at import time. InferenceClient is a lightweight
# remote handle, but pipeline(...) downloads and loads the M2M100 weights
# locally, so the first startup can be slow.
client = InferenceClient(MODEL_REPO)
translator = pipeline("translation", model=TRANSLATOR_MODEL)


# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """
    Decide whether *message* should be routed to the translator.

    True when the text explicitly asks for a translation (keyword match in
    several languages) or when it looks non-English (more than 40% of its
    characters are outside the ASCII range).
    """
    lowered = message.lower()
    keywords = ("translate", "traduce", "ุชุฑุฌู…", "traduire", "รผbersetze")
    for keyword in keywords:
        if keyword in lowered:
            return True
    # naive non-English detection (if >40% chars non-ASCII);
    # max(..., 1) guards against division by zero on an empty string
    total = max(len(message), 1)
    non_ascii = sum(1 for ch in message if ord(ch) > 127)
    return non_ascii / total > 0.4


def translate_text(text: str) -> str:
    """
    Auto-detect the language of *text* and translate it to English.

    Returns the translated string, the original text when it is already
    English, or a user-visible warning string when detection/translation
    fails (callers render the return value directly in the chat).
    """
    try:
        src_lang = detect(text)  # auto-detect like "ru", "es", "zh-cn"
        # BUG FIX: langdetect emits region-tagged codes ("zh-cn", "zh-tw")
        # but M2M100 only accepts bare ISO-639-1 codes ("zh"); strip the tag.
        src_lang = src_lang.split("-")[0]
        if src_lang == "en":
            # Already English: skip a pointless en->en model round trip.
            return text
        result = translator(text, src_lang=src_lang, tgt_lang="en")
        return result[0]["translation_text"]
    except Exception as e:
        # Best-effort: surface the failure in chat rather than crashing the app.
        return f"โš ๏ธ Translation failed: {str(e)}"


# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    """
    Streaming Gradio event handler for both Send and Enter.

    Yields (textbox_value, chat_history) tuples; the empty string clears the
    input box, and chat_history is a list of {"role", "content"} dicts in the
    gr.Chatbot(type="messages") format.

    Routing: messages flagged by is_translation_request() are answered locally
    via translate_text(); everything else is streamed from the Zephyr endpoint.
    """
    # Robustness fix: don't send empty/whitespace-only submissions to the
    # model — just clear the box and leave the history untouched.
    if not message or not message.strip():
        yield "", chat_history
        return

    # Translation path: single non-streamed reply.
    if is_translation_request(message):
        translated = translate_text(message)
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": translated})
        yield "", chat_history
        return

    # Otherwise -> normal Zephyr response; the style dropdown augments the
    # system prompt (local variable only — the UI textbox value is unchanged).
    if response_style == "Concise":
        system_message += " Keep answers short and direct."
    elif response_style == "Detailed":
        system_message += " Provide more explanation and context when helpful."
    elif response_style == "Essay":
        system_message += " Write long, structured, essay-style responses."

    # Build the API payload BEFORE mutating chat_history, so the placeholder
    # assistant entry added below is not sent to the model.
    messages = [{"role": "system", "content": system_message}] + chat_history
    messages.append({"role": "user", "content": message})

    # Mirror the user turn in the visible history and add an empty assistant
    # entry that the streaming loop fills in token by token.
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})  # placeholder for streaming

    response = ""
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # delta.content can be None on some stream chunks — coerce to "".
        token = msg.choices[0].delta.content or ""
        response += token
        chat_history[-1]["content"] = response  # update last assistant message
        yield "", chat_history

    # finally clear input box once after streaming is done
    yield "", chat_history


# ---------------- UI ----------------
# Top-level layout: header markdown, chat window, input row, and an
# Advanced Settings accordion whose controls feed stream_response.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown(
        """
        # ๐Ÿค– Zephyr-7B Chat + ๐ŸŒ Translator  
        Welcome! This app combines **Zephyr-7B** (for smart chat) with **M2M100** (for multilingual translation).  
        **How to use:**
        - ๐Ÿ’ฌ Ask *anything* โ†’ Zephyr will reply.  
        - ๐ŸŒ Paste non-English text or say "translate" โ†’ auto translation into English.  
        - โš™๏ธ Adjust settings in the panel if you want different styles.  
        ---
        """
    )

    # type="messages" -> history is a list of {"role", "content"} dicts,
    # matching what stream_response appends and yields.
    chatbot = gr.Chatbot(
        type="messages",
        height=500,
        show_copy_button=True,
        label="Chat Assistant"
    )

    with gr.Row():
        msg = gr.Textbox(
            label="๐Ÿ’ฌ Your Message",
            placeholder="Type here and press Enter or click ๐Ÿš€",
            scale=6
        )
        send_btn = gr.Button("๐Ÿš€ Send", variant="primary", scale=1)
        clear_btn = gr.Button("๐Ÿงน Clear Chat", scale=1)

    # Generation controls; defaults come from the CONFIG constants above.
    with gr.Accordion("โš™๏ธ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            ["Concise", "Detailed", "Essay"], value="Concise", label="Response Style"
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events
    # Button click and textbox Enter are wired identically; stream_response
    # yields ("", history) pairs, so each yield clears msg and updates chatbot.
    send_btn.click(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot]
    )
    msg.submit(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
        [msg, chatbot]
    )
    # NOTE(review): this resets only the chatbot history; any text still in
    # the msg textbox is left as-is — confirm that is the intended UX.
    clear_btn.click(lambda: [], None, chatbot, queue=False)

    gr.Markdown("---")
    gr.Markdown("๐Ÿ”— Built with โค๏ธ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")

# Launch the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()