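"""Gradio demo: Zephyr-7B chat assistant with an M2M100 translation fallback.

Messages that look like translation requests are handled locally by M2M100;
everything else is streamed from Zephyr-7B via the Hugging Face Inference API.
"""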
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"

SYSTEM_PROMPT_DEFAULT = (
    "You are Zephyr, a concise and polite AI assistant. "
    "Always respond formally and answer appropriately depending on the selected explanation style."
)

# ✅ Generation defaults (low temperature keeps answers focused and consistent)
MAX_NEW_TOKENS_DEFAULT = 300
TEMP_DEFAULT = 0.3
TOP_P_DEFAULT = 0.9

# Clients: the Inference API client uses your cached Hugging Face token if one
# is available; the translation pipeline downloads the M2M100 weights locally
# on first run.
client = InferenceClient(MODEL_REPO)
translator = pipeline("translation", model=TRANSLATOR_MODEL)

# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Heuristic: trigger words in several languages, or a mostly non-ASCII message."""
    triggers = ["translate", "traduce", "ترجم", "traduire", "übersetze"]
    if any(t in message.lower() for t in triggers):
        return True
    # Treat a message that is mostly non-ASCII as likely non-English input.
    non_ascii_ratio = sum(1 for c in message if ord(c) > 127) / max(len(message), 1)
    return non_ascii_ratio > 0.4
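
# M2M100 cannot auto-detect the source language, so we pick one before calling
# the pipeline. A minimal sketch using the `langdetect` package (an assumed
# extra dependency, not part of the original app); its ISO 639-1 codes match
# M2M100's language codes for the common cases.
def detect_source_lang(message: str) -> str:
    try:
        from langdetect import detect  # assumed dependency: pip install langdetect
        return detect(message)
    except Exception:
        # If detection fails (or langdetect is missing), fall back to English;
        # M2M100 then effectively passes the text through unchanged.
        return "en"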

# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    """Route translation-looking messages to M2M100; stream everything else from Zephyr-7B."""
    if is_translation_request(message):
        try:
            # M2M100 has no "auto" source language, so detect one first.
            src = detect_source_lang(message)
            translated = translator(message, src_lang=src, tgt_lang="en")[0]["translation_text"]
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": translated})
            yield "", chat_history
            return
        except Exception as e:
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": f"⚠️ Translation failed: {str(e)}"})
            yield "", chat_history
            return

    # Apply response style
    if response_style == "No explanation":
        system_message += " Only provide the direct answer with no explanation."
    elif response_style == "Short explanation":
        system_message += " Provide a concise answer with a one-sentence explanation."
    elif response_style == "Detailed explanation":
        system_message += " Provide a thorough and detailed answer with reasoning and examples."

    # Build the model prompt, then mirror the new turn into the UI history.
    messages = [{"role": "system", "content": system_message}] + chat_history
    messages.append({"role": "user", "content": message})

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})  # placeholder, filled in as tokens stream
    response = ""

    try:
        for msg in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = msg.choices[0].delta.content or ""
            response += token
            chat_history[-1]["content"] = response
            yield "", chat_history
    except Exception as e:
        chat_history[-1]["content"] = f"⚠️ Error generating response: {str(e)}"
        yield "", chat_history

    yield "", chat_history

# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")

    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")

    with gr.Row():
        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
        send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            ["No explanation", "Short explanation", "Detailed explanation"],
            value="Detailed explanation",
            label="Response Style"
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events
    send_btn.click(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
    msg.submit(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot, queue=False)

    gr.Markdown("---")
    gr.Markdown("🔗 Built with ❤️ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")

if __name__ == "__main__":
    demo.launch()
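
# To run locally (assumed environment; the original app does not pin dependencies):
#   pip install gradio huggingface_hub transformers torch sentencepiece langdetect
#   python app.py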