Spaces:

Asilbek14
/

zephyr-for-mobile

Sleeping

App Files Files Community

Asilbek14 committed on Aug 19

Commit

b4f77ad

verified ·

1 Parent(s): 08ea239

Integrated a translation model

Browse files

Files changed (1) hide show

app.py +42 -28

app.py CHANGED Viewed

@@ -1,8 +1,11 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 # ---------------- CONFIG ----------------
 MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
 SYSTEM_PROMPT_DEFAULT = (
     "You are Zephyr, a concise and polite AI assistant. "
     "Answer briefly unless the user specifically asks for detail."
@@ -12,12 +15,40 @@ MAX_NEW_TOKENS_DEFAULT = 128
 TEMP_DEFAULT = 0.7
 TOP_P_DEFAULT = 0.95
-# Create client (calls Hugging Face Inference API, not local model)
 client = InferenceClient(MODEL_REPO)
 # ---------------- CHAT FUNCTION ----------------
 def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
-    # adjust style
     if response_style == "Concise":
         system_message += " Keep answers short and direct."
     elif response_style == "Detailed":
@@ -25,7 +56,6 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
     elif response_style == "Essay":
         system_message += " Write long, structured, essay-style responses."
-    # build conversation
     messages = [{"role": "system", "content": system_message}] + chat_history
     messages.append({"role": "user", "content": message})
@@ -39,7 +69,6 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
     ):
         token = msg.choices[0].delta.content or ""
         response += token
-        # yield new history in messages format
         yield "", chat_history + [
             {"role": "user", "content": message},
             {"role": "assistant", "content": response}
@@ -50,44 +79,29 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
     gr.Markdown(
         """
-        # 📱 Zephyr-7B (Hosted on Hugging Face Inference API)
-        Optimized for **mobile-friendly chat** ✨
-        <span style="opacity:0.7">Powered by HuggingFaceH4/zephyr-7b-beta</span>
         """
     )
-    chatbot = gr.Chatbot(
-        type="messages",  # ✅ use messages format
-        height=500,
-        show_copy_button=True,
-        label="Chat"
-    )
     with gr.Row():
-        msg = gr.Textbox(
-            label="💬 Message",
-            placeholder="Type your message…",
-            scale=6
-        )
         send_btn = gr.Button("🚀", variant="primary", scale=1)
         clear_btn = gr.Button("🧹", scale=1)
     with gr.Accordion("⚙️ Settings", open=False):
-        system_prompt = gr.Textbox(
-            label="System Prompt",
-            value=SYSTEM_PROMPT_DEFAULT,
-            lines=3
-        )
         response_style = gr.Dropdown(
-            ["Concise", "Detailed", "Essay"],
-            value="Concise",
-            label="Response Style"
         )
         temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
         top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
         max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
-    # Events (streaming response)
     send_btn.click(
         stream_response,
         [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
@@ -101,4 +115,4 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink"))
     clear_btn.click(lambda: [], None, chatbot, queue=False)
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+from transformers import pipeline
 # ---------------- CONFIG ----------------
 MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
+TRANSLATOR_MODEL = "facebook/m2m100_418M"  # multilingual translator
 SYSTEM_PROMPT_DEFAULT = (
     "You are Zephyr, a concise and polite AI assistant. "
     "Answer briefly unless the user specifically asks for detail."
 TEMP_DEFAULT = 0.7
 TOP_P_DEFAULT = 0.95
+# Clients
 client = InferenceClient(MODEL_REPO)
+translator = pipeline("translation", model=TRANSLATOR_MODEL)
+# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Return True when *message* should be routed to the translator.

    Two heuristics are applied, in order:

    1. The message contains one of a few explicit "translate" keywords
       (English, Spanish, Arabic, French, German), matched
       case-insensitively as substrings.
    2. More than 40% of the message's *letters* are non-ASCII, which is
       taken as a sign that the text is not English.

    Only alphabetic characters take part in the ratio, so emoji, digits,
    punctuation and whitespace neither trigger nor dilute the detection
    (an emoji-only message is not a translation request).

    Args:
        message: Raw user input. Empty or ``None`` input yields ``False``.

    Returns:
        ``True`` if the message looks like a translation request.
    """
    if not message:
        return False

    triggers = ("translate", "traduce", "ترجم", "traduire", "übersetze")
    # casefold() is the Unicode-aware form of lower() for caseless matching.
    folded = message.casefold()
    if any(trigger in folded for trigger in triggers):
        return True

    # Naive non-English detection: share of non-ASCII characters among
    # letters only, so symbols and emoji cannot push the ratio over 40%.
    letters = [ch for ch in message if ch.isalpha()]
    if not letters:
        return False
    non_ascii_letters = sum(1 for ch in letters if ord(ch) > 127)
    return non_ascii_letters / len(letters) > 0.4
 # ---------------- CHAT FUNCTION ----------------
 def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+    # check if translation
+    if is_translation_request(message):
+        try:
+            translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
+            return "", chat_history + [
+                {"role": "user", "content": message},
+                {"role": "assistant", "content": translated}
+            ]
+        except Exception as e:
+            return "", chat_history + [
+                {"role": "user", "content": message},
+                {"role": "assistant", "content": f"⚠️ Translation failed: {str(e)}"}
+            ]
+    # Otherwise → normal Zephyr response
     if response_style == "Concise":
         system_message += " Keep answers short and direct."
     elif response_style == "Detailed":
     elif response_style == "Essay":
         system_message += " Write long, structured, essay-style responses."
     messages = [{"role": "system", "content": system_message}] + chat_history
     messages.append({"role": "user", "content": message})
     ):
         token = msg.choices[0].delta.content or ""
         response += token
         yield "", chat_history + [
             {"role": "user", "content": message},
             {"role": "assistant", "content": response}
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
     gr.Markdown(
         """
+        # 🌍 Zephyr-7B with Multilingual Support
+        - 💬 Normal chat powered by **Zephyr-7B**
+        - 🌐 Translation powered by **M2M100** (auto-detects non-English or "translate" requests)
         """
     )
+    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat")
     with gr.Row():
+        msg = gr.Textbox(label="💬 Message", placeholder="Type your message…", scale=6)
         send_btn = gr.Button("🚀", variant="primary", scale=1)
         clear_btn = gr.Button("🧹", scale=1)
     with gr.Accordion("⚙️ Settings", open=False):
+        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
         response_style = gr.Dropdown(
+            ["Concise", "Detailed", "Essay"], value="Concise", label="Response Style"
         )
         temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
         top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
         max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
+    # Events
     send_btn.click(
         stream_response,
         [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
     clear_btn.click(lambda: [], None, chatbot, queue=False)
 if __name__ == "__main__":
+    demo.launch()