Update app.py
app.py CHANGED
@@ -11,9 +11,10 @@ SYSTEM_PROMPT_DEFAULT = (
     "Always respond in a formal tone and provide only the direct answer unless the user requests more detail."
 )
 
-MAX_NEW_TOKENS_DEFAULT =
+MAX_NEW_TOKENS_DEFAULT = 512  # increased to handle long answers
 TEMP_DEFAULT = 0.7
 TOP_P_DEFAULT = 0.95
+MAX_HISTORY_MESSAGES = 10  # limit chat history to prevent repetition
 
 # Clients
 client = InferenceClient(MODEL_REPO)
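The new MAX_HISTORY_MESSAGES cap is applied further down via plain negative slicing of the Gradio message list. As a standalone illustration (not part of app.py), the slice keeps at most the last N messages and leaves shorter histories untouched:

# Illustration only: negative slicing keeps at most the last N entries
# and returns a short history unchanged rather than raising.
MAX_HISTORY_MESSAGES = 10
history = [{"role": "user", "content": f"turn {i}"} for i in range(25)]
assert len(history[-MAX_HISTORY_MESSAGES:]) == 10
assert history[:3][-MAX_HISTORY_MESSAGES:] == history[:3]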
@@ -31,6 +32,7 @@ def is_translation_request(message: str) -> bool:
 
 # ---------------- CHAT FUNCTION ----------------
 def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
+    # --- Translation handling ---
     if is_translation_request(message):
         try:
             translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
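The is_translation_request helper named in this hunk's context line is defined earlier in app.py and is not part of the diff. For orientation, a hypothetical sketch of such a gate could look like the following; the keyword check is an assumption, not the Space's actual code:

# Hypothetical sketch only; the real is_translation_request may differ.
def is_translation_request(message: str) -> bool:
    # Treat messages that explicitly ask for a translation as translation requests.
    return message.strip().lower().startswith(("translate", "translate:", "translation:"))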
@@ -44,21 +46,27 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
             yield "", chat_history
         return
 
-    # Apply response style
+    # --- Apply response style ---
     if response_style == "No explanation":
-
+        style_prompt = " Only provide the direct answer with no explanation."
     elif response_style == "Short explanation":
-
-
-
-
-
+        style_prompt = " Provide a concise answer with a one-sentence explanation."
+    else:  # Detailed explanation
+        style_prompt = " Provide a thorough and detailed answer with reasoning and examples."
+
+    # --- Prepare messages ---
+    # Only keep the last N messages to prevent repetition
+    truncated_history = chat_history[-MAX_HISTORY_MESSAGES:]
+    messages = [{"role": "system", "content": system_message + style_prompt}] + truncated_history
     messages.append({"role": "user", "content": message})
 
+    # Append user and placeholder for assistant
     chat_history.append({"role": "user", "content": message})
-    response = ""
     chat_history.append({"role": "assistant", "content": ""})
 
+    response = ""
+
+    # --- Stream response ---
     for msg in client.chat_completion(
         messages,
         max_tokens=max_tokens,
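The rest of the chat_completion call and the loop body fall outside this hunk. Assuming the call passes stream=True (not visible here), the elided section of stream_response presumably accumulates delta chunks along these lines; the field names follow huggingface_hub's streaming chat-completion output, and the actual code in app.py may differ:

    # Sketch of the elided streaming section inside stream_response (assumption).
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        token = msg.choices[0].delta.content or ""  # delta content can be None on some chunks
        response += token
        chat_history[-1]["content"] = response
        yield "", chat_history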
@@ -71,30 +79,18 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
         chat_history[-1]["content"] = response
         yield "", chat_history
 
+    # Clear input box after streaming
     yield "", chat_history
 
 
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
-    gr.Markdown(
-        """
-        # 🤖 Zephyr-7B Chat + 🌍 Translator
-        """
-    )
+    gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")
 
-    chatbot = gr.Chatbot(
-        type="messages",
-        height=500,
-        show_copy_button=True,
-        label="Chat Assistant"
-    )
+    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")
 
     with gr.Row():
-        msg = gr.Textbox(
-            label="💬 Your Message",
-            placeholder="Type here and press Enter or click 🚀",
-            scale=6
-        )
+        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
         send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
         clear_btn = gr.Button("🧹 Clear Chat", scale=1)
 
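clear_btn is created in this hunk, but its event binding is not among the lines shown. With the messages-type Chatbot above, a typical wiring resets both the textbox and the history; the line below is a presumed example, not taken from this commit:

# Presumed wiring (not shown in this diff): clear the textbox and the chat history.
clear_btn.click(lambda: ("", []), inputs=None, outputs=[msg, chatbot])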
@@ -102,14 +98,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink"))
         system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
         response_style = gr.Dropdown(
             ["No explanation", "Short explanation", "Detailed explanation"],
-            value="No explanation",
+            value="No explanation",
             label="Response Style"
         )
         temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
         top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
-        max_tokens = gr.Slider(
+        max_tokens = gr.Slider(128, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
 
-    # Events
+    # --- Events ---
     send_btn.click(
         stream_response,
         [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
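Only the first lines of the send_btn.click call are visible; its outputs and the Enter-to-send path are elided. Since stream_response yields ("", chat_history) pairs, the full wiring presumably maps those onto the textbox and the chatbot, with a matching msg.submit handler; both the outputs list and the submit binding below are assumptions:

# Presumed completion of the event wiring; only the start of send_btn.click is in the diff.
send_btn.click(
    stream_response,
    [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
    [msg, chatbot],
)
msg.submit(
    stream_response,
    [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
    [msg, chatbot],
)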