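"""Zephyr-7B chat Space with a lightweight translation fallback.

Messages that look like translation requests are handled locally by M2M100;
everything else is streamed from the Zephyr-7B chat model via the HF
Inference API.
"""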
import gradio as gr
from huggingface_hub import InferenceClient
from langdetect import detect  # extra dependency: M2M100 cannot auto-detect the source language
from transformers import pipeline
# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"
SYSTEM_PROMPT_DEFAULT = (
    "You are Zephyr, a concise and polite AI assistant. "
    "Always respond formally and answer appropriately depending on the selected explanation style."
)
# ✅ Optimized defaults
MAX_NEW_TOKENS_DEFAULT = 300
TEMP_DEFAULT = 0.3
TOP_P_DEFAULT = 0.9
# Clients
client = InferenceClient(MODEL_REPO)
translator = pipeline("translation", model=TRANSLATOR_MODEL)
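# Note: the chat model runs remotely through the Inference API, while the
# translator runs locally (its weights are downloaded on first use).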
# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Heuristic: treat the message as a translation request if it contains a
    trigger word or is mostly non-ASCII (likely non-English text)."""
    triggers = ["translate", "traduce", "ترجم", "traduire", "übersetze"]
    if any(t in message.lower() for t in triggers):
        return True
    # Fraction of non-ASCII characters; > 0.4 is a rough cutoff for non-Latin scripts.
    non_ascii_ratio = sum(1 for c in message if ord(c) > 127) / max(len(message), 1)
    return non_ascii_ratio > 0.4
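# Examples of the heuristic above:
#   is_translation_request("translate hello to French")    -> True  (trigger word)
#   is_translation_request("ترجم هذا النص")                  -> True  (trigger word + non-ASCII ratio)
#   is_translation_request("What is the capital of Peru?")  -> False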
# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
    # Translation branch: handled by the local M2M100 pipeline instead of the LLM.
    if is_translation_request(message):
        chat_history.append({"role": "user", "content": message})
        try:
            # M2M100 has no "auto" source language, so detect it first; langdetect
            # returns ISO 639-1 codes, which match M2M100's codes for most languages.
            src = detect(message)
            translated = translator(message, src_lang=src, tgt_lang="en")[0]["translation_text"]
            chat_history.append({"role": "assistant", "content": translated})
        except Exception as e:
            chat_history.append({"role": "assistant", "content": f"⚠️ Translation failed: {e}"})
        yield "", chat_history
        return
    # Apply the selected response style by extending the system prompt.
    if response_style == "No explanation":
        system_message += " Only provide the direct answer with no explanation."
    elif response_style == "Short explanation":
        system_message += " Provide a concise answer with a one-sentence explanation."
    elif response_style == "Detailed explanation":
        system_message += " Provide a thorough and detailed answer with reasoning and examples."

    # Build the model prompt: system message, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}] + chat_history
    messages.append({"role": "user", "content": message})

    # Mirror the user message into the visible history and add an empty
    # assistant placeholder that the streaming loop fills in.
    chat_history.append({"role": "user", "content": message})
    response = ""
    chat_history.append({"role": "assistant", "content": ""})
    try:
        # Stream tokens from the Inference API, updating the placeholder in
        # place so the UI renders the reply incrementally.
        for msg in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = msg.choices[0].delta.content or ""
            response += token
            chat_history[-1]["content"] = response
            yield "", chat_history
    except Exception as e:
        chat_history[-1]["content"] = f"⚠️ Error generating response: {e}"
        yield "", chat_history
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")
chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")
with gr.Row():
msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
clear_btn = gr.Button("🧹 Clear Chat", scale=1)
with gr.Accordion("⚙️ Advanced Settings", open=False):
system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
response_style = gr.Dropdown(
["No explanation", "Short explanation", "Detailed explanation"],
value="Detailed explanation",
label="Response Style"
)
temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
    # Events: both the Send button and Enter in the textbox trigger generation;
    # the generator clears the textbox and streams updates into the chatbot.
    send_btn.click(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
    msg.submit(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
    clear_btn.click(lambda: [], None, chatbot, queue=False)

    gr.Markdown("---")
    gr.Markdown("🔗 Built with ❤️ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")
if __name__ == "__main__":
    demo.launch()
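# Assumed Space requirements.txt: gradio, huggingface_hub, transformers, torch,
# sentencepiece (required by the M2M100 tokenizer), langdetect.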