"""Gradio chat app: Zephyr-7B chat with an M2M100 translation shortcut."""

from collections.abc import Iterator

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
TRANSLATOR_MODEL = "facebook/m2m100_418M"

SYSTEM_PROMPT_DEFAULT = (
    "You are Zephyr, a concise and polite AI assistant. "
    "Always respond formally and answer appropriately depending on the selected explanation style."
)

# ✅ Optimized defaults
MAX_NEW_TOKENS_DEFAULT = 300
TEMP_DEFAULT = 0.3
TOP_P_DEFAULT = 0.9

# Keywords that mark a message as a translation request (matched case-insensitively).
TRANSLATION_TRIGGERS = ("translate", "traduce", "ترجم", "traduire", "übersetze")

# Share of non-ASCII characters above which a message is treated as foreign text.
NON_ASCII_RATIO_THRESHOLD = 0.4

# Sentence appended to the system prompt for each selectable response style.
RESPONSE_STYLE_SUFFIXES = {
    "No explanation": " Only provide the direct answer with no explanation.",
    "Short explanation": " Provide a concise answer with a one-sentence explanation.",
    "Detailed explanation": " Provide a thorough and detailed answer with reasoning and examples.",
}

# Clients
client = InferenceClient(MODEL_REPO)
translator = pipeline("translation", model=TRANSLATOR_MODEL)


# ---------------- HELPERS ----------------
def is_translation_request(message: str) -> bool:
    """Return True when *message* should be routed to the translator.

    A message qualifies when it contains one of ``TRANSLATION_TRIGGERS``
    (case-insensitive substring match) or when more than 40% of its
    characters are non-ASCII.
    """
    lowered = message.lower()  # lower-case once instead of once per trigger
    if any(trigger in lowered for trigger in TRANSLATION_TRIGGERS):
        return True
    non_ascii_count = sum(1 for ch in message if ord(ch) > 127)
    # max(..., 1) avoids a ZeroDivisionError on an empty message.
    return non_ascii_count / max(len(message), 1) > NON_ASCII_RATIO_THRESHOLD


def _to_api_messages(chat_history: list[dict]) -> list[dict]:
    """Copy only the ``role`` and ``content`` keys of each history entry.

    Gradio may attach additional keys to chatbot history entries; only the
    two keys this app itself writes are forwarded to the inference API.
    """
    return [
        {key: entry[key] for key in ("role", "content") if key in entry}
        for entry in chat_history
    ]


# ---------------- CHAT FUNCTION ----------------
def stream_response(
    message: str,
    chat_history: list[dict],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    response_style: str,
) -> Iterator[tuple[str, list[dict]]]:
    """Answer *message*, yielding ``("", chat_history)`` after every update.

    The empty string clears the input textbox; ``chat_history`` is the list of
    ``{"role", "content"}`` dicts shown by the chatbot and is mutated in place.

    Translation requests (see ``is_translation_request``) are answered by the
    local translation pipeline in a single step. Everything else is streamed
    from the chat model, with the assistant entry growing as chunks arrive.
    Failures of either backend are reported as the assistant's message instead
    of being raised.
    """
    if chat_history is None:
        chat_history = []

    # Nothing to answer: leave the conversation untouched instead of sending
    # an empty user turn to the model.
    if not message or not message.strip():
        yield "", chat_history
        return

    if is_translation_request(message):
        try:
            # NOTE(review): M2M100 appears to require a concrete source
            # language code; "auto" is probably rejected by the tokenizer, in
            # which case this branch always reports a failure — confirm and
            # add language detection if so.
            reply = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
        except Exception as e:  # UI boundary: show the failure to the user
            reply = f"⚠️ Translation failed: {e}"
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": reply})
        yield "", chat_history
        return

    # Apply response style (unknown styles leave the system prompt unchanged).
    system_message += RESPONSE_STYLE_SUFFIXES.get(response_style, "")

    # Build the API payload from the history *before* the new turn is appended
    # to it, so the user message is not sent twice.
    messages = [{"role": "system", "content": system_message}]
    messages.extend(_to_api_messages(chat_history))
    messages.append({"role": "user", "content": message})

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})  # placeholder filled while streaming

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=int(max_tokens),  # a slider may deliver a float
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # Skip chunks without choices rather than letting an IndexError
            # replace the partial answer with an error message.
            if not chunk.choices:
                continue
            response += chunk.choices[0].delta.content or ""
            chat_history[-1]["content"] = response
            yield "", chat_history
    except Exception as e:  # UI boundary: show the failure to the user
        chat_history[-1]["content"] = f"⚠️ Error generating response: {e}"
        yield "", chat_history

    # Final update so the UI is refreshed even if the stream produced no chunks.
    yield "", chat_history


# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")

    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")

    with gr.Row():
        msg = gr.Textbox(label="💬 Your Message", placeholder="Type here…", scale=6)
        send_btn = gr.Button("🚀 Send", variant="primary", scale=1)
        clear_btn = gr.Button("🧹 Clear Chat", scale=1)

    with gr.Accordion("⚙️ Advanced Settings", open=False):
        system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
        response_style = gr.Dropdown(
            ["No explanation", "Short explanation", "Detailed explanation"],
            value="Detailed explanation",
            label="Response Style",
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events: the Send button and pressing Enter in the textbox run the same handler.
    chat_inputs = [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style]
    chat_outputs = [msg, chatbot]
    send_btn.click(stream_response, chat_inputs, chat_outputs)
    msg.submit(stream_response, chat_inputs, chat_outputs)
    clear_btn.click(lambda: [], None, chatbot, queue=False)

    gr.Markdown("---")
    gr.Markdown("🔗 Built with ❤️ using [Zephyr-7B](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) & [M2M100](https://huggingface.co/facebook/m2m100_418M).")


if __name__ == "__main__":
    demo.launch()