import gradio as gr
from huggingface_hub import InferenceClient

# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
SYSTEM_PROMPT_DEFAULT = "You are Zephyr, a helpful, concise and polite AI assistant."
MAX_NEW_TOKENS_DEFAULT = 512
TEMP_DEFAULT = 0.7
TOP_P_DEFAULT = 0.95

# Create client (calls Hugging Face Inference API, not a local model)
client = InferenceClient(MODEL_REPO)


# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation in chat-completion message format
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Stream tokens back, updating the chatbot as the reply grows
    response = ""
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = msg.choices[0].delta.content or ""
        response += token
        yield "", chat_history + [(message, response)]


# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown(
        """
        # 📱 Zephyr-7B (Hosted on Hugging Face Inference API)
        Optimized for **mobile-friendly chat** ✨
        Powered by HuggingFaceH4/zephyr-7b-beta
        """
    )

    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True,
        label="Chat",
    )

    with gr.Row():
        msg = gr.Textbox(
            label="💬 Message",
            placeholder="Type your message…",
            scale=6,
        )
        send_btn = gr.Button("🚀", variant="primary", scale=1)
        clear_btn = gr.Button("🧹", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt",
            value=SYSTEM_PROMPT_DEFAULT,
            lines=3,
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events (streaming response)
    send_btn.click(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p],
        [msg, chatbot],
    )
    msg.submit(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p],
        [msg, chatbot],
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()
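
# To try it locally (assuming this file is saved as app.py):
#   pip install gradio huggingface_hub
#   python app.py
# Note: the hosted Inference API may require authentication; if requests are
# rejected, set an HF token in your environment before launching.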