import gradio as gr
from huggingface_hub import InferenceClient
# ---------------- CONFIG ----------------
MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
SYSTEM_PROMPT_DEFAULT = "You are Zephyr, a helpful, concise and polite AI assistant."
MAX_NEW_TOKENS_DEFAULT = 512
TEMP_DEFAULT = 0.7
TOP_P_DEFAULT = 0.95

# Create the client (calls the hosted Hugging Face Inference API, not a local model)
client = InferenceClient(MODEL_REPO)
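
# Anonymous Inference API calls are rate-limited; passing a token raises the
# limit. A minimal sketch, assuming an HF_TOKEN env var (not in the original):
#
#   import os
#   client = InferenceClient(MODEL_REPO, token=os.environ.get("HF_TOKEN"))
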
# ---------------- CHAT FUNCTION ----------------
def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply token by token, updating the chat window."""
    # Rebuild the conversation in OpenAI-style message format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in chat_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = msg.choices[0].delta.content or ""
        response += token
        # Clear the input box and show the partial reply as it streams in.
        yield "", chat_history + [(message, response)]
# ---------------- UI ----------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
    gr.Markdown(
        """
        # 📱 Zephyr-7B (Hosted on Hugging Face Inference API)
        Optimized for **mobile-friendly chat** ✨
        <span style="opacity:0.7">Powered by HuggingFaceH4/zephyr-7b-beta</span>
        """
    )

    chatbot = gr.Chatbot(
        height=500,
        bubble_full_width=False,
        show_copy_button=True,
        label="Chat",
    )

    with gr.Row():
        msg = gr.Textbox(
            label="💬 Message",
            placeholder="Type your message…",
            scale=6,
        )
        send_btn = gr.Button("🚀", variant="primary", scale=1)
        clear_btn = gr.Button("🧹", scale=1)

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt",
            value=SYSTEM_PROMPT_DEFAULT,
            lines=3,
        )
        temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")

    # Events (streaming response)
    send_btn.click(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p],
        [msg, chatbot],
    )
    msg.submit(
        stream_response,
        [msg, chatbot, system_prompt, max_tokens, temperature, top_p],
        [msg, chatbot],
    )
    clear_btn.click(lambda: None, None, chatbot, queue=False)
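    # The clear handler above resets the Chatbot to empty via `lambda: None`;
    # `queue=False` runs it immediately instead of waiting in the event queue.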

if __name__ == "__main__":
    demo.launch()
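
# On older Gradio releases, streaming required enabling the queue explicitly;
# if tokens arrive all at once, this variant may help (assumption, not part
# of the original):
#
#   demo.queue().launch()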