Asilbek14 committed
Commit f7a5317 · verified · 1 Parent(s): feb390d

Update app.py

Files changed (1)
  1. app.py +36 -50
app.py CHANGED
@@ -4,23 +4,22 @@ from transformers import pipeline
 
 # ---------------- CONFIG ----------------
 MODEL_REPO = "HuggingFaceH4/zephyr-7b-beta"
-TRANSLATOR_MODEL = "facebook/m2m100_418M"  # multilingual translator
+TRANSLATOR_MODEL = "facebook/m2m100_418M"
 
 SYSTEM_PROMPT_DEFAULT = (
     "You are Zephyr, a concise and polite AI assistant. "
-    "Always respond in a formal tone and provide only the direct answer unless the user requests more detail."
+    "Always respond formally and answer appropriately depending on the selected explanation style."
 )
 
-MAX_NEW_TOKENS_DEFAULT = 512  # increased to handle long answers
-TEMP_DEFAULT = 0.7
-TOP_P_DEFAULT = 0.95
-MAX_HISTORY_MESSAGES = 10  # limit chat history to prevent repetition
+# Optimized defaults
+MAX_NEW_TOKENS_DEFAULT = 300
+TEMP_DEFAULT = 0.3
+TOP_P_DEFAULT = 0.9
 
 # Clients
 client = InferenceClient(MODEL_REPO)
 translator = pipeline("translation", model=TRANSLATOR_MODEL)
 
-
 # ---------------- HELPERS ----------------
 def is_translation_request(message: str) -> bool:
     triggers = ["translate", "traduce", "ترجم", "traduire", "übersetze"]
@@ -29,10 +28,8 @@ def is_translation_request(message: str) -> bool:
     non_ascii_ratio = sum(1 for c in message if ord(c) > 127) / max(len(message), 1)
     return non_ascii_ratio > 0.4
 
-
 # ---------------- CHAT FUNCTION ----------------
 def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
-    # --- Translation handling ---
     if is_translation_request(message):
         try:
             translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
@@ -46,43 +43,40 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
             yield "", chat_history
         return
 
-    # --- Apply response style ---
+    # Apply response style
     if response_style == "No explanation":
-        style_prompt = " Only provide the direct answer with no explanation."
+        system_message += " Only provide the direct answer with no explanation."
     elif response_style == "Short explanation":
-        style_prompt = " Provide a concise answer with a one-sentence explanation."
-    else:  # Detailed explanation
-        style_prompt = " Provide a thorough and detailed answer with reasoning and examples."
-
-    # --- Prepare messages ---
-    # Only keep the last N messages to prevent repetition
-    truncated_history = chat_history[-MAX_HISTORY_MESSAGES:]
-    messages = [{"role": "system", "content": system_message + style_prompt}] + truncated_history
+        system_message += " Provide a concise answer with a one-sentence explanation."
+    elif response_style == "Detailed explanation":
+        system_message += " Provide a thorough and detailed answer with reasoning and examples."
+
+    messages = [{"role": "system", "content": system_message}] + chat_history
     messages.append({"role": "user", "content": message})
 
-    # Append user and placeholder for assistant
+    # Append user first
     chat_history.append({"role": "user", "content": message})
-    chat_history.append({"role": "assistant", "content": ""})
-
     response = ""
-
-    # --- Stream response ---
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = msg.choices[0].delta.content or ""
-        response += token
-        chat_history[-1]["content"] = response
+    chat_history.append({"role": "assistant", "content": ""})  # placeholder
+
+    try:
+        for msg in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = msg.choices[0].delta.content or ""
+            response += token
+            chat_history[-1]["content"] = response
+            yield "", chat_history
+    except Exception as e:
+        chat_history[-1]["content"] = f"⚠️ Error generating response: {str(e)}"
        yield "", chat_history
 
-    # Clear input box after streaming
     yield "", chat_history
 
-
 # ---------------- UI ----------------
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
     gr.Markdown("# 🤖 Zephyr-7B Chat + 🌍 Translator")
@@ -98,24 +92,16 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink"))
         system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
         response_style = gr.Dropdown(
             ["No explanation", "Short explanation", "Detailed explanation"],
-            value="No explanation",
+            value="Detailed explanation",
             label="Response Style"
         )
         temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
         top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
-        max_tokens = gr.Slider(128, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
-
-        # --- Events ---
-        send_btn.click(
-            stream_response,
-            [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
-            [msg, chatbot]
-        )
-        msg.submit(
-            stream_response,
-            [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
-            [msg, chatbot]
-        )
+        max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
+
+        # Events
+        send_btn.click(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
+        msg.submit(stream_response, [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style], [msg, chatbot])
     clear_btn.click(lambda: [], None, chatbot, queue=False)
 
     gr.Markdown("---")
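Note on the translation path: M2M100 requires explicit source and target language codes and, as far as I know, has no automatic source-language detection, so `translator(message, src_lang="auto", tgt_lang="en")` is likely to fail at runtime. A minimal sketch of one possible workaround, assuming the third-party `langdetect` package as an extra dependency (not part of this Space):

```python
# Sketch only, not this Space's code. M2M100 accepts concrete language
# codes such as "fr", "de", or "ar"; "auto" is not among them.
from langdetect import detect  # assumed extra dependency: pip install langdetect
from transformers import pipeline

translator = pipeline("translation", model="facebook/m2m100_418M")

def translate_to_english(message: str) -> str:
    src = detect(message)  # ISO 639-1 guess; mostly matches M2M100's codes
    return translator(message, src_lang=src, tgt_lang="en")[0]["translation_text"]
```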
 
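The new `try`/`except` wraps the usual `huggingface_hub` streaming pattern: an empty assistant placeholder is appended once, then its `content` is overwritten with the accumulated text on every chunk so the chatbot re-renders the partial answer. The same loop can be exercised outside Gradio; a minimal sketch, assuming a valid Hugging Face token is available in the environment:

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "system", "content": "You are Zephyr, a concise and polite AI assistant."},
    {"role": "user", "content": "Name the capital of France."},
]

response = ""
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    token = chunk.choices[0].delta.content or ""  # delta.content can be None
    response += token
    print(token, end="", flush=True)
```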
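Because `stream_response` appends `{"role": ..., "content": ...}` dicts to `chat_history`, the chatbot presumably uses Gradio's "messages" history format. The component definitions sit in UI lines this diff does not show; the following is a hypothetical reconstruction for context only, using just the names the event wiring references:

```python
# Hypothetical sketch of the elided UI components; only the variable names
# (chatbot, msg, send_btn, clear_btn) are taken from the diff above.
chatbot = gr.Chatbot(type="messages")  # role/content dicts, as stream_response emits
msg = gr.Textbox(label="Message")
send_btn = gr.Button("Send")
clear_btn = gr.Button("Clear")
```

With `type="messages"`, the `clear_btn.click(lambda: [], None, chatbot, queue=False)` handler resets the history to an empty list, which is the expected empty value for that format.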