Asilbek14 commited on
Commit
af25cff
·
verified ·
1 Parent(s): df49a7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -28
app.py CHANGED
@@ -11,9 +11,10 @@ SYSTEM_PROMPT_DEFAULT = (
11
  "Always respond in a formal tone and provide only the direct answer unless the user requests more detail."
12
  )
13
 
14
- MAX_NEW_TOKENS_DEFAULT = 128
15
  TEMP_DEFAULT = 0.7
16
  TOP_P_DEFAULT = 0.95
 
17
 
18
  # Clients
19
  client = InferenceClient(MODEL_REPO)
@@ -31,6 +32,7 @@ def is_translation_request(message: str) -> bool:
31
 
32
  # ---------------- CHAT FUNCTION ----------------
33
  def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
 
34
  if is_translation_request(message):
35
  try:
36
  translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
@@ -44,21 +46,27 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
44
  yield "", chat_history
45
  return
46
 
47
- # Apply response style
48
  if response_style == "No explanation":
49
- system_message += " Only provide the direct answer with no explanation."
50
  elif response_style == "Short explanation":
51
- system_message += " Provide a concise answer with a one-sentence explanation."
52
- elif response_style == "Detailed explanation":
53
- system_message += " Provide a thorough and detailed answer with reasoning and examples."
54
-
55
- messages = [{"role": "system", "content": system_message}] + chat_history
 
 
 
56
  messages.append({"role": "user", "content": message})
57
 
 
58
  chat_history.append({"role": "user", "content": message})
59
- response = ""
60
  chat_history.append({"role": "assistant", "content": ""})
61
 
 
 
 
62
  for msg in client.chat_completion(
63
  messages,
64
  max_tokens=max_tokens,
@@ -71,30 +79,18 @@ def stream_response(message, chat_history, system_message, max_tokens, temperatu
71
  chat_history[-1]["content"] = response
72
  yield "", chat_history
73
 
 
74
  yield "", chat_history
75
 
76
 
77
  # ---------------- UI ----------------
78
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
79
- gr.Markdown(
80
- """
81
- # πŸ€– Zephyr-7B Chat + 🌍 Translator
82
- """
83
- )
84
 
85
- chatbot = gr.Chatbot(
86
- type="messages",
87
- height=500,
88
- show_copy_button=True,
89
- label="Chat Assistant"
90
- )
91
 
92
  with gr.Row():
93
- msg = gr.Textbox(
94
- label="πŸ’¬ Your Message",
95
- placeholder="Type here and press Enter or click πŸš€",
96
- scale=6
97
- )
98
  send_btn = gr.Button("πŸš€ Send", variant="primary", scale=1)
99
  clear_btn = gr.Button("🧹 Clear Chat", scale=1)
100
 
@@ -102,14 +98,14 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink"))
102
  system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
103
  response_style = gr.Dropdown(
104
  ["No explanation", "Short explanation", "Detailed explanation"],
105
- value="No explanation", # βœ… default set here
106
  label="Response Style"
107
  )
108
  temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
109
  top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
110
- max_tokens = gr.Slider(32, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
111
 
112
- # Events
113
  send_btn.click(
114
  stream_response,
115
  [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],
 
11
  "Always respond in a formal tone and provide only the direct answer unless the user requests more detail."
12
  )
13
 
14
+ MAX_NEW_TOKENS_DEFAULT = 512 # increased to handle long answers
15
  TEMP_DEFAULT = 0.7
16
  TOP_P_DEFAULT = 0.95
17
+ MAX_HISTORY_MESSAGES = 10 # limit chat history to prevent repetition
18
 
19
  # Clients
20
  client = InferenceClient(MODEL_REPO)
 
32
 
33
  # ---------------- CHAT FUNCTION ----------------
34
  def stream_response(message, chat_history, system_message, max_tokens, temperature, top_p, response_style):
35
+ # --- Translation handling ---
36
  if is_translation_request(message):
37
  try:
38
  translated = translator(message, src_lang="auto", tgt_lang="en")[0]["translation_text"]
 
46
  yield "", chat_history
47
  return
48
 
49
+ # --- Apply response style ---
50
  if response_style == "No explanation":
51
+ style_prompt = " Only provide the direct answer with no explanation."
52
  elif response_style == "Short explanation":
53
+ style_prompt = " Provide a concise answer with a one-sentence explanation."
54
+ else: # Detailed explanation
55
+ style_prompt = " Provide a thorough and detailed answer with reasoning and examples."
56
+
57
+ # --- Prepare messages ---
58
+ # Only keep the last N messages to prevent repetition
59
+ truncated_history = chat_history[-MAX_HISTORY_MESSAGES:]
60
+ messages = [{"role": "system", "content": system_message + style_prompt}] + truncated_history
61
  messages.append({"role": "user", "content": message})
62
 
63
+ # Append user and placeholder for assistant
64
  chat_history.append({"role": "user", "content": message})
 
65
  chat_history.append({"role": "assistant", "content": ""})
66
 
67
+ response = ""
68
+
69
+ # --- Stream response ---
70
  for msg in client.chat_completion(
71
  messages,
72
  max_tokens=max_tokens,
 
79
  chat_history[-1]["content"] = response
80
  yield "", chat_history
81
 
82
+ # Clear input box after streaming
83
  yield "", chat_history
84
 
85
 
86
  # ---------------- UI ----------------
87
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="pink")) as demo:
88
+ gr.Markdown("# πŸ€– Zephyr-7B Chat + 🌍 Translator")
 
 
 
 
89
 
90
+ chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat Assistant")
 
 
 
 
 
91
 
92
  with gr.Row():
93
+ msg = gr.Textbox(label="πŸ’¬ Your Message", placeholder="Type here…", scale=6)
 
 
 
 
94
  send_btn = gr.Button("πŸš€ Send", variant="primary", scale=1)
95
  clear_btn = gr.Button("🧹 Clear Chat", scale=1)
96
 
 
98
  system_prompt = gr.Textbox(label="System Prompt", value=SYSTEM_PROMPT_DEFAULT, lines=3)
99
  response_style = gr.Dropdown(
100
  ["No explanation", "Short explanation", "Detailed explanation"],
101
+ value="No explanation",
102
  label="Response Style"
103
  )
104
  temperature = gr.Slider(0.1, 1.5, value=TEMP_DEFAULT, step=0.1, label="Temperature")
105
  top_p = gr.Slider(0.1, 1.0, value=TOP_P_DEFAULT, step=0.05, label="Top-p")
106
+ max_tokens = gr.Slider(128, 2048, value=MAX_NEW_TOKENS_DEFAULT, step=16, label="Max new tokens")
107
 
108
+ # --- Events ---
109
  send_btn.click(
110
  stream_response,
111
  [msg, chatbot, system_prompt, max_tokens, temperature, top_p, response_style],