YangWu001 committed
Commit b87df6e · 1 Parent(s): 22d1b4b

Files changed (1):
  1. app.py  +6 -31
app.py CHANGED

@@ -13,7 +13,6 @@ stop_inference = False
 
 def respond(
     message,
-    history: list[tuple[str, str]],
     system_message,
     max_tokens,
     temperature,
@@ -24,15 +23,8 @@ def respond(
     stop_inference = False  # Reset cancellation flag
 
     if use_local_model:
-        # Simulate local inference
+        # Simulate local inference (ignoring history)
         messages = [{"role": "system", "content": system_message}]
-
-        for val in history:
-            if val[0]:
-                messages.append({"role": "user", "content": val[0]})
-            if val[1]:
-                messages.append({"role": "assistant", "content": val[1]})
-
         messages.append({"role": "user", "content": message})
 
         response = ""
@@ -47,18 +39,9 @@ def respond(
             response += token
             yield response  # Yielding response directly
 
-        # Ensure the history is updated after generating the response
-        history[-1] = (message, response)  # Update the last tuple in history with the full response
-        yield history  # Yield the updated history
-
     else:
-        # API-based inference
+        # API-based inference (ignoring history)
         messages = [{"role": "system", "content": system_message}]
-        for val in history:
-            if val[0]:
-                messages.append({"role": "user", "content": val[0]})
-            if val[1]:
-                messages.append({"role": "assistant", "content": val[1]})
         messages.append({"role": "user", "content": message})
 
         response = ""
@@ -76,10 +59,6 @@ def respond(
             response += token
             yield response  # Yielding response directly
 
-        # Ensure the history is updated after generating the response
-        history[-1] = (message, response)  # Update the last tuple in history with the full response
-        yield history  # Yield the updated history
-
 def cancel_inference():
     global stop_inference
     stop_inference = True
@@ -141,7 +120,7 @@ with gr.Blocks(css=custom_css) as demo:
 
     with gr.Row():
         max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-        temperature = gr.Slider(minimum=0.1, maximum = 4.0, value=0.7, step=0.1, label="Temperature")
+        temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
         top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
 
     chat_history = gr.Chatbot(label="Chat")
@@ -150,11 +129,9 @@ with gr.Blocks(css=custom_css) as demo:
 
     cancel_button = gr.Button("Cancel Inference", variant="danger")
 
-    def chat_fn(message, history):
-        history.append((message, ""))  # Initialize with empty response
+    def chat_fn(message):
         response_gen = respond(
             message,
-            history,
             system_message.value,
             max_tokens.value,
             temperature.value,
@@ -165,11 +142,9 @@ with gr.Blocks(css=custom_css) as demo:
         for response in response_gen:
             full_response += response  # Accumulate the full response
 
-        # Replace the last history tuple with the complete message-response pair
-        history[-1] = (message, full_response)
-        yield history
+        return full_response
 
-    user_input.submit(chat_fn, [user_input, chat_history], chat_history)
+    user_input.submit(chat_fn, user_input, chat_history)
     cancel_button.click(cancel_inference)
 
 if __name__ == "__main__":
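Taken together, the hunks above strip the conversation-history plumbing out of respond() and chat_fn(): every submission now builds a fresh system + user prompt, and the accumulated reply is returned once streaming finishes. As a reading aid, here is a minimal, self-contained sketch of the post-commit respond() shape. The fake_stream() stub and the example call are placeholders added here, not part of app.py (which streams tokens from a local model or the Hugging Face Inference API); the example arguments simply mirror the slider defaults visible in the diff (512, 0.7, 0.95).

    # Minimal sketch of the simplified respond() generator after this commit.
    # fake_stream() is a hypothetical stand-in for the local-model / Inference
    # API streaming that the real app.py performs.

    def fake_stream(messages):
        # Pretend the model streams its answer token by token.
        for token in ["Hello", ",", " world", "!"]:
            yield token

    def respond(message, system_message, max_tokens, temperature, top_p,
                use_local_model=False):
        # No history parameter any more: each call builds a fresh two-message prompt.
        messages = [{"role": "system", "content": system_message}]
        messages.append({"role": "user", "content": message})

        response = ""
        for token in fake_stream(messages):
            response += token
            yield response  # cumulative response so far, as in the real app

    if __name__ == "__main__":
        for partial in respond("Hi", "You are helpful.", 512, 0.7, 0.95):
            print(partial)  # Hello / Hello, / Hello, world / Hello, world!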