ffreemt committed on
Commit
8708b41
·
1 Parent(s): e7193a9
Files changed (1) hide show
  1. app.py +19 -10
app.py CHANGED
@@ -50,6 +50,9 @@ def predict(prompt, bot):
50
  system_prompt=default_system_prompt,
51
  user_prompt=prompt.strip(),
52
  )
 
 
 
53
  print(assistant_prefix, end=" ", flush=True)
54
 
55
  response = ""
@@ -91,10 +94,12 @@ def predict_api(prompt):
91
  max_new_tokens=512, # adjust as needed
92
  seed=42,
93
  reset=False, # reset history (cache)
94
- stream=False, # streaming per word/token
95
  threads=os.cpu_count() // 2, # type: ignore # adjust for your CPU
96
  stop=["<|im_end|>", "|<"],
97
  )
 
 
98
  generator = generate(
99
  LLM, _, system_prompt=default_system_prompt, user_prompt=prompt.strip()
100
  )
@@ -144,7 +149,7 @@ class GenerationConfig:
144
 
145
  def format_prompt(system_prompt: str, user_prompt: str):
146
  """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
147
- # May need to be modified for WizardCoder: TODO
148
 
149
  system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
150
  user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
@@ -229,6 +234,9 @@ if "WizardCoder" in MODEL_FILENAME:
229
  # threads=os.cpu_count() // 2 # type: ignore
230
  # )
231
 
 
 
 
232
  GENERATION_CONFIG = GenerationConfig(
233
  temperature=0.2,
234
  top_k=0,
@@ -238,8 +246,8 @@ GENERATION_CONFIG = GenerationConfig(
238
  seed=42,
239
  reset=False, # reset history (cache)
240
  stream=True, # streaming per word/token
241
- threads=os.cpu_count() // 2, # type: ignore # adjust for your CPU
242
- stop=["<|im_end|>", "|<"],
243
  )
244
 
245
  css = """
@@ -273,13 +281,14 @@ with gr.Blocks(
273
 
274
  Try to refresh the browser and try again when occasionally errors occur.
275
 
276
- It takes about >100 seconds to get a response. Restarting the space takes about 5 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 5 minutes to restart. You need to refresh the browser to reload the new space.
277
  """,
278
  elem_classes="xsmall",
279
  )
280
 
281
- chatbot = gr.Chatbot(scroll_to_output=True).style(height=700) # 500
282
- buff = gr.Textbox(show_label=False)
 
283
  with gr.Row():
284
  with gr.Column(scale=4):
285
  msg = gr.Textbox(
@@ -287,7 +296,7 @@ with gr.Blocks(
287
  placeholder="Ask me anything (press Enter or click Submit to send)",
288
  show_label=False,
289
  ).style(container=False)
290
- with gr.Column(scale=1):
291
  with gr.Row():
292
  submit = gr.Button("Submit", elem_classes="xsmall")
293
  stop = gr.Button("Stop", visible=False)
@@ -306,7 +315,7 @@ with gr.Blocks(
306
  change = gr.Button("Change System Prompt")
307
  reset = gr.Button("Reset System Prompt")
308
 
309
- with gr.Accordion("Example inputs", open=True):
310
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
311
  examples = gr.Examples(
312
  examples=[
@@ -358,7 +367,7 @@ with gr.Blocks(
358
  fn=predict,
359
  inputs=[msg, chatbot],
360
  outputs=[msg, chatbot],
361
- queue=True,
362
  show_progress="full",
363
  api_name="predict",
364
  )
 
50
  system_prompt=default_system_prompt,
51
  user_prompt=prompt.strip(),
52
  )
53
+
54
+ ns.generator = generator # for .then
55
+
56
  print(assistant_prefix, end=" ", flush=True)
57
 
58
  response = ""
 
94
  max_new_tokens=512, # adjust as needed
95
  seed=42,
96
  reset=False, # reset history (cache)
97
+ stream=True, # TODO stream=False and generator
98
  threads=os.cpu_count() // 2, # type: ignore # adjust for your CPU
99
  stop=["<|im_end|>", "|<"],
100
  )
101
+
102
+ # TODO stream does not make sense in api?
103
  generator = generate(
104
  LLM, _, system_prompt=default_system_prompt, user_prompt=prompt.strip()
105
  )
 
149
 
150
  def format_prompt(system_prompt: str, user_prompt: str):
151
  """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
152
+ # TODO im_start/im_end possible fix for WizardCoder
153
 
154
  system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
155
  user_prompt = f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
 
234
  # threads=os.cpu_count() // 2 # type: ignore
235
  # )
236
 
237
+ cpu_count = os.cpu_count() // 2 # type: ignore
238
+ logger.debug(f"{cpu_count=}")
239
+
240
  GENERATION_CONFIG = GenerationConfig(
241
  temperature=0.2,
242
  top_k=0,
 
246
  seed=42,
247
  reset=False, # reset history (cache)
248
  stream=True, # streaming per word/token
249
+ threads=cpu_count,
250
+ stop=["<|im_end|>", "|<"], # TODO possible fix of stop
251
  )
252
 
253
  css = """
 
281
 
282
  Try to refresh the browser and try again when occasionally errors occur.
283
 
284
+ It takes about >100 seconds to get a response. Restarting the space takes about 2 minutes if the space is asleep due to inactivity. If the space crashes for some reason, it will also take about 2 minutes to restart. You need to refresh the browser to reload the new space.
285
  """,
286
  elem_classes="xsmall",
287
  )
288
 
289
+ # chatbot = gr.Chatbot().style(height=700) # 500
290
+ chatbot = gr.Chatbot(height=700) # 500
291
+ buff = gr.Textbox(show_label=False, visible=False)
292
  with gr.Row():
293
  with gr.Column(scale=4):
294
  msg = gr.Textbox(
 
296
  placeholder="Ask me anything (press Enter or click Submit to send)",
297
  show_label=False,
298
  ).style(container=False)
299
+ with gr.Column(scale=1, min_width=100):
300
  with gr.Row():
301
  submit = gr.Button("Submit", elem_classes="xsmall")
302
  stop = gr.Button("Stop", visible=False)
 
315
  change = gr.Button("Change System Prompt")
316
  reset = gr.Button("Reset System Prompt")
317
 
318
+ with gr.Accordion("Example Inputs", open=True):
319
  etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
320
  examples = gr.Examples(
321
  examples=[
 
367
  fn=predict,
368
  inputs=[msg, chatbot],
369
  outputs=[msg, chatbot],
370
+ # queue=True,
371
  show_progress="full",
372
  api_name="predict",
373
  )