HyperX-Sen committed on
Commit
a4f9a8d
·
verified ·
1 Parent(s): 5c37b73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -51,7 +51,7 @@ def extract_response(full_response):
51
  answer = answer_match.group(1).strip() if answer_match else ""
52
  return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
53
 
54
- # 🔹 Streaming response function
55
  def chat_response(user_input, top_p, top_k, temperature, max_length):
56
  messages = [
57
  {"role": "system", "content": f"{SYSTEM_PROMPT}"},
@@ -61,23 +61,22 @@ def chat_response(user_input, top_p, top_k, temperature, max_length):
61
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
62
  inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
63
 
64
- full_response = ""
65
-
66
  with torch.no_grad():
67
- for token in model.generate(
68
  **inputs,
69
  max_length=max_length,
70
  do_sample=True,
71
  top_p=top_p,
72
  top_k=top_k,
73
  temperature=temperature
74
- ):
75
- full_response += tokenizer.decode(token, skip_special_tokens=True)
76
- yield gr.Textbox.update(value=extract_response(full_response))
 
77
 
78
  # 🔹 Gradio UI
79
  with gr.Blocks() as demo:
80
- gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot (Streaming)")
81
 
82
  with gr.Row():
83
  chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
@@ -97,4 +96,4 @@ with gr.Blocks() as demo:
97
  submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
98
 
99
  # 🔹 Launch the Gradio app
100
- demo.launch()
 
51
  answer = answer_match.group(1).strip() if answer_match else ""
52
  return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
53
 
54
+ # 🔹 Function to generate response
55
  def chat_response(user_input, top_p, top_k, temperature, max_length):
56
  messages = [
57
  {"role": "system", "content": f"{SYSTEM_PROMPT}"},
 
61
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
62
  inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
63
 
 
 
64
  with torch.no_grad():
65
+ output = model.generate(
66
  **inputs,
67
  max_length=max_length,
68
  do_sample=True,
69
  top_p=top_p,
70
  top_k=top_k,
71
  temperature=temperature
72
+ )
73
+
74
+ full_response = tokenizer.decode(output[0], skip_special_tokens=True)
75
+ return extract_response(full_response)
76
 
77
  # 🔹 Gradio UI
78
  with gr.Blocks() as demo:
79
+ gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")
80
 
81
  with gr.Row():
82
  chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
 
96
  submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
97
 
98
  # 🔹 Launch the Gradio app
99
+ demo.launch()