HyperX-Sen committed on
Commit
ea29aa7
·
verified ·
1 Parent(s): fb63550

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -16
app.py CHANGED
@@ -19,6 +19,10 @@ import gradio as gr
19
  import torch
20
  from transformers import AutoModelForCausalLM, AutoTokenizer
21
  from huggingface_hub import snapshot_download
 
 
 
 
22
 
23
  # 🔹 Download & load the model from Hugging Face
24
  model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
@@ -39,36 +43,44 @@ Respond in the following format:
39
  </answer>
40
  """
41
 
42
- # 🔹 Function to generate response
 
 
 
 
 
 
 
 
43
  def chat_response(user_input, top_p, top_k, temperature, max_length):
44
  messages = [
45
  {"role": "system", "content": f"{SYSTEM_PROMPT}"},
46
  {"role": "user", "content": user_input}
47
  ]
48
-
49
- # 🔹 Format & tokenize input
50
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
51
  inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
52
-
53
- # 🔹 Generate response
54
  with torch.no_grad():
55
- output = model.generate(
56
  **inputs,
57
  max_length=max_length,
58
  do_sample=True,
59
  top_p=top_p,
60
  top_k=top_k,
61
- temperature=temperature
 
62
  )
63
-
64
- # 🔹 Decode output
65
- response = tokenizer.decode(output[0], skip_special_tokens=True)
66
- return response
 
67
 
68
  # 🔹 Gradio UI
69
  with gr.Blocks() as demo:
70
- gr.Markdown("# ๐Ÿค– Qwen-2.5-7B-Reasoning Chatbot")
71
-
72
  with gr.Row():
73
  chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
74
 
@@ -80,11 +92,11 @@ with gr.Blocks() as demo:
80
  top_k = gr.Slider(1, 100, value=50, label="Top-k")
81
  temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
82
  max_length = gr.Slider(128, 1024, value=512, label="Max Length")
83
-
84
  with gr.Row():
85
  submit_button = gr.Button("Generate Response")
86
-
87
- submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
88
 
89
  # 🔹 Launch the Gradio app
90
  demo.launch()
 
19
  import torch
20
  from transformers import AutoModelForCausalLM, AutoTokenizer
21
  from huggingface_hub import snapshot_download
22
+ import re
23
+
24
+ # 🔹 Re-apply PyTorch's current intra-op thread count (a no-op as written; it does not raise the count to a maximum)
25
+ torch.set_num_threads(torch.get_num_threads())
26
 
27
  # 🔹 Download & load the model from Hugging Face
28
  model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
 
43
  </answer>
44
  """
45
 
# 🔹 Function to extract reasoning and answer
def extract_response(full_response):
    """Re-format a (possibly partial) model reply into reasoning/answer tags.

    The text inside the first ``<reasoning>`` and first ``<answer>`` section
    is pulled out, stripped of surrounding whitespace, and re-emitted in a
    fixed layout.

    This is called repeatedly on a growing string while streaming, so a
    section whose closing tag has not been generated yet is treated as
    running to the end of the text instead of being reported as empty.

    Args:
        full_response: Text generated so far. ``None`` or ``""`` is accepted.

    Returns:
        A string of the form
        ``"<reasoning>\\n…\\n</reasoning>\\n<answer>\\n…\\n</answer>"``;
        a section that has not started yet is left empty.
    """
    text = full_response or ""

    def _section(tag):
        # Content after the first opening tag, up to its closing tag or, if
        # the closing tag is still missing, up to the end of the text.
        _, opened, remainder = text.partition(f"<{tag}>")
        if not opened:
            return ""
        content, _, _ = remainder.partition(f"</{tag}>")
        return content.strip()

    reasoning = _section("reasoning")
    answer = _section("answer")
    return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
53
+
# 🔹 Streaming response function
def chat_response(user_input, top_p, top_k, temperature, max_length):
    """Generate a reply to ``user_input`` and yield it incrementally.

    ``model.generate`` does not return a token iterator, and its ``streamer``
    argument must be a streamer object rather than ``True``. Generation is
    therefore run in a background thread that feeds a
    ``TextIteratorStreamer``, and this generator yields the re-formatted
    text each time a new chunk arrives.

    Args:
        user_input: The user's message.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.
        temperature: Sampling temperature.
        max_length: Passed to ``generate`` as ``max_length``.

    Yields:
        ``extract_response`` applied to the text generated so far.
    """
    # Local imports keep this block self-contained with respect to the
    # file's import section.
    from threading import Thread

    from transformers import TextIteratorStreamer

    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}"},
        {"role": "user", "content": user_input},
    ]

    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # skip_prompt=True keeps the echoed system/user prompt out of the output.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_length=int(max_length),  # UI sliders may deliver floats
        do_sample=True,
        top_p=top_p,
        top_k=int(top_k),
        temperature=temperature,
        streamer=streamer,
    )

    def _generate():
        try:
            # Grad mode is per-thread, so no_grad must be entered here
            # rather than in the caller's thread.
            with torch.no_grad():
                model.generate(**generation_kwargs)
        except Exception:
            # Unblock the consumer loop below instead of leaving it waiting
            # on a stream that will never be finished.
            streamer.end()
            raise

    worker = Thread(target=_generate, daemon=True)
    worker.start()

    full_response = ""
    for text_chunk in streamer:
        full_response += text_chunk
        yield extract_response(full_response)

    worker.join()
79
 
80
  # 🔹 Gradio UI
81
  with gr.Blocks() as demo:
82
+ gr.Markdown("# ๐Ÿค– Qwen-2.5-7B-Reasoning Chatbot (Streaming)")
83
+
84
  with gr.Row():
85
  chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
86
 
 
92
  top_k = gr.Slider(1, 100, value=50, label="Top-k")
93
  temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
94
  max_length = gr.Slider(128, 1024, value=512, label="Max Length")
95
+
96
  with gr.Row():
97
  submit_button = gr.Button("Generate Response")
98
+
# `chat_response` is a generator, so each value it yields is pushed to the
# output component as it is produced; `click()` takes no `stream` keyword.
# NOTE: this call belongs inside the `with gr.Blocks() as demo:` context.
submit_button.click(
    chat_response,
    inputs=[user_input, top_p, top_k, temperature, max_length],
    outputs=[chatbot],
)
100
 
101
  # 🔹 Launch the Gradio app
102
  demo.launch()