Spaces:

HyperX-Sen
/

Qwen2.5-7b-Reasoning

Sleeping

App Files Community

HyperX-Sen committed 6 days ago

Commit

a4f9a8d

verified ·

1 Parent(s): 5c37b73

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -9

app.py CHANGED Viewed

@@ -51,7 +51,7 @@ def extract_response(full_response):
     answer = answer_match.group(1).strip() if answer_match else ""
     return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
-# 🔹 Streaming response function
 def chat_response(user_input, top_p, top_k, temperature, max_length):
     messages = [
         {"role": "system", "content": f"{SYSTEM_PROMPT}"},
@@ -61,23 +61,22 @@ def chat_response(user_input, top_p, top_k, temperature, max_length):
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-    full_response = ""
     with torch.no_grad():
-        for token in model.generate(
             **inputs,
             max_length=max_length,
             do_sample=True,
             top_p=top_p,
             top_k=top_k,
             temperature=temperature
-        ):
-            full_response += tokenizer.decode(token, skip_special_tokens=True)
-            yield gr.Textbox.update(value=extract_response(full_response))
 # 🔹 Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot (Streaming)")
     with gr.Row():
         chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
@@ -97,4 +96,4 @@ with gr.Blocks() as demo:
     submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
 # 🔹 Launch the Gradio app
-demo.launch()

     answer = answer_match.group(1).strip() if answer_match else ""
     return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"
+# 🔹 Function to generate response
 def chat_response(user_input, top_p, top_k, temperature, max_length):
     messages = [
         {"role": "system", "content": f"{SYSTEM_PROMPT}"},
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
     with torch.no_grad():
+        output = model.generate(
             **inputs,
             max_length=max_length,
             do_sample=True,
             top_p=top_p,
             top_k=top_k,
             temperature=temperature
+        )
+    full_response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return extract_response(full_response)
 # 🔹 Gradio UI
 with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")
     with gr.Row():
         chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
     submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
 # 🔹 Launch the Gradio app
+demo.launch()