Spaces:

HyperX-Sen
/

Qwen2.5-7b-Reasoning

Running

HyperX-Sen committed 8 days ago

Commit

5c37b73

verified ·

1 Parent(s): ea29aa7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -61,21 +61,19 @@ def chat_response(user_input, top_p, top_k, temperature, max_length):
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
     with torch.no_grad():
-        stream = model.generate(
             **inputs,
             max_length=max_length,
             do_sample=True,
             top_p=top_p,
             top_k=top_k,
-            temperature=temperature,
-            streamer=True
-        )
-    full_response = ""
-    for token in stream:
-        full_response += tokenizer.decode(token, skip_special_tokens=True)
-        yield extract_response(full_response)
 # 🔹 Gradio UI
 with gr.Blocks() as demo:
@@ -96,7 +94,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         submit_button = gr.Button("Generate Response")
-    submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot], stream=True)
 # 🔹 Launch the Gradio app
-demo.launch()

     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+    full_response = ""
     with torch.no_grad():
+        for token in model.generate(
             **inputs,
             max_length=max_length,
             do_sample=True,
             top_p=top_p,
             top_k=top_k,
+            temperature=temperature
+        ):
+            full_response += tokenizer.decode(token, skip_special_tokens=True)
+            yield gr.Textbox.update(value=extract_response(full_response))
 # 🔹 Gradio UI
 with gr.Blocks() as demo:
     with gr.Row():
         submit_button = gr.Button("Generate Response")
+    submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])
 # 🔹 Launch the Gradio app
+demo.launch()