Spaces:

HyperX-Sen
/

Qwen2.5-7b-Reasoning

Sleeping

App Files Files Community

HyperX-Sen committed 7 days ago

Commit

b10cf74

verified ·

1 Parent(s): a2b4206

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from huggingface_hub import snapshot_download
+# 🔹 Fetch a local snapshot of the model repository from the Hugging Face Hub
+model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
+model_path = snapshot_download(
+    repo_id=model_name,
+    repo_type="model",
+)
+# 🔹 Build the model & tokenizer from the downloaded snapshot
+#    (dtype and device placement are both left to the library via "auto")
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype="auto",
+    device_map="auto",
+)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+# 🔹 System prompt: asks for a <reasoning> section followed by an <answer> section
+SYSTEM_PROMPT = """
+Respond in the following format:
+<reasoning>
+...
+</reasoning>
+<answer>
+...
+</answer>
+"""
+# 🔹 Function to generate response
+def chat_response(user_input, top_p, top_k, temperature, max_length):
+    """Generate one sampled reply to ``user_input`` with the module-level model.
+
+    The user message is wrapped together with ``SYSTEM_PROMPT`` in the
+    tokenizer's chat template, passed to ``model.generate`` with sampling
+    enabled, and only the newly generated text is returned.
+
+    Args:
+        user_input: Text of the user's message.
+        top_p: Nucleus-sampling threshold forwarded to ``generate``.
+        top_k: Top-k cutoff forwarded to ``generate``; coerced to ``int``
+            because UI sliders may deliver a float.
+        temperature: Sampling temperature forwarded to ``generate``.
+        max_length: Value forwarded as ``generate``'s ``max_length``;
+            coerced to ``int`` for the same reason as ``top_k``.
+
+    Returns:
+        The decoded reply, without the prompt text and without special tokens.
+    """
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": user_input},
+    ]
+    # 🔹 Format & tokenize input
+    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+    # Remember how many tokens belong to the prompt so they can be dropped later
+    prompt_length = inputs["input_ids"].shape[-1]
+    # 🔹 Generate response
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            # NOTE(review): max_length bounds prompt + reply together per the
+            # transformers docs; consider max_new_tokens if long prompts matter.
+            max_length=int(max_length),
+            do_sample=True,
+            top_p=top_p,
+            top_k=int(top_k),
+            temperature=temperature,
+        )
+    # 🔹 Decode output
+    # The generated sequence starts with the prompt tokens; slice them off so
+    # the system/user text is not echoed back as part of the response.
+    new_tokens = output[0][prompt_length:]
+    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+    return response
+# 🔹 Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")
+    # Read-only box that shows whatever chat_response returns
+    with gr.Row():
+        chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)
+    # Free-text prompt entered by the user
+    with gr.Row():
+        user_input = gr.Textbox(label="Your Prompt", placeholder="Ask me anything...", lines=2)
+    # Sampling controls, collapsed by default
+    with gr.Accordion("🔧 Advanced Settings", open=False):
+        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
+        # step=1 keeps Top-k and Max Length integral: with no explicit step the
+        # slider can use a fractional increment, and generation expects whole
+        # numbers for these two settings.
+        top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
+        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
+        max_length = gr.Slider(128, 1024, value=512, step=1, label="Max Length")
+    with gr.Row():
+        submit_button = gr.Button("Generate Response")
+    # Slider/textbox values are passed positionally, in the order chat_response declares them
+    submit_button.click(
+        chat_response,
+        inputs=[user_input, top_p, top_k, temperature, max_length],
+        outputs=[chatbot],
+    )
+# 🔹 Launch the Gradio app
+demo.launch()