# Hugging Face Space (status: Running)
import gradio as gr | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from huggingface_hub import snapshot_download | |
# Download the model repository snapshot from the Hugging Face Hub and keep
# the local directory path it returns.
model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
model_path = snapshot_download(repo_id=model_name, repo_type="model")

# Load the causal-LM weights and the matching tokenizer from that directory.
# Both dtype and device placement are left to the library ("auto").
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
# πΉ System prompt | |
SYSTEM_PROMPT = """ | |
Respond in the following format: | |
<reasoning> | |
... | |
</reasoning> | |
<answer> | |
... | |
</answer> | |
""" | |
# Generate a single-turn reply from the model
def chat_response(user_input, top_p, top_k, temperature, max_length):
    """Return the model's reply to ``user_input``.

    The prompt is a two-message conversation (``SYSTEM_PROMPT`` followed by
    the user's text) rendered with the tokenizer's chat template.

    Args:
        user_input: The user's prompt text.
        top_p: Nucleus-sampling probability mass passed to ``generate``.
        top_k: Number of highest-probability tokens kept for sampling.
        temperature: Sampling temperature.
        max_length: Value passed to ``generate`` as ``max_length``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back), with special tokens stripped.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]

    # Render the conversation as text, ending with the assistant turn marker,
    # then tokenize it onto the same device as the model.
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # UI sliders may deliver numbers as floats; top_k and max_length must be
    # integers for generate(), so coerce them here.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=int(max_length),
            do_sample=True,
            top_p=float(top_p),
            top_k=int(top_k),
            temperature=float(temperature),
        )

    # generate() returns prompt + continuation; drop the prompt tokens so the
    # reply does not repeat the system prompt and the user's own text.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    return response
# Gradio UI: one prompt box, one read-only response box, and a collapsible
# panel of sampling controls wired to chat_response.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")

    with gr.Row():
        chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)

    with gr.Row():
        user_input = gr.Textbox(label="Your Prompt", placeholder="Ask me anything...", lines=2)

    with gr.Accordion("🔧 Advanced Settings", open=False):
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
        # step=1 keeps these two integer-valued; generate() needs ints here.
        top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
        max_length = gr.Slider(128, 1024, value=512, step=1, label="Max Length")

    with gr.Row():
        submit_button = gr.Button("Generate Response")

    # Input order must match chat_response's positional parameters.
    submit_button.click(
        chat_response,
        inputs=[user_input, top_p, top_k, temperature, max_length],
        outputs=[chatbot],
    )

# Launch the Gradio app
demo.launch()