import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download

# 🔹 Download the model snapshot from Hugging Face
model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
model_path = snapshot_download(repo_id=model_name, repo_type="model")

# 🔹 Load the model & tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 🔹 System prompt (the <reasoning>/<answer> tags render invisibly in HTML,
# which is why they were missing from the flattened copy of this script)
SYSTEM_PROMPT = """
Respond in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
"""

# 🔹 Function to generate a response
def chat_response(user_input, top_p, top_k, temperature, max_length):
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input}
    ]

    # 🔹 Format & tokenize the input
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # 🔹 Generate the response (Gradio sliders return floats, so cast the integer arguments;
    # max_new_tokens bounds the completion length rather than prompt + completion)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_length),
            do_sample=True,
            top_p=top_p,
            top_k=int(top_k),
            temperature=temperature
        )

    # 🔹 Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response

# 🔹 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")

    with gr.Row():
        chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)

    with gr.Row():
        user_input = gr.Textbox(label="Your Prompt", placeholder="Ask me anything...", lines=2)

    with gr.Accordion("🔧 Advanced Settings", open=False):
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
        top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
        max_length = gr.Slider(128, 1024, value=512, step=1, label="Max New Tokens")

    with gr.Row():
        submit_button = gr.Button("Generate Response")

    submit_button.click(
        chat_response,
        inputs=[user_input, top_p, top_k, temperature, max_length],
        outputs=[chatbot]
    )

# 🔹 Launch the Gradio app
demo.launch()
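
# 🔹 Optional: a hedged alternative to the plain launch above. Gradio's launch()
# also accepts server/share options if you want the demo reachable beyond
# localhost; the host and port below are assumptions (Gradio's usual defaults),
# not anything this model requires. Uncomment to use instead of demo.launch():
# demo.launch(server_name="0.0.0.0", server_port=7860, share=True)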