import subprocess
import sys

# Function to install dependencies if missing
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# List of required libraries
required_packages = ["transformers", "torch", "accelerate", "gradio", "huggingface_hub"]

# Install any missing packages
for package in required_packages:
    try:
        __import__(package)
    except ImportError:
        install(package)

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import snapshot_download
import re

# 🔹 Use all available CPU threads for torch
torch.set_num_threads(torch.get_num_threads())

# 🔹 Download the model snapshot from Hugging Face
model_name = "HyperX-Sen/Qwen-2.5-7B-Reasoning"
model_path = snapshot_download(repo_id=model_name, repo_type="model")

# 🔹 Load the model & tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 🔹 System prompt (the model is expected to answer inside <reasoning>/<answer> tags)
SYSTEM_PROMPT = """
Respond in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
"""

# 🔹 Function to extract the reasoning and answer sections from the raw output
def extract_response(full_response):
    reasoning_match = re.search(r"<reasoning>(.*?)</reasoning>", full_response, re.DOTALL)
    answer_match = re.search(r"<answer>(.*?)</answer>", full_response, re.DOTALL)
    reasoning = reasoning_match.group(1).strip() if reasoning_match else ""
    answer = answer_match.group(1).strip() if answer_match else ""
    return f"<reasoning>\n{reasoning}\n</reasoning>\n<answer>\n{answer}\n</answer>"

# 🔹 Function to generate a response
def chat_response(user_input, top_p, top_k, temperature, max_length):
    # Gradio sliders can return floats; generate() expects integer top_k / max_length
    top_k = int(top_k)
    max_length = int(max_length)

    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}"},
        {"role": "user", "content": user_input}
    ]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature
        )

    full_response = tokenizer.decode(output[0], skip_special_tokens=True)
    return extract_response(full_response.replace(SYSTEM_PROMPT, ""))

# 🔹 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Qwen-2.5-7B-Reasoning Chatbot")

    with gr.Row():
        chatbot = gr.Textbox(label="Model Response", lines=8, interactive=False)

    with gr.Row():
        user_input = gr.Textbox(label="Your Prompt", placeholder="Ask me anything...", lines=2)

    with gr.Accordion("🔧 Advanced Settings", open=False):
        top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p")
        top_k = gr.Slider(1, 100, value=50, label="Top-k")
        temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
        max_length = gr.Slider(128, 1024, value=512, label="Max Length")

    with gr.Row():
        submit_button = gr.Button("Generate Response")

    submit_button.click(chat_response, inputs=[user_input, top_p, top_k, temperature, max_length], outputs=[chatbot])

# 🔹 Launch the Gradio app
demo.launch()
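
# Usage note (a sketch, not part of the original app): running this file, e.g.
# `python app.py` (filename assumed), starts the Gradio server on a local URL,
# http://127.0.0.1:7860 by default. To expose a temporary public link instead,
# the launch call above can be changed to:
#
#     demo.launch(share=True)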