# Page-header residue captured with this file: "Spaces: Runtime error".
import gradio as gr
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
# Repository id passed to ``from_pretrained`` by ``load_model_org``.
MODEL_NAME = "satishpednekar/sbxcertqueryhelper"
def load_model_org():
    """Load the tokenizer and causal LM published under ``MODEL_NAME``.

    The weights are requested in half precision (``torch.float16``) with
    ``device_map="auto"``, and no 8-bit quantization is requested.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # ``load_in_8bit`` is intentionally not passed: it defaulted to False
    # anyway, and the bare keyword is deprecated in favour of an explicit
    # quantization config.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, tokenizer
def load_model_gpu():
    """Load the base model in half precision and attach the LoRA adapter.

    The base checkpoint is loaded with ``device_map="auto"``, then wrapped in
    a ``PeftModel`` using the adapter weights. The tokenizer comes from the
    base checkpoint.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the PEFT-wrapped model.
    """
    base_model_id = "unsloth/mistral-7b-v0.3"
    adapter_id = "satishpednekar/sbx-qhelper-mistral-loraWeights"

    # Load base model first
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # Load the PEFT adapter weights on top of the base model
    model = PeftModel.from_pretrained(
        base_model,
        adapter_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_id,
        trust_remote_code=True,
    )
    return model, tokenizer
def load_model():
    """Load the LoRA-adapted model on CPU in full precision.

    The adapter's ``PeftConfig`` is read first so that the base checkpoint
    (``config.base_model_name_or_path``) does not have to be hard-coded. The
    base model is loaded as ``float32`` without a device map or any
    quantization arguments, the adapter is attached, and the result is moved
    to the CPU and switched to eval mode.

    Returns:
        tuple: ``(model, tokenizer)``; the tokenizer is the base checkpoint's.
    """
    adapter_id = "satishpednekar/sbx-qhelper-mistral-loraWeights"

    config = PeftConfig.from_pretrained(adapter_id)
    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        torch_dtype=torch.float32,
        device_map=None,
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(
        model,
        adapter_id,
        torch_dtype=torch.float32,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        config.base_model_name_or_path,
        trust_remote_code=True,
    )
    model = model.to("cpu").eval()
    return model, tokenizer
# Initialize model and tokenizer once at import time; ``generate_response``
# reads these module-level names on every call.
print("Loading model...")
model, tokenizer = load_model()
print("Model loaded successfully!")
def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.95):
    """Generate a response to ``prompt`` using the module-level model.

    Args:
        prompt: Question text to answer.
        max_length: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` so numeric UI values are accepted.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        str: The generated text with the prompt removed; a short hint when the
        prompt is blank; or ``"An error occurred: ..."`` if tokenization,
        generation or decoding raises.
    """
    # Nothing useful can be generated from an empty question.
    if not prompt or not prompt.strip():
        return "Please enter a question."

    try:
        # Always place the inputs on the model's device; this is a no-op when
        # both already live on the CPU.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_len = inputs["input_ids"].shape[-1]

        outputs = model.generate(
            **inputs,
            # Bound only the generated part so a long prompt cannot consume
            # the whole budget.
            max_new_tokens=int(max_length),
            temperature=float(temperature),
            top_p=float(top_p),
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )

        # Drop the prompt by token position rather than by string prefix:
        # decoded text is not guaranteed to reproduce the prompt verbatim.
        completion_ids = outputs[0][prompt_len:]
        return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {e}"
# Create the Gradio interface
def main():
    """Build the Gradio Blocks UI for the query helper.

    The layout has a left column (question box, temperature and maximum-length
    sliders, submit button) and a right column (answer box). Clicking the
    button calls ``generate_response`` with the question, the maximum length
    and the temperature, in that order.

    Returns:
        gr.Blocks: The assembled, not yet launched, interface.
    """
    with gr.Blocks(title="SBX Certification Query Helper") as demo:
        gr.Markdown("""
        # SBX Certification Query Helper
        Ask questions about SBX certifications and get detailed answers!
        """)
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question about SBX certifications...",
                    lines=3,
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                        info="Higher values make output more random, lower values make it more focused",
                    )
                    max_length = gr.Slider(
                        minimum=64,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Maximum Length",
                        info="Maximum length of the generated response",
                    )
                submit_btn = gr.Button("Get Answer", variant="primary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="Answer",
                    lines=10,
                    show_copy_button=True,
                )
        # Set up the click event; the input order must match the positional
        # parameters of generate_response (prompt, max_length, temperature).
        submit_btn.click(
            fn=generate_response,
            inputs=[input_text, max_length, temperature],
            outputs=output_text,
        )
        gr.Markdown("""
        ### Tips:
        - Be specific in your questions
        - Include the certification name if you're asking about a specific certification
        - Adjust the temperature slider to control response creativity
        """)
    return demo
if __name__ == "__main__":
    demo = main()
    # Queuing is enabled on the Blocks object; ``launch()`` no longer accepts
    # an ``enable_queue`` keyword in current Gradio releases.
    demo.queue()
    demo.launch(
        share=True,  # Enable sharing
        server_name="0.0.0.0",  # Listen on all network interfaces
    )