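"""Gradio Space app for the SBX Certification Query Helper.

Serves a fine-tuned Mistral-7B model (LoRA adapter applied via PEFT) behind a
simple question-and-answer UI for SBX certification queries.
"""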
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig
# Model and tokenizer initialization
MODEL_NAME = "satishpednekar/sbxcertqueryhelper"
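# NOTE: MODEL_NAME is only referenced by load_model_org() below; the loader the
# app actually uses, load_model(), resolves the base model from the adapter's
# PeftConfig instead.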
def load_model_org():
    """Alternative loader: the fine-tuned model repo directly, in float16 (unused)."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    # Load without 8-bit quantization; float16 halves memory and suits GPU inference
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return model, tokenizer
def load_model_gpu():
    """Alternative loader: base model plus LoRA adapter, in float16 on GPU (unused)."""
    # Load the base model first
    base_model = AutoModelForCausalLM.from_pretrained(
        "unsloth/mistral-7b-v0.3",  # base model the adapter was trained on
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    # Apply the PEFT (LoRA) adapter weights on top of the base model
    model = PeftModel.from_pretrained(
        base_model,
        "satishpednekar/sbx-qhelper-mistral-loraWeights",  # trained LoRA weights
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "unsloth/mistral-7b-v0.3",
        trust_remote_code=True,
    )
    return model, tokenizer
def load_model():
    """Load the base model plus LoRA adapter in float32 on CPU (used by the app)."""
    config = PeftConfig.from_pretrained("satishpednekar/sbx-qhelper-mistral-loraWeights")
    model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        torch_dtype=torch.float32,  # full precision; no quantization on CPU
        device_map=None,
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(
        model,
        "satishpednekar/sbx-qhelper-mistral-loraWeights",
        torch_dtype=torch.float32,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        config.base_model_name_or_path,
        trust_remote_code=True,
    )
    model = model.to("cpu").eval()
    return model, tokenizer
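# Optional: PEFT's merge_and_unload() can fold the LoRA weights into the base
# model, removing the adapter indirection at inference time. Whether that
# measurably speeds up CPU inference here is an assumption, so it is left as a
# commented sketch:
#   model = model.merge_and_unload()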
# Initialize model and tokenizer
print("Loading model...")
model, tokenizer = load_model()
print("Model loaded successfully!")
def generate_response(prompt, max_length=512, temperature=0.7, top_p=0.95):
    """Generate a response to `prompt` using the fine-tuned model."""
    try:
        # Tokenize the prompt and move it to whichever device the model is on
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        # Generate; note that max_length counts the prompt tokens as well
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
        )
        # Decode the generated tokens back into text
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the echoed prompt if it appears at the start of the output
        if response.startswith(prompt):
            response = response[len(prompt):].strip()
        return response
    except Exception as e:
        return f"An error occurred: {e}"
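# Illustrative usage (placeholder question, not from the training data):
#   print(generate_response("What topics does the SBX certification exam cover?"))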
# Build the Gradio interface
def main():
    with gr.Blocks(title="SBX Certification Query Helper") as demo:
        gr.Markdown("""
        # SBX Certification Query Helper
        Ask questions about SBX certifications and get detailed answers!
        """)
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Your Question",
                    placeholder="Enter your question about SBX certifications...",
                    lines=3,
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature",
                        info="Higher values make the output more random; lower values keep it focused",
                    )
                    max_length = gr.Slider(
                        minimum=64,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Maximum Length",
                        info="Maximum length of the generated response",
                    )
                submit_btn = gr.Button("Get Answer", variant="primary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="Answer",
                    lines=10,
                    show_copy_button=True,
                )
        # Wire up the button; inputs map positionally to
        # generate_response(prompt, max_length, temperature)
        submit_btn.click(
            fn=generate_response,
            inputs=[input_text, max_length, temperature],
            outputs=output_text,
        )
        gr.Markdown("""
        ### Tips:
        - Be specific in your questions
        - Include the certification name if you're asking about a specific certification
        - Adjust the temperature slider to control response creativity
        """)
    return demo
if __name__ == "__main__":
    demo = main()
    demo.queue()  # queue requests so concurrent users are handled in order
    demo.launch(
        share=True,  # create a public share link
        server_name="0.0.0.0",  # listen on all network interfaces
    )