import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load the base model in half precision; device_map="auto" lets accelerate
# place the weights on the available GPU(s)/CPU
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Attach the fine-tuned LoRA adapter on top of the frozen base model
model = PeftModel.from_pretrained(base_model, "gaurav2003/room-service-chatbot")
model.eval()

# Tokenizer for the base model; Mistral ships no pad token, so reuse EOS
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
tokenizer.pad_token = tokenizer.eos_token


def chat(user_input):
    # Wrap the question in the Mistral-Instruct chat template ([INST] ... [/INST]);
    # feeding raw text to an instruct model degrades its answers
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_input}],
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response


iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(placeholder="Ask something...", lines=2),
    outputs="text",
    title="Room Service Chatbot",
    description="Ask anything related to your stay or room service.",
)

if __name__ == "__main__":
    iface.launch()
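
# --- Optional: multi-turn variant (a sketch, not part of the deployed app) ---
# gr.Interface above is stateless: every question is answered in isolation. For a
# conversation that carries prior turns, gr.ChatInterface (assumes a recent Gradio;
# with type="messages" the history arrives as a list of {"role", "content"} dicts)
# can pass the history to the callback, which folds it into the chat template.
# chat_with_history is a hypothetical name; model and tokenizer are the objects
# defined above.
#
# def chat_with_history(message, history):
#     # Append the new user turn to the prior turns before templating
#     messages = history + [{"role": "user", "content": message}]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     ).to(model.device)
#     output = model.generate(
#         input_ids, max_new_tokens=150, do_sample=True,
#         temperature=0.7, pad_token_id=tokenizer.eos_token_id,
#     )
#     return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
#
# demo = gr.ChatInterface(chat_with_history, type="messages")
# demo.launch()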