import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model with 4-bit quantization (requires bitsandbytes; drop the flag to load in fp16)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

def respond(message, history):
    # With type="messages", history already arrives as a list of
    # {"role": ..., "content": ...} dicts, so keep just those keys and
    # append the new user turn before applying the chat template.
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": message})

    # Generate a response
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
    outputs = model.generate(inputs, max_new_tokens=200)
    # Decode only the newly generated tokens, not the echoed prompt
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

# Create the chat interface using the openai-style message format
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(type="messages"),  # ← this fixes the tuples-format deprecation warning
    title="Mistral 7B Chatbot",
)

if __name__ == "__main__":
    demo.launch()
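# Note: recent transformers releases deprecate passing load_in_4bit directly to
# from_pretrained in favor of a BitsAndBytesConfig. A minimal sketch of that
# alternative is below (commented out so the script keeps a single model load;
# it assumes bitsandbytes is installed):
#
# from transformers import BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "mistralai/Mistral-7B-Instruct-v0.1",
#     device_map="auto",
#     quantization_config=bnb_config,
# )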