import torch
import gradio as gr
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Initialize model and tokenizer
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-4_5',
    trust_remote_code=True,
    attn_implementation='sdpa',
    torch_dtype=torch.bfloat16,
)
model = model.eval().cuda()
tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-V-4_5', trust_remote_code=True)

# Default chat parameters
enable_thinking = False


def chat_with_model(image, question, history):
    """
    Chat with the MiniCPM model.

    Args:
        image: PIL Image or None
        question: str, user's text question
        history: list of previous conversation turns

    Returns:
        str: model's response
        list: updated conversation history
    """
    # Build messages with history context
    msgs = []
    for h in history:
        msgs.append({"role": "user", "content": h[0]})
        msgs.append({"role": "assistant", "content": h[1]})

    # Add current user message (include the image if one was uploaded)
    if image is not None:
        msgs.append({"role": "user", "content": [image, question]})
    else:
        msgs.append({"role": "user", "content": question})

    # Generate model response
    answer = model.chat(
        msgs=msgs,
        tokenizer=tokenizer,
        enable_thinking=enable_thinking
    )

    # Update history
    history.append((question if image is None else [image, question], answer))

    return answer, history


def format_history_for_display(history):
    """Show only the text part of each turn in the Chatbot; images stay in the state for model context."""
    return [
        (user if isinstance(user, str) else user[1], assistant)
        for user, assistant in history
    ]


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# MiniCPM-V-4.5 Chat Interface")
    gr.Markdown("Upload an image and ask questions, or chat without an image")

    # Store conversation history
    chat_history = gr.State([])

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image (optional)")
            question_input = gr.Textbox(label="Your Question", placeholder="Enter your question here...")
            submit_btn = gr.Button("Submit")
            clear_btn = gr.Button("Clear History")

        with gr.Column():
            response_output = gr.Textbox(label="Model Response", interactive=False)
            chat_display = gr.Chatbot(label="Conversation History")

    # Handle submit action: generate a response, then refresh the chat display
    submit_btn.click(
        fn=chat_with_model,
        inputs=[image_input, question_input, chat_history],
        outputs=[response_output, chat_history]
    ).then(
        fn=format_history_for_display,
        inputs=[chat_history],
        outputs=[chat_display]
    )

    # Clear history
    def clear_history():
        return [], [], ""

    clear_btn.click(
        fn=clear_history,
        outputs=[chat_history, chat_display, response_output]
    )

# Launch demo
if __name__ == "__main__":
    demo.launch()