akhaliq HF Staff committed on
Commit
1bf7e3b
·
verified ·
1 Parent(s): 4ba551f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from transformers import AutoModel, AutoTokenizer
5
+ import spaces
6
+
7
# Load the model and tokenizer once at import time so every request reuses them.
# Both are fetched from the same Hub repository, so the id is spelled only once.
_MODEL_ID = 'openbmb/MiniCPM-V-4_5'

# Seed torch's RNG with a fixed value.
torch.manual_seed(100)

# Load in bfloat16 with SDPA attention, switch to eval mode, then move to CUDA.
model = (
    AutoModel.from_pretrained(
        _MODEL_ID,
        trust_remote_code=True,
        attn_implementation='sdpa',
        torch_dtype=torch.bfloat16,
    )
    .eval()
    .cuda()
)

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)
21
+
22
def _load_rgb_image(path):
    """Open the image at ``path`` and return an RGB copy, closing the file."""
    # ``convert`` returns a new, fully loaded image, so the handle opened by
    # ``Image.open`` can be released as soon as the conversion is done.
    with Image.open(path) as img:
        return img.convert('RGB')


def _history_user_content(user_msg):
    """Turn one stored user message into a ``content`` list for the model.

    ``user_msg`` is either plain text or a sequence whose first item is an
    image path and whose optional second item is the accompanying text.
    Returns an empty list when there is nothing usable in the message.
    """
    if isinstance(user_msg, (tuple, list)):
        # NOTE(review): depending on the Gradio version the chatbot may hand a
        # file message back as a list or as a one-element ``(path,)`` tuple;
        # accept both instead of unpacking exactly two items. Confirm against
        # the pinned Gradio version.
        if not user_msg:
            return []
        content = [_load_rgb_image(user_msg[0])]
        text = user_msg[1] if len(user_msg) > 1 else None
        if text:
            content.append(text)
        return content
    return [user_msg] if user_msg else []


def _current_message_content(message):
    """Turn the incoming message (dict or plain text) into a ``content`` list."""
    if isinstance(message, dict):
        # Images first, then the text, mirroring the order used for history.
        content = [_load_rgb_image(path) for path in message.get("files") or []]
        text = message.get("text")
        if text:
            content.append(text)
        return content
    return [message] if message else []


@spaces.GPU
def respond(message, history, enable_thinking):
    """Generate the model's reply to ``message`` given the earlier ``history``.

    Args:
        message: Current user input; either a dict with optional ``"files"``
            (image paths) and ``"text"`` keys, or a plain string.
        history: Earlier turns as ``(user_msg, assistant_msg)`` pairs. Each
            ``user_msg`` is a string or an ``(image_path, text)`` sequence.
            ``None`` is treated as an empty history.
        enable_thinking: Passed straight through to ``model.chat``.

    Returns:
        Whatever ``model.chat`` returns, or a string starting with
        ``"Error: "`` when the message is empty or when building the
        conversation / generating the reply raised an exception.
    """
    # This is the UI boundary: any failure (unreadable image, malformed
    # history, model error) is reported in the chat instead of propagating.
    try:
        current_content = _current_message_content(message)
        if not current_content:
            # Never send a user turn with empty content to the model.
            return "Error: Please enter a message or upload an image."

        msgs = []
        for turn in history or []:
            user_msg, assistant_msg = turn[0], turn[1]
            user_content = _history_user_content(user_msg)
            if not user_content:
                # Drop the whole turn so roles keep alternating user/assistant.
                continue
            msgs.append({"role": "user", "content": user_content})
            if assistant_msg:
                msgs.append({"role": "assistant", "content": [assistant_msg]})

        msgs.append({"role": "user", "content": current_content})

        return model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            enable_thinking=enable_thinking,
        )
    except Exception as e:
        return f"Error: {e}"
76
+
77
# Create Gradio interface
with gr.Blocks(title="MiniCPM-V Chatbot") as demo:
    gr.Markdown(
        """
        # 🤖 MiniCPM-V Multimodal Chatbot

        Upload images and ask questions about them, or have a text conversation.
        The model supports multi-turn conversations with context memory.
        """
    )

    with gr.Row():
        # Left column: conversation view, input box and action buttons.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                type="tuples"
            )

            with gr.Row():
                msg = gr.MultimodalTextbox(
                    interactive=True,
                    file_types=["image"],
                    placeholder="Type a message or upload an image...",
                    show_label=False,
                    container=False
                )

            with gr.Row():
                clear = gr.Button("🗑️ Clear", size="sm")
                submit = gr.Button("📤 Send", variant="primary", size="sm")

        # Right column: generation settings and usage hints.
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            enable_thinking = gr.Checkbox(
                label="Enable Thinking Mode",
                value=False,
                info="Enable the model's thinking process"
            )

            gr.Markdown(
                """
                ### Examples
                - Upload an image and ask "What is in this picture?"
                - Ask "What are the main objects visible?"
                - Follow up with "What should I pay attention to here?"
                """
            )

    # Handle message submission
    def user_submit(message, history, enable_thinking):
        """Append the user's message and the model's reply to the chat.

        Args:
            message: Value of the multimodal textbox; a dict with optional
                ``"files"`` and ``"text"`` keys, or a plain string.
            history: Current chatbot value as ``(user, assistant)`` pairs;
                ``None`` is treated as an empty conversation.
            enable_thinking: Checkbox state, forwarded to ``respond``.

        Returns:
            A ``(textbox_value, history)`` pair: an empty string to clear the
            input box and the updated history. For an empty submission the
            history is returned unchanged and the model is not called.
        """
        if isinstance(message, dict):
            files = message.get("files") or []
            text = message.get("text") or ""
        else:
            files, text = [], message or ""

        history = list(history or [])

        # The Send button can be clicked with an empty box; don't spend a
        # model call (or add an empty turn) on a message with no content.
        if not files and not text:
            return "", history

        # With files, use the (file, text) tuple form for chatbot display;
        # only the first file is shown.
        user_msg = (files[0], text) if files else text

        # The model receives the prior turns plus the raw message; the new
        # turn is added to the displayed history once the reply is known.
        response = respond(message, history, enable_thinking)
        history.append((user_msg, response))

        return "", history

    # Event handlers: pressing Enter and clicking Send do the same thing.
    msg.submit(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )

    submit.click(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )

    # Clear resets both the input box and the conversation.
    clear.click(
        lambda: (None, []),
        outputs=[msg, chatbot]
    )

if __name__ == "__main__":
    demo.launch(share=True)