orrzxz committed on
Commit
cfa8ab3
·
verified ·
1 Parent(s): 728b3f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -69
app.py CHANGED
@@ -157,18 +157,11 @@ def process_input(
157
  def create_interface():
158
  """Create and configure Gradio interface"""
159
 
160
- with gr.Blocks(title="MiniCPM-V-4.5 Multimodal Chat", theme=gr.themes.Soft()) as iface:
161
  gr.Markdown("""
162
- # 🚀 MiniCPM-V-4.5 Multimodal Chat
163
 
164
  A powerful 8B parameter multimodal model that can understand images and videos with GPT-4V level performance.
165
-
166
- **Features:**
167
- - 📸 Single/Multi-image understanding
168
- - 🎥 High refresh rate video understanding (up to 10 FPS)
169
- - 📄 Strong OCR and document parsing
170
- - 🧠 Controllable fast/deep thinking mode
171
- - 🌍 Multilingual support (30+ languages)
172
  """)
173
 
174
  with gr.Row():
@@ -185,8 +178,7 @@ def create_interface():
185
  maximum=30,
186
  value=5,
187
  step=1,
188
- label="Video FPS",
189
- info="Frames per second for video processing (only applies to videos)"
190
  )
191
 
192
  # Context size
@@ -195,8 +187,7 @@ def create_interface():
195
  maximum=4096,
196
  value=2048,
197
  step=256,
198
- label="Max Output Tokens",
199
- info="Maximum number of tokens to generate"
200
  )
201
 
202
  # Temperature
@@ -205,15 +196,13 @@ def create_interface():
205
  maximum=2.0,
206
  value=0.7,
207
  step=0.1,
208
- label="Temperature",
209
- info="Controls randomness in generation"
210
  )
211
 
212
  # Thinking mode
213
  enable_thinking = gr.Checkbox(
214
  label="Enable Deep Thinking",
215
- value=False,
216
- info="Enable deep thinking mode for complex problem solving"
217
  )
218
 
219
  with gr.Column(scale=2):
@@ -221,8 +210,7 @@ def create_interface():
221
  system_prompt = gr.Textbox(
222
  label="System Prompt (Optional)",
223
  placeholder="Enter system instructions here...",
224
- lines=3,
225
- info="Set the behavior and context for the model"
226
  )
227
 
228
  # User prompt
@@ -233,31 +221,13 @@ def create_interface():
233
  )
234
 
235
  # Submit button
236
- submit_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
237
 
238
  # Output
239
  output = gr.Textbox(
240
  label="Model Response",
241
- lines=15,
242
- max_lines=25,
243
- show_copy_button=True
244
  )
245
-
246
- # Examples
247
- gr.Markdown("## 💡 Example Prompts")
248
- gr.Examples(
249
- examples=[
250
- ["What objects do you see in this image?"],
251
- ["Describe the scene in detail."],
252
- ["What is the main action happening in this video?"],
253
- ["Read and transcribe any text visible in the image."],
254
- ["What emotions or mood does this image convey?"],
255
- ["Analyze the composition and visual elements."],
256
- ["What might happen next in this sequence?"]
257
- ],
258
- inputs=[user_prompt],
259
- label="Click any example to use it"
260
- )
261
 
262
  # Event handlers
263
  submit_btn.click(
@@ -271,11 +241,9 @@ def create_interface():
271
  temperature,
272
  enable_thinking
273
  ],
274
- outputs=output,
275
- show_progress=True
276
  )
277
 
278
- # Also allow Enter key submission
279
  user_prompt.submit(
280
  fn=process_input,
281
  inputs=[
@@ -287,37 +255,12 @@ def create_interface():
287
  temperature,
288
  enable_thinking
289
  ],
290
- outputs=output,
291
- show_progress=True
292
  )
293
-
294
- # Information section
295
- with gr.Accordion("📋 Model Information", open=False):
296
- gr.Markdown("""
297
- ### MiniCPM-V-4.5 Specifications
298
-
299
- - **Parameters**: 8B (Qwen3-8B + SigLIP2-400M)
300
- - **Video Compression**: 96x compression rate (6 frames → 64 tokens)
301
- - **Max Resolution**: Up to 1.8M pixels (1344x1344)
302
- - **Languages**: 30+ languages supported
303
- - **Performance**: Surpasses GPT-4o-latest on multiple benchmarks
304
-
305
- ### Usage Tips
306
-
307
- 1. **For Images**: Upload any image format and ask questions about content, objects, text, or analysis
308
- 2. **For Videos**: Adjust FPS based on video content (higher FPS for action, lower for static scenes)
309
- 3. **System Prompt**: Use to set specific roles like "You are an expert art critic" or "Analyze this from a medical perspective"
310
- 4. **Deep Thinking**: Enable for complex reasoning tasks, analysis, or problem-solving
311
- 5. **Temperature**: Lower (0.1-0.3) for factual responses, higher (0.7-1.0) for creative outputs
312
-
313
- ### Supported Formats
314
- - **Images**: JPG, PNG, JPEG, BMP, GIF, WEBP
315
- - **Videos**: MP4, AVI, MOV, MKV, WEBM, M4V
316
- """)
317
 
318
  return iface
319
 
320
  if __name__ == "__main__":
321
  # Create and launch interface
322
  demo = create_interface()
323
- demo.launch()
 
157
  def create_interface():
158
  """Create and configure Gradio interface"""
159
 
160
+ with gr.Blocks(title="MiniCPM-V-4.5 Multimodal Chat") as iface:
161
  gr.Markdown("""
162
+ # MiniCPM-V-4.5 Multimodal Chat
163
 
164
  A powerful 8B parameter multimodal model that can understand images and videos with GPT-4V level performance.
 
 
 
 
 
 
 
165
  """)
166
 
167
  with gr.Row():
 
178
  maximum=30,
179
  value=5,
180
  step=1,
181
+ label="Video FPS"
 
182
  )
183
 
184
  # Context size
 
187
  maximum=4096,
188
  value=2048,
189
  step=256,
190
+ label="Max Output Tokens"
 
191
  )
192
 
193
  # Temperature
 
196
  maximum=2.0,
197
  value=0.7,
198
  step=0.1,
199
+ label="Temperature"
 
200
  )
201
 
202
  # Thinking mode
203
  enable_thinking = gr.Checkbox(
204
  label="Enable Deep Thinking",
205
+ value=False
 
206
  )
207
 
208
  with gr.Column(scale=2):
 
210
  system_prompt = gr.Textbox(
211
  label="System Prompt (Optional)",
212
  placeholder="Enter system instructions here...",
213
+ lines=3
 
214
  )
215
 
216
  # User prompt
 
221
  )
222
 
223
  # Submit button
224
+ submit_btn = gr.Button("Generate Response", variant="primary")
225
 
226
  # Output
227
  output = gr.Textbox(
228
  label="Model Response",
229
+ lines=15
 
 
230
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  # Event handlers
233
  submit_btn.click(
 
241
  temperature,
242
  enable_thinking
243
  ],
244
+ outputs=output
 
245
  )
246
 
 
247
  user_prompt.submit(
248
  fn=process_input,
249
  inputs=[
 
255
  temperature,
256
  enable_thinking
257
  ],
258
+ outputs=output
 
259
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  return iface
262
 
263
  if __name__ == "__main__":
264
  # Create and launch interface
265
  demo = create_interface()
266
+ demo.launch(share=True)