MiniCPM-V-4_5

Running

App Files Community

orrzxz committed 10 days ago

Commit

cfa8ab3

verified ·

1 Parent(s): 728b3f2

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -69

app.py CHANGED Viewed

@@ -157,18 +157,11 @@ def process_input(
 def create_interface():
     """Create and configure Gradio interface"""
-    with gr.Blocks(title="MiniCPM-V-4.5 Multimodal Chat", theme=gr.themes.Soft()) as iface:
         gr.Markdown("""
-        # 🚀 MiniCPM-V-4.5 Multimodal Chat
         A powerful 8B parameter multimodal model that can understand images and videos with GPT-4V level performance.
-        **Features:**
-        - 📸 Single/Multi-image understanding
-        - 🎥 High refresh rate video understanding (up to 10 FPS)
-        - 📄 Strong OCR and document parsing
-        - 🧠 Controllable fast/deep thinking mode
-        - 🌍 Multilingual support (30+ languages)
         """)
         with gr.Row():
@@ -185,8 +178,7 @@ def create_interface():
                     maximum=30,
                     value=5,
                     step=1,
-                    label="Video FPS",
-                    info="Frames per second for video processing (only applies to videos)"
                 )
                 # Context size
@@ -195,8 +187,7 @@ def create_interface():
                     maximum=4096,
                     value=2048,
                     step=256,
-                    label="Max Output Tokens",
-                    info="Maximum number of tokens to generate"
                 )
                 # Temperature
@@ -205,15 +196,13 @@ def create_interface():
                     maximum=2.0,
                     value=0.7,
                     step=0.1,
-                    label="Temperature",
-                    info="Controls randomness in generation"
                 )
                 # Thinking mode
                 enable_thinking = gr.Checkbox(
                     label="Enable Deep Thinking",
-                    value=False,
-                    info="Enable deep thinking mode for complex problem solving"
                 )
             with gr.Column(scale=2):
@@ -221,8 +210,7 @@ def create_interface():
                 system_prompt = gr.Textbox(
                     label="System Prompt (Optional)",
                     placeholder="Enter system instructions here...",
-                    lines=3,
-                    info="Set the behavior and context for the model"
                 )
                 # User prompt
@@ -233,31 +221,13 @@ def create_interface():
                 )
                 # Submit button
-                submit_btn = gr.Button("🚀 Generate Response", variant="primary", size="lg")
                 # Output
                 output = gr.Textbox(
                     label="Model Response",
-                    lines=15,
-                    max_lines=25,
-                    show_copy_button=True
                 )
-        # Examples
-        gr.Markdown("## 💡 Example Prompts")
-        gr.Examples(
-            examples=[
-                ["What objects do you see in this image?"],
-                ["Describe the scene in detail."],
-                ["What is the main action happening in this video?"],
-                ["Read and transcribe any text visible in the image."],
-                ["What emotions or mood does this image convey?"],
-                ["Analyze the composition and visual elements."],
-                ["What might happen next in this sequence?"]
-            ],
-            inputs=[user_prompt],
-            label="Click any example to use it"
-        )
         # Event handlers
         submit_btn.click(
@@ -271,11 +241,9 @@ def create_interface():
                 temperature,
                 enable_thinking
             ],
-            outputs=output,
-            show_progress=True
         )
-        # Also allow Enter key submission
         user_prompt.submit(
             fn=process_input,
             inputs=[
@@ -287,37 +255,12 @@ def create_interface():
                 temperature,
                 enable_thinking
             ],
-            outputs=output,
-            show_progress=True
         )
-        # Information section
-        with gr.Accordion("📋 Model Information", open=False):
-            gr.Markdown("""
-            ### MiniCPM-V-4.5 Specifications
-            - **Parameters**: 8B (Qwen3-8B + SigLIP2-400M)
-            - **Video Compression**: 96x compression rate (6 frames → 64 tokens)
-            - **Max Resolution**: Up to 1.8M pixels (1344x1344)
-            - **Languages**: 30+ languages supported
-            - **Performance**: Surpasses GPT-4o-latest on multiple benchmarks
-            ### Usage Tips
-            1. **For Images**: Upload any image format and ask questions about content, objects, text, or analysis
-            2. **For Videos**: Adjust FPS based on video content (higher FPS for action, lower for static scenes)
-            3. **System Prompt**: Use to set specific roles like "You are an expert art critic" or "Analyze this from a medical perspective"
-            4. **Deep Thinking**: Enable for complex reasoning tasks, analysis, or problem-solving
-            5. **Temperature**: Lower (0.1-0.3) for factual responses, higher (0.7-1.0) for creative outputs
-            ### Supported Formats
-            - **Images**: JPG, PNG, JPEG, BMP, GIF, WEBP
-            - **Videos**: MP4, AVI, MOV, MKV, WEBM, M4V
-            """)
     return iface
 if __name__ == "__main__":
     # Create and launch interface
     demo = create_interface()
-    demo.launch()

 def create_interface():
     """Create and configure Gradio interface"""
+    with gr.Blocks(title="MiniCPM-V-4.5 Multimodal Chat") as iface:
         gr.Markdown("""
+        # MiniCPM-V-4.5 Multimodal Chat
         A powerful 8B parameter multimodal model that can understand images and videos with GPT-4V level performance.
         """)
         with gr.Row():
                     maximum=30,
                     value=5,
                     step=1,
+                    label="Video FPS"
                 )
                 # Context size
                     maximum=4096,
                     value=2048,
                     step=256,
+                    label="Max Output Tokens"
                 )
                 # Temperature
                     maximum=2.0,
                     value=0.7,
                     step=0.1,
+                    label="Temperature"
                 )
                 # Thinking mode
                 enable_thinking = gr.Checkbox(
                     label="Enable Deep Thinking",
+                    value=False
                 )
             with gr.Column(scale=2):
                 system_prompt = gr.Textbox(
                     label="System Prompt (Optional)",
                     placeholder="Enter system instructions here...",
+                    lines=3
                 )
                 # User prompt
                 )
                 # Submit button
+                submit_btn = gr.Button("Generate Response", variant="primary")
                 # Output
                 output = gr.Textbox(
                     label="Model Response",
+                    lines=15
                 )
         # Event handlers
         submit_btn.click(
                 temperature,
                 enable_thinking
             ],
+            outputs=output
         )
         user_prompt.submit(
             fn=process_input,
             inputs=[
                 temperature,
                 enable_thinking
             ],
+            outputs=output
         )
     return iface
 if __name__ == "__main__":
     # Create and launch interface
     demo = create_interface()
+    demo.launch(share=True)