Upload 7 files
- README.md +40 -0
- app.py +147 -0
- deploy.sh +65 -0
- download_examples.py +22 -0
- packages.txt +2 -0
- requirements.txt +4 -0
- style.css +69 -0
README.md
ADDED
@@ -0,0 +1,40 @@
---
title: OpenRouter AI Vision Interface
emoji: 🔍
colorFrom: blue
colorTo: indigo
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
---

# OpenRouter AI Vision Interface

This is a Gradio-based web interface that allows you to analyze images using various AI models through the OpenRouter API.

## Features

- Upload and analyze images with AI models
- Choose from multiple vision-language models:
  - Mistral Small
  - Kimi Vision
  - Gemini Pro
  - Qwen VL
  - Mistral 3.1
  - Gemma
  - Llama 3.2 Vision
- Simple and intuitive user interface
- Example images included

## Usage

1. Upload an image
2. Enter a question about the image
3. Select an AI model from the dropdown
4. Click "Analyze Image" to get the AI's response

## API Key

An OpenRouter API key is already included in the code. If you want to use your own key, replace the value of `OPENROUTER_API_KEY` in `app.py`.
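Hardcoding the key works for a quick demo, but for a public Space it is safer to read it from a Space secret, which Hugging Face exposes to the app as an environment variable. A minimal sketch of what the top of `app.py` could look like under that assumption (the secret name `OPENROUTER_API_KEY` and the `sk-or-...` fallback are placeholders, not part of the original code):

```python
import os

# A minimal sketch, assuming the key is stored as a Space secret named
# OPENROUTER_API_KEY; the fallback is a placeholder so the app still
# starts (API calls will fail) when the secret is not configured.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "sk-or-...")
```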
app.py
ADDED
@@ -0,0 +1,147 @@
import base64
import io
import os

import gradio as gr
from openai import OpenAI
from PIL import Image

# OpenRouter API key
OPENROUTER_API_KEY = "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"

# Available models (display name -> OpenRouter model ID)
MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
    "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
    "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}

def image_to_base64(image):
    """Convert an image (file path or PIL Image) to a base64 string."""
    # If image is a file path
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()

    # If image is a PIL Image: convert to RGB first, so RGBA/palette
    # images (e.g. PNG uploads) can be saved as JPEG without an error
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()

def analyze_image(image, prompt, model_name):
    """Analyze an image using the selected OpenRouter model."""
    if image is None:
        return "Please upload an image first."
    try:
        # Initialize the OpenAI client with the OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )

        # Convert the image to base64
        img_base64 = image_to_base64(image)

        # Create the chat completion request
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=MODELS[model_name],
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                        },
                    ],
                }
            ],
        )

        # Return the model's response
        return completion.choices[0].message.content

    except Exception as e:
        return f"Error: {e}"

# Create the examples directory before gr.Examples references files in it
os.makedirs("examples", exist_ok=True)

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🔍 OpenRouter AI Vision Interface

        Upload an image and ask a question about it. The AI will analyze the image and respond.

        *Powered by the OpenRouter API with multiple vision-language models*
        """
    )

    with gr.Row():
        with gr.Column():
            # Input components with custom styling (gr.Box was removed in
            # Gradio 4; gr.Group is the closest replacement)
            with gr.Group(elem_classes=["input-container"]):
                image_input = gr.Image(type="pil", label="Upload Image", elem_classes=["image-upload-container"])
                prompt_input = gr.Textbox(label="Your Question", placeholder="What is in this image?", value="What is in this image?")
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Mistral Small",
                    label="Select AI Model",
                    info="Choose from different vision-language models",
                )
                submit_button = gr.Button("Analyze Image", variant="primary")

        with gr.Column():
            # Output component with custom styling
            with gr.Group(elem_classes=["output-container"]):
                output_text = gr.Textbox(label="AI Response", lines=12)

            gr.Markdown(
                """
                ### Available Models
                - **Mistral Small**: Powerful vision-language model from Mistral AI
                - **Kimi Vision**: Specialized vision model from Moonshot AI
                - **Gemini Pro**: Google's advanced multimodal model
                - **Qwen VL**: Alibaba's vision-language model
                - **Mistral 3.1**: Earlier version of Mistral's vision model
                - **Gemma**: Google's lightweight vision model
                - **Llama 3.2 Vision**: Meta's vision-enabled large language model
                """
            )

    # Set up the submit action
    submit_button.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input, model_dropdown],
        outputs=output_text,
    )

    # Add examples
    gr.Examples(
        examples=[
            ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
            ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
        ],
        inputs=[image_input, prompt_input, model_dropdown],
    )

# Hugging Face Spaces runs app.py as __main__, so a single launch() here
# covers both local use and Spaces; launching again at import time would
# start the app twice
if __name__ == "__main__":
    demo.launch()
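Because `launch()` now only runs under `__main__`, the core function can be exercised from a plain script or REPL without starting the UI. A minimal smoke test, assuming `app.py` is in the working directory and `examples/nature.jpg` has already been fetched with `download_examples.py`:

```python
# Minimal sketch of a smoke test for analyze_image; importing app builds
# the Blocks UI but does not launch it (launch is guarded by __main__).
from PIL import Image

from app import analyze_image

img = Image.open("examples/nature.jpg")
print(analyze_image(img, "What is in this image?", "Mistral Small"))
```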
deploy.sh
ADDED
@@ -0,0 +1,65 @@
#!/bin/bash

# Script to deploy the application to Hugging Face Spaces

set -e

# Check if huggingface_hub is installed
if ! pip show huggingface_hub > /dev/null 2>&1; then
    echo "Installing huggingface_hub..."
    pip install huggingface_hub
fi

# Check if git is installed
if ! command -v git &> /dev/null; then
    echo "Error: git is not installed. Please install git and try again."
    exit 1
fi

# Get Hugging Face username
read -p "Enter your Hugging Face username: " HF_USERNAME
if [ -z "$HF_USERNAME" ]; then
    echo "Error: Hugging Face username cannot be empty."
    exit 1
fi

# Get Space name
read -p "Enter a name for your Hugging Face Space (lowercase, no spaces): " SPACE_NAME
if [ -z "$SPACE_NAME" ]; then
    echo "Error: Space name cannot be empty."
    exit 1
fi

# Convert to lowercase and replace spaces with dashes
SPACE_NAME=$(echo "$SPACE_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')

# Create examples directory
mkdir -p examples

# Download example images
python download_examples.py

# Initialize git repository if not already initialized
if [ ! -d .git ]; then
    git init
fi

# Spaces deploys from main, so make sure the local branch is named main
git branch -M main

# Create the Hugging Face Space using the API (exist_ok avoids failing
# when the Space already exists)
echo "Creating Hugging Face Space..."
python -c "from huggingface_hub import create_repo; create_repo(repo_id='$HF_USERNAME/$SPACE_NAME', repo_type='space', space_sdk='gradio', exist_ok=True)"

# Add remote
git remote add origin "https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME" || git remote set-url origin "https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"

# Add all files
git add .

# Commit
git commit -m "Initial commit with proper configuration"

# Push to Hugging Face
echo "Pushing to Hugging Face Spaces..."
git push -u origin main --force

echo "Deployment complete! Your application is now available at: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
echo "Note: It may take a few minutes for the application to build and deploy."
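The git flow above is one option; `huggingface_hub` can also push the working directory straight to the Space, which skips the local git setup entirely. A sketch of that alternative, assuming a prior `huggingface-cli login` (`user/space-name` is a placeholder):

```python
# Sketch of a git-free deploy via huggingface_hub's HfApi; requires an
# authenticated session and a placeholder repo_id of "user/space-name".
from huggingface_hub import HfApi

api = HfApi()
api.create_repo(repo_id="user/space-name", repo_type="space",
                space_sdk="gradio", exist_ok=True)
api.upload_folder(folder_path=".", repo_id="user/space-name",
                  repo_type="space")
```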
download_examples.py
ADDED
@@ -0,0 +1,22 @@
import os
from io import BytesIO

import requests
from PIL import Image

# Create the examples directory if it doesn't exist
os.makedirs("examples", exist_ok=True)

# URL of the example image
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

# Download and save the image (the timeout keeps the script from hanging)
response = requests.get(image_url, timeout=30)
if response.status_code == 200:
    # Open the image from the response content
    img = Image.open(BytesIO(response.content))

    # Save the image to the examples directory
    img.save("examples/nature.jpg")
    print("Example image downloaded successfully!")
else:
    print(f"Failed to download image. Status code: {response.status_code}")
packages.txt
ADDED
@@ -0,0 +1,2 @@
libgl1-mesa-glx
libglib2.0-0
requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio>=4.0.0
openai>=1.0.0
Pillow>=9.0.0
requests>=2.28.0
style.css
ADDED
@@ -0,0 +1,69 @@
/* Custom CSS for OpenRouter AI Vision Interface */

/* Main container styling */
body {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    background-color: #f7f7f7;
}

/* Header styling */
h1 {
    color: #2a2a2a;
    font-weight: 700;
    margin-bottom: 0.5rem;
}

/* Button styling */
button.primary {
    background-color: #2563eb;
    color: white;
    border: none;
    padding: 0.5rem 1rem;
    border-radius: 0.375rem;
    font-weight: 500;
    transition: background-color 0.2s;
}

button.primary:hover {
    background-color: #1d4ed8;
}

/* Input container styling */
.input-container {
    background-color: white;
    border-radius: 0.5rem;
    padding: 1.5rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
}

/* Output container styling */
.output-container {
    background-color: white;
    border-radius: 0.5rem;
    padding: 1.5rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
    min-height: 200px;
}

/* Model dropdown styling */
select {
    border: 1px solid #e2e8f0;
    border-radius: 0.375rem;
    padding: 0.5rem;
    width: 100%;
    background-color: white;
}

/* Image upload area styling */
.image-upload-container {
    border: 2px dashed #cbd5e1;
    border-radius: 0.5rem;
    padding: 2rem;
    text-align: center;
    background-color: #f8fafc;
    transition: border-color 0.2s;
}

.image-upload-container:hover {
    border-color: #94a3b8;
}