Upload 7 files

- README.md +40 -0
- app.py +147 -0
- deploy.sh +65 -0
- download_examples.py +22 -0
- packages.txt +2 -0
- requirements.txt +4 -0
- style.css +69 -0
README.md
ADDED

---
title: OpenRouter AI Vision Interface
emoji: 🔍
colorFrom: blue
colorTo: indigo
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
---

# OpenRouter AI Vision Interface

This is a Gradio-based web interface for analyzing images with various AI models through the OpenRouter API.

## Features

- Upload and analyze images with AI models
- Choose from multiple vision-language models:
  - Mistral Small
  - Kimi Vision
  - Gemini Pro
  - Qwen VL
  - Mistral 3.1
  - Gemma
  - Llama 3.2 Vision
- Simple and intuitive user interface
- Example images included

## Usage

1. Upload an image
2. Enter a question about the image
3. Select an AI model from the dropdown
4. Click "Analyze Image" to get the AI's response

## API Key

A fallback OpenRouter API key is hardcoded in `app.py`, but keys published in public repositories tend to be revoked quickly, so supply your own by setting the `OPENROUTER_API_KEY` environment variable (for example, as a secret on your Hugging Face Space).
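As a quick sanity check of a key outside the UI, a minimal request that mirrors `app.py`'s client setup should suffice; this is a sketch, with the model id taken from the app's `MODELS` table and an illustrative prompt:

```python
# Minimal OpenRouter key check, assuming OPENROUTER_API_KEY is set in the
# environment; mirrors the client configuration used in app.py.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)
completion = client.chat.completions.create(
    model="mistralai/mistral-small-3.2-24b-instruct:free",
    messages=[{"role": "user", "content": "Say hello."}],
)
print(completion.choices[0].message.content)
```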
app.py
ADDED

import gradio as gr
import os
from openai import OpenAI
import base64
from PIL import Image
import io

# OpenRouter API key: prefer the OPENROUTER_API_KEY environment variable
# (e.g., a Space secret); the hardcoded value is only a fallback.
OPENROUTER_API_KEY = os.getenv(
    "OPENROUTER_API_KEY",
    "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",
)

# Available models (display name -> OpenRouter model id)
MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
    "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
    "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}

def image_to_base64(image):
    """Convert an image (file path or PIL Image) to a base64-encoded JPEG."""
    # If image is a file path
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()

    # If image is a PIL Image: convert to RGB first so RGBA/palette
    # uploads (e.g., PNGs) can be saved as JPEG
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()

def analyze_image(image, prompt, model_name):
    """Analyze an image using the selected OpenRouter model."""
    if image is None:
        return "Please upload an image first."

    try:
        # Initialize OpenAI client with the OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )

        # Convert image to base64
        img_base64 = image_to_base64(image)

        # Create the completion request
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=MODELS[model_name],
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_base64}"
                            },
                        },
                    ],
                }
            ],
        )

        # Return the model's response
        return completion.choices[0].message.content

    except Exception as e:
        return f"Error: {str(e)}"

# Create the examples directory before gr.Examples references files in it
os.makedirs("examples", exist_ok=True)

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🔍 OpenRouter AI Vision Interface

        Upload an image and ask a question about it. The AI will analyze the image and respond.

        *Powered by the OpenRouter API with multiple vision-language models*
        """
    )

    with gr.Row():
        with gr.Column():
            # Input components with custom styling; gr.Group replaces
            # gr.Box, which was removed in Gradio 4.x
            with gr.Group(elem_classes=["input-container"]):
                image_input = gr.Image(type="pil", label="Upload Image", elem_classes=["image-upload-container"])
                prompt_input = gr.Textbox(label="Your Question", placeholder="What is in this image?", value="What is in this image?")
                model_dropdown = gr.Dropdown(
                    choices=list(MODELS.keys()),
                    value="Mistral Small",
                    label="Select AI Model",
                    info="Choose from different vision-language models",
                )
                submit_button = gr.Button("Analyze Image", variant="primary")

        with gr.Column():
            # Output component with custom styling
            with gr.Group(elem_classes=["output-container"]):
                output_text = gr.Textbox(label="AI Response", lines=12)

    gr.Markdown(
        """
        ### Available Models
        - **Mistral Small**: Powerful vision-language model from Mistral AI
        - **Kimi Vision**: Specialized vision model from Moonshot AI
        - **Gemini Pro**: Google's advanced multimodal model
        - **Qwen VL**: Alibaba's vision-language model
        - **Mistral 3.1**: Earlier version of Mistral's vision model
        - **Gemma**: Google's lightweight vision model
        - **Llama 3.2 Vision**: Meta's vision-enabled large language model
        """
    )

    # Set up the submit action
    submit_button.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input, model_dropdown],
        outputs=output_text,
    )

    # Add examples
    gr.Examples(
        examples=[
            ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
            ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
        ],
        inputs=[image_input, prompt_input, model_dropdown],
    )

# Hugging Face Spaces runs app.py directly, so the main guard is sufficient;
# share=True is unnecessary (and ignored) on Spaces.
if __name__ == "__main__":
    demo.launch()
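Since `image_to_base64` also accepts a file path, the handler can be smoke-tested without launching the UI. A sketch, assuming the example image has been downloaded and a key is configured:

```python
# Hypothetical smoke test, run from the repo root after
# `python download_examples.py`; importing app builds the Blocks UI
# but does not launch it.
from app import analyze_image

print(analyze_image("examples/nature.jpg", "What is in this image?", "Mistral Small"))
```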
deploy.sh
ADDED

#!/bin/bash

# Script to deploy the application to Hugging Face Spaces

set -e

# Check if huggingface_hub is installed
if ! pip show huggingface_hub > /dev/null 2>&1; then
    echo "Installing huggingface_hub..."
    pip install huggingface_hub
fi

# Check if git is installed
if ! command -v git &> /dev/null; then
    echo "Error: git is not installed. Please install git and try again."
    exit 1
fi

# Get Hugging Face username
read -p "Enter your Hugging Face username: " HF_USERNAME
if [ -z "$HF_USERNAME" ]; then
    echo "Error: Hugging Face username cannot be empty."
    exit 1
fi

# Get Space name
read -p "Enter a name for your Hugging Face Space (lowercase, no spaces): " SPACE_NAME
if [ -z "$SPACE_NAME" ]; then
    echo "Error: Space name cannot be empty."
    exit 1
fi

# Convert to lowercase and replace spaces with dashes
SPACE_NAME=$(echo "$SPACE_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')

# Create examples directory
mkdir -p examples

# Download example images
python download_examples.py

# Initialize git repository if not already initialized
if [ ! -d .git ]; then
    git init
fi

# Create the Hugging Face Space via the API (exist_ok avoids failing on re-runs)
echo "Creating Hugging Face Space..."
python -c "from huggingface_hub import create_repo; create_repo(repo_id='$HF_USERNAME/$SPACE_NAME', repo_type='space', space_sdk='gradio', exist_ok=True)"

# Add remote, or update it if it already exists
git remote add origin "https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME" || git remote set-url origin "https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"

# Add all files
git add .

# Commit
git commit -m "Initial commit with proper configuration"

# Push to Hugging Face (rename the branch to main first, since a fresh
# `git init` may default to master while Spaces expects main)
echo "Pushing to Hugging Face Spaces..."
git branch -M main
git push -u origin main --force

echo "Deployment complete! Your application is now available at: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
echo "Note: It may take a few minutes for the application to build and deploy."
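The script assumes you are already authenticated to the Hugging Face Hub; otherwise the `create_repo` call and the HTTPS push will be rejected. A one-time login sketch (the token placeholder is illustrative, use one from your account settings):

```python
# One-time Hub authentication before running deploy.sh; storing the token
# as a git credential also lets the HTTPS push in the script succeed.
from huggingface_hub import login

login(token="hf_xxx_your_token_here", add_to_git_credential=True)
```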
download_examples.py
ADDED

import os
import requests
from PIL import Image
from io import BytesIO

# Create examples directory if it doesn't exist
os.makedirs("examples", exist_ok=True)

# URL of the example image
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

# Download and save the image (timeout added so a stalled request fails fast)
response = requests.get(image_url, timeout=30)
if response.status_code == 200:
    # Open the image from the response content
    img = Image.open(BytesIO(response.content))

    # Save the image to the examples directory
    img.save("examples/nature.jpg")
    print("Example image downloaded successfully!")
else:
    print(f"Failed to download image. Status code: {response.status_code}")
packages.txt
ADDED

libgl1-mesa-glx
libglib2.0-0
requirements.txt
ADDED

gradio>=4.0.0
openai>=1.0.0
Pillow>=9.0.0
requests>=2.28.0
style.css
ADDED

/* Custom CSS for OpenRouter AI Vision Interface */

/* Main container styling */
body {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    background-color: #f7f7f7;
}

/* Header styling */
h1 {
    color: #2a2a2a;
    font-weight: 700;
    margin-bottom: 0.5rem;
}

/* Button styling */
button.primary {
    background-color: #2563eb;
    color: white;
    border: none;
    padding: 0.5rem 1rem;
    border-radius: 0.375rem;
    font-weight: 500;
    transition: background-color 0.2s;
}

button.primary:hover {
    background-color: #1d4ed8;
}

/* Input container styling */
.input-container {
    background-color: white;
    border-radius: 0.5rem;
    padding: 1.5rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
}

/* Output container styling */
.output-container {
    background-color: white;
    border-radius: 0.5rem;
    padding: 1.5rem;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
    min-height: 200px;
}

/* Model dropdown styling */
select {
    border: 1px solid #e2e8f0;
    border-radius: 0.375rem;
    padding: 0.5rem;
    width: 100%;
    background-color: white;
}

/* Image upload area styling */
.image-upload-container {
    border: 2px dashed #cbd5e1;
    border-radius: 0.5rem;
    padding: 2rem;
    text-align: center;
    background-color: #f8fafc;
    transition: border-color 0.2s;
}

.image-upload-container:hover {
    border-color: #94a3b8;
}