SHIKARICHACHA committed
Commit 25a1f05 · verified · 1 parent: 5b8ac07

Upload 7 files

Files changed (7)
  1. README.md +40 -0
  2. app.py +147 -0
  3. deploy.sh +65 -0
  4. download_examples.py +22 -0
  5. packages.txt +2 -0
  6. requirements.txt +4 -0
  7. style.css +69 -0
README.md ADDED
@@ -0,0 +1,40 @@
+ ---
+ title: OpenRouter AI Vision Interface
+ emoji: 🔍
+ colorFrom: blue
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.0.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # OpenRouter AI Vision Interface
+
+ A Gradio-based web interface for analyzing images with a range of AI models through the OpenRouter API.
+
+ ## Features
+
+ - Upload and analyze images with AI models
+ - Choose from multiple vision-language models:
+   - Mistral Small
+   - Kimi Vision
+   - Gemini Pro
+   - Qwen VL
+   - Mistral 3.1
+   - Gemma
+   - Llama 3.2 Vision
+ - Simple, intuitive user interface
+ - Example images included
+
+ ## Usage
+
+ 1. Upload an image
+ 2. Enter a question about the image
+ 3. Select an AI model from the dropdown
+ 4. Click "Analyze Image" to get the AI's response
+
+ ## API Key
+
+ An OpenRouter API key is already included in the code. To use your own key, modify it in the `app.py` file.
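If you do swap in your own key, one option (not part of this commit) is to read it from an environment variable — or a Hugging Face Space secret, which Spaces expose as environment variables — instead of editing the source. A minimal sketch:

```python
# Sketch: replace the hardcoded constant in app.py with an environment lookup.
# Assumes OPENROUTER_API_KEY is exported in the shell or set as a Space secret.
import os

OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
if not OPENROUTER_API_KEY:
    raise RuntimeError("Set the OPENROUTER_API_KEY environment variable")
```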
app.py ADDED
@@ -0,0 +1,147 @@
+ import gradio as gr
+ import os
+ from openai import OpenAI
+ import base64
+ from PIL import Image
+ import io
+
+ # OpenRouter API key
+ OPENROUTER_API_KEY = "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"
+
+ # Available models
+ MODELS = {
+     "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
+     "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
+     "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
+     "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
+     "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
+     "Gemma": "google/gemma-3-4b-it:free",
+     "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
+ }
+
+ def image_to_base64(image):
+     """Convert an image (file path or PIL Image) to a base64 string."""
+     # If image is a file path
+     if isinstance(image, str):
+         with open(image, "rb") as img_file:
+             return base64.b64encode(img_file.read()).decode()
+
+     # If image is already a PIL Image; JPEG has no alpha channel,
+     # so convert non-RGB modes before saving
+     if image.mode != "RGB":
+         image = image.convert("RGB")
+     buffered = io.BytesIO()
+     image.save(buffered, format="JPEG")
+     return base64.b64encode(buffered.getvalue()).decode()
+
+ def analyze_image(image, prompt, model_name):
+     """Analyze an image using the selected OpenRouter model."""
+     try:
+         # Initialize OpenAI client with OpenRouter base URL
+         client = OpenAI(
+             base_url="https://openrouter.ai/api/v1",
+             api_key=OPENROUTER_API_KEY,
+         )
+
+         # Convert image to base64
+         img_base64 = image_to_base64(image)
+
+         # Create the completion request
+         completion = client.chat.completions.create(
+             extra_headers={
+                 "HTTP-Referer": "https://gradio-openrouter-interface.com",
+                 "X-Title": "Gradio OpenRouter Interface",
+             },
+             model=MODELS[model_name],
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {
+                             "type": "image_url",
+                             "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
+                         },
+                     ],
+                 }
+             ],
+         )
+
+         # Return the model's response
+         return completion.choices[0].message.content
+
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # Create the Gradio interface
+ with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css", theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🔍 OpenRouter AI Vision Interface
+
+         Upload an image and ask a question about it. The AI will analyze the image and respond.
+
+         *Powered by the OpenRouter API with multiple vision-language models*
+         """
+     )
+
+     with gr.Row():
+         with gr.Column():
+             # Input components with custom styling
+             # (gr.Box was removed in Gradio 4; gr.Group gives the same visual grouping)
+             with gr.Group(elem_classes=["input-container"]):
+                 image_input = gr.Image(type="pil", label="Upload Image", elem_classes=["image-upload-container"])
+                 prompt_input = gr.Textbox(label="Your Question", placeholder="What is in this image?", value="What is in this image?")
+                 model_dropdown = gr.Dropdown(
+                     choices=list(MODELS.keys()),
+                     value="Mistral Small",
+                     label="Select AI Model",
+                     info="Choose from different vision-language models",
+                 )
+                 submit_button = gr.Button("Analyze Image", variant="primary")
+
+         with gr.Column():
+             # Output component with custom styling
+             with gr.Group(elem_classes=["output-container"]):
+                 output_text = gr.Textbox(label="AI Response", lines=12)
+
+     gr.Markdown(
+         """
+         ### Available Models
+         - **Mistral Small**: Powerful vision-language model from Mistral AI
+         - **Kimi Vision**: Specialized vision model from Moonshot AI
+         - **Gemini Pro**: Google's advanced multimodal model
+         - **Qwen VL**: Alibaba's vision-language model
+         - **Mistral 3.1**: Earlier version of Mistral's vision model
+         - **Gemma**: Google's lightweight vision model
+         - **Llama 3.2 Vision**: Meta's vision-enabled large language model
+         """
+     )
+
+     # Set up the submit action
+     submit_button.click(
+         fn=analyze_image,
+         inputs=[image_input, prompt_input, model_dropdown],
+         outputs=output_text,
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=[
+             ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
+             ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
+         ],
+         inputs=[image_input, prompt_input, model_dropdown],
+     )
+
+ # Create the examples directory if it doesn't exist
+ os.makedirs("examples", exist_ok=True)
+
+ # For Hugging Face Spaces compatibility
+ if __name__ == "__main__":
+     # Launch the interface
+     demo.launch(share=True)
+ else:
+     # When imported (e.g. by Hugging Face Spaces), expose the app
+     app = demo.launch(share=False, show_api=False)
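For reference, the `client.chat.completions.create` call above is just a POST to OpenRouter's OpenAI-compatible endpoint. A standalone sketch of the equivalent raw request — the model ID and message shape are taken from `app.py`; the key is assumed to be in the environment, and `examples/nature.jpg` to have been fetched by `download_examples.py`:

```python
# Rough equivalent of the OpenAI-client call in app.py, using requests directly.
import base64
import os

import requests

with open("examples/nature.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json={
        "model": "mistralai/mistral-small-3.2-24b-instruct:free",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```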
deploy.sh ADDED
@@ -0,0 +1,65 @@
+ #!/bin/bash
+
+ # Script to deploy the application to Hugging Face Spaces
+
+ set -e
+
+ # Check if huggingface_hub is installed
+ if ! pip show huggingface_hub > /dev/null 2>&1; then
+     echo "Installing huggingface_hub..."
+     pip install huggingface_hub
+ fi
+
+ # Check if git is installed
+ if ! command -v git &> /dev/null; then
+     echo "Error: git is not installed. Please install git and try again."
+     exit 1
+ fi
+
+ # Get Hugging Face username
+ read -p "Enter your Hugging Face username: " HF_USERNAME
+ if [ -z "$HF_USERNAME" ]; then
+     echo "Error: Hugging Face username cannot be empty."
+     exit 1
+ fi
+
+ # Get Space name
+ read -p "Enter a name for your Hugging Face Space (lowercase, no spaces): " SPACE_NAME
+ if [ -z "$SPACE_NAME" ]; then
+     echo "Error: Space name cannot be empty."
+     exit 1
+ fi
+
+ # Convert to lowercase and replace spaces with dashes
+ SPACE_NAME=$(echo "$SPACE_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')
+
+ # Create examples directory
+ mkdir -p examples
+
+ # Download example images
+ python download_examples.py
+
+ # Initialize git repository if not already initialized
+ if [ ! -d .git ]; then
+     git init
+ fi
+
+ # Create the Hugging Face Space via the API
+ echo "Creating Hugging Face Space..."
+ python -c "from huggingface_hub import create_repo; create_repo(repo_id='$HF_USERNAME/$SPACE_NAME', repo_type='space', space_sdk='gradio')"
+
+ # Add remote (or update it if it already exists)
+ git remote add origin https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME || git remote set-url origin https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
+
+ # Add all files
+ git add .
+
+ # Commit
+ git commit -m "Initial commit with proper configuration"
+
+ # Push to Hugging Face
+ echo "Pushing to Hugging Face Spaces..."
+ git push -u origin main --force
+
+ echo "Deployment complete! Your application is now available at: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
+ echo "Note: It may take a few minutes for the application to build and deploy."
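The same create-and-upload flow can also be done from Python with `huggingface_hub` alone, skipping the git steps. A sketch under the assumption that you are already authenticated (e.g. via `huggingface-cli login`); the repo ID is a placeholder:

```python
# Programmatic alternative to deploy.sh using the huggingface_hub API.
from huggingface_hub import HfApi

api = HfApi()
repo_id = "your-username/your-space-name"  # placeholder; use your own IDs

# Same call deploy.sh issues via `python -c`; exist_ok avoids failing on reruns
api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="gradio", exist_ok=True)

# Upload the working directory in place of git add/commit/push
api.upload_folder(folder_path=".", repo_id=repo_id, repo_type="space")
print(f"Deployed: https://huggingface.co/spaces/{repo_id}")
```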
download_examples.py ADDED
@@ -0,0 +1,22 @@
+ import os
+ import requests
+ from PIL import Image
+ from io import BytesIO
+
+ # Create examples directory if it doesn't exist
+ os.makedirs("examples", exist_ok=True)
+
+ # URL of the example image
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+ # Download the image (timeout so a stalled connection can't hang the script)
+ response = requests.get(image_url, timeout=60)
+ if response.status_code == 200:
+     # Open the image from the response content
+     img = Image.open(BytesIO(response.content))
+
+     # Save the image to the examples directory
+     img.save("examples/nature.jpg")
+     print("Example image downloaded successfully!")
+ else:
+     print(f"Failed to download image. Status code: {response.status_code}")
packages.txt ADDED
@@ -0,0 +1,2 @@
+ libgl1-mesa-glx
+ libglib2.0-0
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio>=4.0.0
+ openai>=1.0.0
+ Pillow>=9.0.0
+ requests>=2.28.0
style.css ADDED
@@ -0,0 +1,69 @@
+ /* Custom CSS for OpenRouter AI Vision Interface */
+
+ /* Main container styling */
+ body {
+     font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
+     background-color: #f7f7f7;
+ }
+
+ /* Header styling */
+ h1 {
+     color: #2a2a2a;
+     font-weight: 700;
+     margin-bottom: 0.5rem;
+ }
+
+ /* Button styling */
+ button.primary {
+     background-color: #2563eb;
+     color: white;
+     border: none;
+     padding: 0.5rem 1rem;
+     border-radius: 0.375rem;
+     font-weight: 500;
+     transition: background-color 0.2s;
+ }
+
+ button.primary:hover {
+     background-color: #1d4ed8;
+ }
+
+ /* Input container styling */
+ .input-container {
+     background-color: white;
+     border-radius: 0.5rem;
+     padding: 1.5rem;
+     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+ }
+
+ /* Output container styling */
+ .output-container {
+     background-color: white;
+     border-radius: 0.5rem;
+     padding: 1.5rem;
+     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+     min-height: 200px;
+ }
+
+ /* Model dropdown styling */
+ select {
+     border: 1px solid #e2e8f0;
+     border-radius: 0.375rem;
+     padding: 0.5rem;
+     width: 100%;
+     background-color: white;
+ }
+
+ /* Image upload area styling */
+ .image-upload-container {
+     border: 2px dashed #cbd5e1;
+     border-radius: 0.5rem;
+     padding: 2rem;
+     text-align: center;
+     background-color: #f8fafc;
+     transition: border-color 0.2s;
+ }
+
+ .image-upload-container:hover {
+     border-color: #94a3b8;
+ }