SHIKARICHACHA committed
Commit 25a1f05 · verified · 1 parent: 5b8ac07

Upload 7 files

Files changed (7)
  1. README.md +40 -0
  2. app.py +147 -0
  3. deploy.sh +65 -0
  4. download_examples.py +22 -0
  5. packages.txt +2 -0
  6. requirements.txt +4 -0
  7. style.css +69 -0
README.md ADDED
@@ -0,0 +1,40 @@
+ ---
+ title: OpenRouter AI Vision Interface
+ emoji: 🔍
+ colorFrom: blue
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.0.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # OpenRouter AI Vision Interface
+
+ A Gradio-based web interface for analyzing images with a range of AI models through the OpenRouter API.
+
+ ## Features
+
+ - Upload and analyze images with AI models
+ - Choose from multiple vision-language models:
+   - Mistral Small
+   - Kimi Vision
+   - Gemini Pro
+   - Qwen VL
+   - Mistral 3.1
+   - Gemma
+   - Llama 3.2 Vision
+ - Simple, intuitive user interface
+ - Example images included
+
+ ## Usage
+
+ 1. Upload an image
+ 2. Enter a question about the image
+ 3. Select an AI model from the dropdown
+ 4. Click "Analyze Image" to get the AI's response
+
+ ## API Key
+
+ An OpenRouter API key is already included in the code. To use your own key, modify it in the `app.py` file.
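If you do swap in your own key, one option (not part of this commit) is to read it from an environment variable — or a Hugging Face Space secret, which Spaces expose as environment variables — instead of editing the source. A minimal sketch:

```python
# Sketch: replace the hardcoded constant in app.py with an environment lookup.
# Assumes OPENROUTER_API_KEY is exported in the shell or set as a Space secret.
import os

OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
if not OPENROUTER_API_KEY:
    raise RuntimeError("Set the OPENROUTER_API_KEY environment variable")
```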
app.py ADDED
@@ -0,0 +1,147 @@
+ import gradio as gr
+ import os
+ from openai import OpenAI
+ import base64
+ from PIL import Image
+ import io
+
+ # OpenRouter API key
+ OPENROUTER_API_KEY = "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"
+
+ # Available models
+ MODELS = {
+     "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
+     "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
+     "Gemini Pro": "google/gemini-2.5-pro-exp-03-25",
+     "Qwen VL": "qwen/qwen2.5-vl-32b-instruct:free",
+     "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
+     "Gemma": "google/gemma-3-4b-it:free",
+     "Llama 3.2 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
+ }
+
+ def image_to_base64(image):
+     """Convert an image (file path or PIL Image) to a base64 string."""
+     # If image is a file path
+     if isinstance(image, str):
+         with open(image, "rb") as img_file:
+             return base64.b64encode(img_file.read()).decode()
+
+     # If image is already a PIL Image; JPEG has no alpha channel,
+     # so convert non-RGB modes before saving
+     if image.mode != "RGB":
+         image = image.convert("RGB")
+     buffered = io.BytesIO()
+     image.save(buffered, format="JPEG")
+     return base64.b64encode(buffered.getvalue()).decode()
+
+ def analyze_image(image, prompt, model_name):
+     """Analyze an image using the selected OpenRouter model."""
+     try:
+         # Initialize OpenAI client with OpenRouter base URL
+         client = OpenAI(
+             base_url="https://openrouter.ai/api/v1",
+             api_key=OPENROUTER_API_KEY,
+         )
+
+         # Convert image to base64
+         img_base64 = image_to_base64(image)
+
+         # Create the completion request
+         completion = client.chat.completions.create(
+             extra_headers={
+                 "HTTP-Referer": "https://gradio-openrouter-interface.com",
+                 "X-Title": "Gradio OpenRouter Interface",
+             },
+             model=MODELS[model_name],
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": prompt},
+                         {
+                             "type": "image_url",
+                             "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
+                         },
+                     ],
+                 }
+             ],
+         )
+
+         # Return the model's response
+         return completion.choices[0].message.content
+
+     except Exception as e:
+         return f"Error: {str(e)}"
+
+ # Create the Gradio interface
+ with gr.Blocks(title="OpenRouter AI Vision Interface", css="style.css", theme=gr.themes.Soft()) as demo:
+     gr.Markdown(
+         """
+         # 🔍 OpenRouter AI Vision Interface
+
+         Upload an image and ask a question about it. The AI will analyze the image and respond.
+
+         *Powered by the OpenRouter API with multiple vision-language models*
+         """
+     )
+
+     with gr.Row():
+         with gr.Column():
+             # Input components with custom styling
+             # (gr.Box was removed in Gradio 4; gr.Group gives the same visual grouping)
+             with gr.Group(elem_classes=["input-container"]):
+                 image_input = gr.Image(type="pil", label="Upload Image", elem_classes=["image-upload-container"])
+                 prompt_input = gr.Textbox(label="Your Question", placeholder="What is in this image?", value="What is in this image?")
+                 model_dropdown = gr.Dropdown(
+                     choices=list(MODELS.keys()),
+                     value="Mistral Small",
+                     label="Select AI Model",
+                     info="Choose from different vision-language models",
+                 )
+                 submit_button = gr.Button("Analyze Image", variant="primary")
+
+         with gr.Column():
+             # Output component with custom styling
+             with gr.Group(elem_classes=["output-container"]):
+                 output_text = gr.Textbox(label="AI Response", lines=12)
+
+     gr.Markdown(
+         """
+         ### Available Models
+         - **Mistral Small**: Powerful vision-language model from Mistral AI
+         - **Kimi Vision**: Specialized vision model from Moonshot AI
+         - **Gemini Pro**: Google's advanced multimodal model
+         - **Qwen VL**: Alibaba's vision-language model
+         - **Mistral 3.1**: Earlier version of Mistral's vision model
+         - **Gemma**: Google's lightweight vision model
+         - **Llama 3.2 Vision**: Meta's vision-enabled large language model
+         """
+     )
+
+     # Set up the submit action
+     submit_button.click(
+         fn=analyze_image,
+         inputs=[image_input, prompt_input, model_dropdown],
+         outputs=output_text,
+     )
+
+     # Add examples
+     gr.Examples(
+         examples=[
+             ["examples/nature.jpg", "What is in this image?", "Mistral Small"],
+             ["examples/nature.jpg", "Describe this scene in detail", "Kimi Vision"],
+         ],
+         inputs=[image_input, prompt_input, model_dropdown],
+     )
+
+ # Create the examples directory if it doesn't exist
+ os.makedirs("examples", exist_ok=True)
+
+ # For Hugging Face Spaces compatibility
+ if __name__ == "__main__":
+     # Launch the interface
+     demo.launch(share=True)
+ else:
+     # When imported (e.g. by Hugging Face Spaces), expose the app
+     app = demo.launch(share=False, show_api=False)
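For reference, the `client.chat.completions.create` call above is just a POST to OpenRouter's OpenAI-compatible endpoint. A standalone sketch of the equivalent raw request — the model ID and message shape are taken from `app.py`; the key is assumed to be in the environment, and `examples/nature.jpg` to have been fetched by `download_examples.py`:

```python
# Rough equivalent of the OpenAI-client call in app.py, using requests directly.
import base64
import os

import requests

with open("examples/nature.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode()

resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"},
    json={
        "model": "mistralai/mistral-small-3.2-24b-instruct:free",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"},
                    },
                ],
            }
        ],
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```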
deploy.sh ADDED
@@ -0,0 +1,65 @@
+ #!/bin/bash
+
+ # Script to deploy the application to Hugging Face Spaces
+
+ set -e
+
+ # Check if huggingface_hub is installed
+ if ! pip show huggingface_hub > /dev/null 2>&1; then
+     echo "Installing huggingface_hub..."
+     pip install huggingface_hub
+ fi
+
+ # Check if git is installed
+ if ! command -v git &> /dev/null; then
+     echo "Error: git is not installed. Please install git and try again."
+     exit 1
+ fi
+
+ # Get Hugging Face username
+ read -p "Enter your Hugging Face username: " HF_USERNAME
+ if [ -z "$HF_USERNAME" ]; then
+     echo "Error: Hugging Face username cannot be empty."
+     exit 1
+ fi
+
+ # Get Space name
+ read -p "Enter a name for your Hugging Face Space (lowercase, no spaces): " SPACE_NAME
+ if [ -z "$SPACE_NAME" ]; then
+     echo "Error: Space name cannot be empty."
+     exit 1
+ fi
+
+ # Convert to lowercase and replace spaces with dashes
+ SPACE_NAME=$(echo "$SPACE_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '-')
+
+ # Create examples directory
+ mkdir -p examples
+
+ # Download example images
+ python download_examples.py
+
+ # Initialize git repository if not already initialized
+ if [ ! -d .git ]; then
+     git init
+ fi
+
+ # Create the Hugging Face Space via the API
+ echo "Creating Hugging Face Space..."
+ python -c "from huggingface_hub import create_repo; create_repo(repo_id='$HF_USERNAME/$SPACE_NAME', repo_type='space', space_sdk='gradio')"
+
+ # Add remote (or update it if it already exists)
+ git remote add origin https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME || git remote set-url origin https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
+
+ # Add all files
+ git add .
+
+ # Commit
+ git commit -m "Initial commit with proper configuration"
+
+ # Push to Hugging Face
+ echo "Pushing to Hugging Face Spaces..."
+ git push -u origin main --force
+
+ echo "Deployment complete! Your application is now available at: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
+ echo "Note: It may take a few minutes for the application to build and deploy."
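The same create-and-upload flow can also be done from Python with `huggingface_hub` alone, skipping the git steps. A sketch under the assumption that you are already authenticated (e.g. via `huggingface-cli login`); the repo ID is a placeholder:

```python
# Programmatic alternative to deploy.sh using the huggingface_hub API.
from huggingface_hub import HfApi

api = HfApi()
repo_id = "your-username/your-space-name"  # placeholder; use your own IDs

# Same call deploy.sh issues via `python -c`; exist_ok avoids failing on reruns
api.create_repo(repo_id=repo_id, repo_type="space", space_sdk="gradio", exist_ok=True)

# Upload the working directory in place of git add/commit/push
api.upload_folder(folder_path=".", repo_id=repo_id, repo_type="space")
print(f"Deployed: https://huggingface.co/spaces/{repo_id}")
```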
download_examples.py ADDED
@@ -0,0 +1,22 @@
+ import os
+ import requests
+ from PIL import Image
+ from io import BytesIO
+
+ # Create examples directory if it doesn't exist
+ os.makedirs("examples", exist_ok=True)
+
+ # URL of the example image
+ image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+
+ # Download the image (timeout so a stalled connection can't hang the script)
+ response = requests.get(image_url, timeout=60)
+ if response.status_code == 200:
+     # Open the image from the response content
+     img = Image.open(BytesIO(response.content))
+
+     # Save the image to the examples directory
+     img.save("examples/nature.jpg")
+     print("Example image downloaded successfully!")
+ else:
+     print(f"Failed to download image. Status code: {response.status_code}")
packages.txt ADDED
@@ -0,0 +1,2 @@
+ libgl1-mesa-glx
+ libglib2.0-0
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ gradio>=4.0.0
+ openai>=1.0.0
+ Pillow>=9.0.0
+ requests>=2.28.0
style.css ADDED
@@ -0,0 +1,69 @@
+ /* Custom CSS for OpenRouter AI Vision Interface */
+
+ /* Main container styling */
+ body {
+     font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
+     background-color: #f7f7f7;
+ }
+
+ /* Header styling */
+ h1 {
+     color: #2a2a2a;
+     font-weight: 700;
+     margin-bottom: 0.5rem;
+ }
+
+ /* Button styling */
+ button.primary {
+     background-color: #2563eb;
+     color: white;
+     border: none;
+     padding: 0.5rem 1rem;
+     border-radius: 0.375rem;
+     font-weight: 500;
+     transition: background-color 0.2s;
+ }
+
+ button.primary:hover {
+     background-color: #1d4ed8;
+ }
+
+ /* Input container styling */
+ .input-container {
+     background-color: white;
+     border-radius: 0.5rem;
+     padding: 1.5rem;
+     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+ }
+
+ /* Output container styling */
+ .output-container {
+     background-color: white;
+     border-radius: 0.5rem;
+     padding: 1.5rem;
+     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
+     min-height: 200px;
+ }
+
+ /* Model dropdown styling */
+ select {
+     border: 1px solid #e2e8f0;
+     border-radius: 0.375rem;
+     padding: 0.5rem;
+     width: 100%;
+     background-color: white;
+ }
+
+ /* Image upload area styling */
+ .image-upload-container {
+     border: 2px dashed #cbd5e1;
+     border-radius: 0.5rem;
+     padding: 2rem;
+     text-align: center;
+     background-color: #f8fafc;
+     transition: border-color 0.2s;
+ }
+
+ .image-upload-container:hover {
+     border-color: #94a3b8;
+ }