Spaces:

sayantan47
/

Hot-or-Not

Sleeping

App Files Files Community

sayantan47 committed on Jul 26

Commit

c72ead4

1 Parent(s): 97c315c

refactor

Browse files

Files changed (4) hide show

.gitignore +4 -0
README.md +98 -13
app.py +14 -178
core.py +311 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+hf_cache
+app-local.py
+__pycache__
+.vscode

README.md CHANGED Viewed

@@ -1,13 +1,98 @@
----
-title: HotorNot
-emoji: 🏢
-colorFrom: gray
-colorTo: blue
-sdk: gradio
-sdk_version: 5.38.2
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Hot or Not - CLIP ONNX Implementation
+A modular Hot or Not application using CLIP ONNX models with automatic gender and age detection.
+## 🏗️ Architecture
+The codebase has been refactored into a modular structure for better maintainability:
+### Core Components
+- **`core.py`** - Contains all the core logic for hot-or-not scoring
+  - Abstract model interface (`ModelInterface`)
+  - HuggingFace model implementation (`HuggingFaceModel`)
+  - Local model implementation (`LocalModel`)
+  - Core scoring logic (`HotOrNotScorer`)
+  - Utility functions and configuration
+- **`app.py`** - Gradio UI using HuggingFace Hub model
+  - Downloads and uses models from HuggingFace Hub
+  - Default repo: `sayantan47/clip-vit-b32-onnx`
+- **`app-local.py`** - Gradio UI using local model files
+  - Uses locally stored ONNX model files
+  - Configurable model and processor paths
+## 🚀 Usage
+### Running with HuggingFace Model
+```bash
+python app.py
+```
+This will automatically download the model from HuggingFace Hub and start the Gradio interface.
+### Running with Local Model
+1. Place your ONNX model file in the expected location (default: `models/model.onnx`)
+2. Update the `MODEL_PATH` in `app-local.py` if needed
+3. Run:
+```bash
+python app-local.py
+```
+### Customizing Model Paths
+For local models, edit the configuration in `app-local.py`:
+```python
+MODEL_PATH = "path/to/your/model.onnx"
+PROCESSOR_PATH = "path/to/your/processor"  # Optional
+```
+## 🔧 Configuration
+The `Config` class in `core.py` contains shared configuration:
+- `FIXED_IMG_W`, `FIXED_IMG_H`: Image display dimensions (300x300)
+- `DEFAULT_OUTPUT`: Fallback values when model fails
+- `PROVIDERS`: ONNX execution providers (CPU by default)
+## 📦 Dependencies
+Install required packages:
+```bash
+pip install -r requirements.txt
+```
+Required packages:
+- numpy
+- onnxruntime
+- huggingface_hub
+- transformers
+- Pillow
+- gradio
+## 🧠 How It Works
+1. **Image Analysis**: Uses CLIP to analyze uploaded images
+2. **Gender Detection**: Classifies the image as "man" or "woman" (reports "unknown" if inference fails)
+3. **Age Detection**: Categorizes as "young", "middle-aged", or "old" (reports "unknown" if inference fails)
+4. **Attractiveness Scoring**: Uses gender-specific positive/negative prompts
+5. **Score Calculation**: Generates composite scores and individual metrics
+## 🏗️ Extending the System
+The modular design makes it easy to:
+- Add new model implementations by extending `ModelInterface`
+- Create different UI frontends using the core `HotOrNotScorer`
+- Modify scoring algorithms in the core module
+- Add new model sources (local files, different hubs, etc.)
+## 📄 License
+MIT License

app.py CHANGED Viewed

@@ -1,192 +1,28 @@
-import os
-import sys
-import traceback
-import numpy as np
-import onnxruntime as ort
-from huggingface_hub import hf_hub_download
-from transformers import CLIPProcessor
-from PIL import Image
 import gradio as gr
 # ============================================================
-# Config
 # ============================================================
-REPO_ID = "sayantan47/clip-vit-b32-onnx"  # <-- change this
 MODEL_FILENAME = "onnx/model.onnx"
-PROVIDERS = ["CPUExecutionProvider"]  # keep CPU to avoid CUDA DLL issues
-DEFAULT_OUTPUT = (0.0, 0.0, 0.0, 0.0, "unknown", "unknown")
-FIXED_IMG_W = 300
-FIXED_IMG_H = 300
-# ============================================================
-# Utils
-# ============================================================
-def _print_exc(prefix: str):
-    print(prefix, file=sys.stderr)
-    traceback.print_exc()
-def _softmax_safe(x: np.ndarray, axis: int = -1) -> np.ndarray:
-    try:
-        x = x - np.max(x, axis=axis, keepdims=True)
-        ex = np.exp(x)
-        denom = np.sum(ex, axis=axis, keepdims=True)
-        denom = np.where(denom == 0, 1.0, denom)
-        return ex / denom
-    except Exception:
-        _print_exc("[_softmax_safe] failed")
-        return np.ones_like(x) / x.shape[-1]
-def _ensure_int64(feed_dict):
-    out = {}
-    for k, v in feed_dict.items():
-        if isinstance(v, np.ndarray) and v.dtype == np.int32:
-            out[k] = v.astype(np.int64)
-        else:
-            out[k] = v
-    return out
-def _dummy_image(width=FIXED_IMG_W, height=FIXED_IMG_H):
-    return Image.fromarray(np.full((height, width, 3), 127, dtype=np.uint8), "RGB")
-# ============================================================
-# Load from HF Hub
-# ============================================================
-def load_from_hub():
-    # download model.onnx
-    model_path = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=MODEL_FILENAME,
-        local_dir="hf_cache",
-        local_dir_use_symlinks=False,
-        resume_download=True,
-    )
-    # load processor (tokenizer + preproc files) from the same repo
-    proc = CLIPProcessor.from_pretrained(REPO_ID)
-    sess = ort.InferenceSession(model_path, providers=PROVIDERS)
-    return proc, sess
-try:
-    processor, session = load_from_hub()
-except Exception:
-    _print_exc("[GLOBAL INIT] Failed to download/load model from HF Hub.")
-    processor, session = None, None
 # ============================================================
-# Core helpers
 # ============================================================
-def _run_clip(image_pil: Image.Image, texts):
-    if processor is None or session is None:
-        return None
-    try:
-        inputs = processor(
-            text=texts, images=image_pil, return_tensors="np", padding=True
-        )
-        ort_inputs = _ensure_int64(inputs)
-        outputs = session.run(None, ort_inputs)
-        logits_per_image = outputs[0]  # (1, n_texts)
-        probs = _softmax_safe(logits_per_image, axis=-1)[0]
-        return probs
-    except Exception:
-        _print_exc("[_run_clip] Inference failed")
-        return None
-def detect_gender(image_pil: Image.Image) -> str:
-    texts = ["a man", "a woman"]
-    probs = _run_clip(image_pil, texts)
-    if probs is None:
-        return "unknown"
-    return "man" if int(np.argmax(probs)) == 0 else "woman"
-def detect_age_group(image_pil: Image.Image) -> str:
-    texts = ["a young person", "a middle-aged person", "an old person"]
-    probs = _run_clip(image_pil, texts)
-    if probs is None:
-        return "unknown"
-    return ["young", "middle-aged", "old"][int(np.argmax(probs))]
-def score_with_terms(image_pil: Image.Image, positive_terms, negative_terms):
-    probs_all = []
-    for pos, neg in zip(positive_terms, negative_terms):
-        probs = _run_clip(image_pil, [pos, neg])
-        if probs is None or len(probs) != 2:
-            return (
-                DEFAULT_OUTPUT[0],
-                DEFAULT_OUTPUT[1],
-                DEFAULT_OUTPUT[2],
-                DEFAULT_OUTPUT[3],
-            )
-        probs_all.append(probs)
-    positive_probs = [p[0] for p in probs_all]
-    negative_probs = [p[1] for p in probs_all]
-    s1 = round((probs_all[0][0] - probs_all[0][1] + 1) * 50, 2)
-    s2 = round((probs_all[1][0] - probs_all[1][1] + 1) * 50, 2)
-    s3 = round((probs_all[2][0] - probs_all[2][1] + 1) * 50, 2)
-    hot_score = float(np.mean(positive_probs))
-    ugly_score = float(np.mean(negative_probs))
-    composite = round(((hot_score - ugly_score) + 1) * 50, 2)
-    return composite, s1, s2, s3
 # ============================================================
 # Gradio callback
 # ============================================================
 def hotornot(image):
-    if processor is None or session is None:
-        return DEFAULT_OUTPUT
-    if image is None:
-        image_pil = _dummy_image()
-    else:
-        try:
-            image_pil = Image.fromarray(image.astype("uint8"), "RGB")
-        except Exception:
-            _print_exc("[hotornot] Failed to convert input to PIL. Using dummy image.")
-            image_pil = _dummy_image()
-    try:
-        gender = detect_gender(image_pil)
-        age_group = detect_age_group(image_pil)
-        if gender == "man":
-            positive_terms = ["a handsome man", "a charming man", "an attractive man"]
-            negative_terms = ["an ugly man", "a gross man", "a hideous man"]
-        elif gender == "woman":
-            positive_terms = [
-                "a beautiful woman",
-                "a cute woman",
-                "an attractive woman",
-            ]
-            negative_terms = ["an ugly woman", "a gross woman", "a hideous woman"]
-        else:
-            positive_terms = [
-                "a hot person",
-                "a beautiful person",
-                "an attractive person",
-            ]
-            negative_terms = ["an ugly person", "a gross person", "a hideous person"]
-        composite, hotness, second, attractiveness = score_with_terms(
-            image_pil, positive_terms, negative_terms
-        )
-        return composite, hotness, second, attractiveness, gender, age_group
-    except Exception:
-        _print_exc("[hotornot] Unexpected error")
-        return DEFAULT_OUTPUT
 # ============================================================
@@ -195,8 +31,8 @@ def hotornot(image):
 CSS = f"""
 #fixed_img_component img,
 #fixed_img_component canvas {{
-    width: {FIXED_IMG_W}px !important;
-    height: {FIXED_IMG_H}px !important;
     object-fit: contain !important;
 }}
 """
@@ -212,8 +48,8 @@ with gr.Blocks(css=CSS) as demo:
             label="Upload Image",
             type="numpy",
             image_mode="RGB",
-            height=FIXED_IMG_H,
-            width=FIXED_IMG_W,
             elem_id="fixed_img_component",
         )

 import gradio as gr
+from core import create_huggingface_scorer, Config
 # ============================================================
+# Configuration
 # ============================================================
+REPO_ID = "sayantan47/clip-vit-b32-onnx"  # <-- change this if needed
 MODEL_FILENAME = "onnx/model.onnx"
 # ============================================================
+# Initialize Model
 # ============================================================
+print("Loading HuggingFace model...")
+scorer = create_huggingface_scorer(REPO_ID, MODEL_FILENAME)
+if not scorer.model.is_loaded():
+    print("WARNING: Model failed to load. App will return default values.")
 # ============================================================
 # Gradio callback
 # ============================================================
 def hotornot(image):
+    """Main Gradio callback function."""
+    return scorer.evaluate_image(image)
 # ============================================================
 CSS = f"""
 #fixed_img_component img,
 #fixed_img_component canvas {{
+    width: {Config.FIXED_IMG_W}px !important;
+    height: {Config.FIXED_IMG_H}px !important;
     object-fit: contain !important;
 }}
 """
             label="Upload Image",
             type="numpy",
             image_mode="RGB",
+            height=Config.FIXED_IMG_H,
+            width=Config.FIXED_IMG_W,
             elem_id="fixed_img_component",
         )

core.py ADDED Viewed

	@@ -0,0 +1,311 @@

+import os
+import sys
+import traceback
+import numpy as np
+import onnxruntime as ort
+from transformers import CLIPProcessor
+from PIL import Image
+from typing import Optional, List, Tuple, Union
+from abc import ABC, abstractmethod
+# ============================================================
+# Configuration
+# ============================================================
+class Config:
+    DEFAULT_OUTPUT = (0.0, 0.0, 0.0, 0.0, "unknown", "unknown")
+    FIXED_IMG_W = 300
+    FIXED_IMG_H = 300
+    PROVIDERS = ["CPUExecutionProvider"]  # keep CPU to avoid CUDA DLL issues
+# ============================================================
+# Utilities
+# ============================================================
+def print_exc(prefix: str):
+    """Print exception with prefix to stderr."""
+    print(prefix, file=sys.stderr)
+    traceback.print_exc()
+def softmax_safe(x: np.ndarray, axis: int = -1) -> np.ndarray:
+    """Safe softmax implementation that handles edge cases."""
+    try:
+        x = x - np.max(x, axis=axis, keepdims=True)
+        ex = np.exp(x)
+        denom = np.sum(ex, axis=axis, keepdims=True)
+        denom = np.where(denom == 0, 1.0, denom)
+        return ex / denom
+    except Exception:
+        print_exc("[softmax_safe] failed")
+        return np.ones_like(x) / x.shape[-1]
+def ensure_int64(feed_dict: dict) -> dict:
+    """Convert int32 arrays to int64 for ONNX compatibility."""
+    out = {}
+    for k, v in feed_dict.items():
+        if isinstance(v, np.ndarray) and v.dtype == np.int32:
+            out[k] = v.astype(np.int64)
+        else:
+            out[k] = v
+    return out
+def create_dummy_image(width: int = Config.FIXED_IMG_W, height: int = Config.FIXED_IMG_H) -> Image.Image:
+    """Create a dummy gray image for fallback cases."""
+    return Image.fromarray(np.full((height, width, 3), 127, dtype=np.uint8), "RGB")
+# ============================================================
+# Abstract Model Interface
+# ============================================================
+class ModelInterface(ABC):
+    """Abstract interface for CLIP models."""
+    @abstractmethod
+    def is_loaded(self) -> bool:
+        """Check if model is properly loaded."""
+        pass
+    @abstractmethod
+    def run_inference(self, image_pil: Image.Image, texts: List[str]) -> Optional[np.ndarray]:
+        """Run CLIP inference on image and texts."""
+        pass
+# ============================================================
+# Model Implementations
+# ============================================================
+class HuggingFaceModel(ModelInterface):
+    """CLIP model loaded from Hugging Face Hub."""
+    def __init__(self, repo_id: str, model_filename: str):
+        self.repo_id = repo_id
+        self.model_filename = model_filename
+        self.processor = None
+        self.session = None
+        self._load_model()
+    def _load_model(self):
+        """Load model and processor from Hugging Face Hub."""
+        try:
+            from huggingface_hub import hf_hub_download
+            # Download model.onnx
+            model_path = hf_hub_download(
+                repo_id=self.repo_id,
+                filename=self.model_filename,
+                local_dir="hf_cache",
+                local_dir_use_symlinks=False,
+                resume_download=True,
+            )
+            # Load processor (tokenizer + preproc files) from the same repo
+            self.processor = CLIPProcessor.from_pretrained(self.repo_id)
+            self.session = ort.InferenceSession(model_path, providers=Config.PROVIDERS)
+        except Exception:
+            print_exc("[HuggingFaceModel] Failed to download/load model from HF Hub.")
+            self.processor, self.session = None, None
+    def is_loaded(self) -> bool:
+        """Check if model is properly loaded."""
+        return self.processor is not None and self.session is not None
+    def run_inference(self, image_pil: Image.Image, texts: List[str]) -> Optional[np.ndarray]:
+        """Run CLIP inference on image and texts."""
+        if not self.is_loaded():
+            return None
+        try:
+            inputs = self.processor(
+                text=texts, images=image_pil, return_tensors="np", padding=True
+            )
+            ort_inputs = ensure_int64(inputs)
+            outputs = self.session.run(None, ort_inputs)
+            logits_per_image = outputs[0]  # (1, n_texts)
+            probs = softmax_safe(logits_per_image, axis=-1)[0]
+            return probs
+        except Exception:
+            print_exc("[HuggingFaceModel] Inference failed")
+            return None
+class LocalModel(ModelInterface):
+    """CLIP model loaded from local files."""
+    def __init__(self, model_path: str, processor_path: Optional[str] = None):
+        self.model_path = model_path
+        self.processor_path = processor_path
+        self.processor = None
+        self.session = None
+        self._load_model()
+    def _load_model(self):
+        """Load model and processor from local files."""
+        try:
+            # Load ONNX model
+            if not os.path.exists(self.model_path):
+                raise FileNotFoundError(f"Model file not found: {self.model_path}")
+            self.session = ort.InferenceSession(self.model_path, providers=Config.PROVIDERS)
+            # Load processor
+            if self.processor_path and os.path.exists(self.processor_path):
+                self.processor = CLIPProcessor.from_pretrained(self.processor_path)
+            else:
+                # Fallback to a default processor if local processor not available
+                print("[LocalModel] Using default CLIP processor")
+                self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+        except Exception:
+            print_exc("[LocalModel] Failed to load local model.")
+            self.processor, self.session = None, None
+    def is_loaded(self) -> bool:
+        """Check if model is properly loaded."""
+        return self.processor is not None and self.session is not None
+    def run_inference(self, image_pil: Image.Image, texts: List[str]) -> Optional[np.ndarray]:
+        """Run CLIP inference on image and texts."""
+        if not self.is_loaded():
+            return None
+        try:
+            inputs = self.processor(
+                text=texts, images=image_pil, return_tensors="np", padding=True
+            )
+            ort_inputs = ensure_int64(inputs)
+            outputs = self.session.run(None, ort_inputs)
+            logits_per_image = outputs[0]  # (1, n_texts)
+            probs = softmax_safe(logits_per_image, axis=-1)[0]
+            return probs
+        except Exception:
+            print_exc("[LocalModel] Inference failed")
+            return None
+# ============================================================
+# Core Scoring Logic
+# ============================================================
+class HotOrNotScorer:
+    """Core logic for hot-or-not scoring using CLIP models."""
+    def __init__(self, model: ModelInterface):
+        self.model = model
+    def _run_clip(self, image_pil: Image.Image, texts: List[str]) -> Optional[np.ndarray]:
+        """Run CLIP inference wrapper."""
+        return self.model.run_inference(image_pil, texts)
+    def detect_gender(self, image_pil: Image.Image) -> str:
+        """Detect gender from image."""
+        texts = ["a man", "a woman"]
+        probs = self._run_clip(image_pil, texts)
+        if probs is None:
+            return "unknown"
+        return "man" if int(np.argmax(probs)) == 0 else "woman"
+    def detect_age_group(self, image_pil: Image.Image) -> str:
+        """Detect age group from image."""
+        texts = ["a young person", "a middle-aged person", "an old person"]
+        probs = self._run_clip(image_pil, texts)
+        if probs is None:
+            return "unknown"
+        return ["young", "middle-aged", "old"][int(np.argmax(probs))]
+    def score_with_terms(self, image_pil: Image.Image, positive_terms: List[str], negative_terms: List[str]) -> Tuple[float, float, float, float]:
+        """Score image with positive and negative terms."""
+        probs_all = []
+        for pos, neg in zip(positive_terms, negative_terms):
+            probs = self._run_clip(image_pil, [pos, neg])
+            if probs is None or len(probs) != 2:
+                return (
+                    Config.DEFAULT_OUTPUT[0],
+                    Config.DEFAULT_OUTPUT[1],
+                    Config.DEFAULT_OUTPUT[2],
+                    Config.DEFAULT_OUTPUT[3],
+                )
+            probs_all.append(probs)
+        s1 = round((probs_all[0][0] - probs_all[0][1] + 1) * 50, 2)
+        s2 = round((probs_all[1][0] - probs_all[1][1] + 1) * 50, 2)
+        s3 = round((probs_all[2][0] - probs_all[2][1] + 1) * 50, 2)
+        positive_probs = [p[0] for p in probs_all]
+        negative_probs = [p[1] for p in probs_all]
+        hot_score = float(np.mean(positive_probs))
+        ugly_score = float(np.mean(negative_probs))
+        composite = round(((hot_score - ugly_score) + 1) * 50, 2)
+        return composite, s1, s2, s3
+    def evaluate_image(self, image: Union[np.ndarray, Image.Image, None]) -> Tuple[float, float, float, float, str, str]:
+        """Main evaluation function that returns complete scoring."""
+        if not self.model.is_loaded():
+            return Config.DEFAULT_OUTPUT
+        # Handle input image
+        if image is None:
+            image_pil = create_dummy_image()
+        else:
+            try:
+                if isinstance(image, np.ndarray):
+                    image_pil = Image.fromarray(image.astype("uint8"), "RGB")
+                elif isinstance(image, Image.Image):
+                    image_pil = image
+                else:
+                    raise ValueError("Unsupported image type")
+            except Exception:
+                print_exc("[evaluate_image] Failed to convert input to PIL. Using dummy image.")
+                image_pil = create_dummy_image()
+        try:
+            # Detect attributes
+            gender = self.detect_gender(image_pil)
+            age_group = self.detect_age_group(image_pil)
+            # Define terms based on detected gender
+            if gender == "man":
+                positive_terms = ["a handsome man", "a charming man", "an attractive man"]
+                negative_terms = ["an ugly man", "a gross man", "a hideous man"]
+            elif gender == "woman":
+                positive_terms = [
+                    "a beautiful woman",
+                    "a cute woman",
+                    "an attractive woman",
+                ]
+                negative_terms = ["an ugly woman", "a gross woman", "a hideous woman"]
+            else:
+                positive_terms = [
+                    "a hot person",
+                    "a beautiful person",
+                    "an attractive person",
+                ]
+                negative_terms = ["an ugly person", "a gross person", "a hideous person"]
+            # Calculate scores
+            composite, hotness, second, attractiveness = self.score_with_terms(
+                image_pil, positive_terms, negative_terms
+            )
+            return composite, hotness, second, attractiveness, gender, age_group
+        except Exception:
+            print_exc("[evaluate_image] Unexpected error")
+            return Config.DEFAULT_OUTPUT
+# ============================================================
+# Factory Functions
+# ============================================================
+def create_huggingface_scorer(repo_id: str = "sayantan47/clip-vit-b32-onnx", model_filename: str = "onnx/model.onnx") -> HotOrNotScorer:
+    """Create a scorer using HuggingFace model."""
+    model = HuggingFaceModel(repo_id, model_filename)
+    return HotOrNotScorer(model)
+def create_local_scorer(model_path: str, processor_path: Optional[str] = None) -> HotOrNotScorer:
+    """Create a scorer using local model."""
+    model = LocalModel(model_path, processor_path)
+    return HotOrNotScorer(model)