dazpye
/

clip-image

@@ -1,47 +1,62 @@
 import torch
 from transformers import CLIPProcessor, CLIPModel
 from PIL import Image
 import base64
 import io
 class EndpointHandler:
     def __init__(self, model_dir=None):  # AWS expects model_dir
-        print("Loading model...")
         self.model = CLIPModel.from_pretrained("dazpye/clip-image")
         self.processor = CLIPProcessor.from_pretrained("dazpye/clip-image")
-    def _load_image(self, image_url):
-        """Fetches an image and ensures it is fully loaded."""
         try:
-            print(f"Fetching image from: {image_url}")
-            response = requests.get(image_url, timeout=5)
-            print(f"HTTP Status Code: {response.status_code}")
-            if response.status_code == 200:
-                image_bytes = io.BytesIO(response.content)  # Convert to bytes
-                return Image.open(image_bytes)
-            else:
-                print(f"❌ Failed to fetch image: HTTP {response.status_code}")
         except Exception as e:
-            print(f"❌ Exception in image loading: {e}")
         return None  # Return None if image loading fails
     def __call__(self, data):
         """Main inference function Hugging Face expects."""
-        print("Processing input...")
         text = data.get("text", ["default caption"])  # Default text
         images = data.get("images", [])  # List of images
         # Convert image URLs or base64 strings to PIL images
         pil_images = [self._load_image(img) for img in images if img]
         if not pil_images:
-            return {"error": "No valid images provided."}
         inputs = self.processor(text=text, images=pil_images, return_tensors="pt")
-        print("Running inference...")
         with torch.no_grad():
             outputs = self.model(**inputs)

 import torch
 from transformers import CLIPProcessor, CLIPModel
 from PIL import Image
+import requests
 import base64
 import io
 class EndpointHandler:
     def __init__(self, model_dir=None):  # AWS expects model_dir
+        """Load the CLIP model and its matching processor.
+
+        Both objects are created with ``from_pretrained`` using the
+        hard-coded identifier "dazpye/clip-image" and stored on the
+        instance as ``self.model`` and ``self.processor``.
+
+        Args:
+            model_dir: Accepted as part of the signature but never read
+                inside this method.
+        """
+        print("🔄 Loading model...")
         self.model = CLIPModel.from_pretrained("dazpye/clip-image")
         self.processor = CLIPProcessor.from_pretrained("dazpye/clip-image")
+    def _load_image(self, image_data):
+        """Fetches an image from a URL or decodes a base64 image.
+
+        Args:
+            image_data: A string. If it starts with "http" it is fetched
+                with ``requests.get``; any other string is passed to
+                ``base64.b64decode``.
+
+        Returns:
+            The image opened with ``Image.open`` and converted to "RGB",
+            or None when the input is not a string, the HTTP status is
+            not 200, or any exception is raised while loading.
+        """
         try:
+            # Non-string input matches neither branch below and falls
+            # through to the final ``return None`` without any message.
+            if isinstance(image_data, str):
+                # Prefix test only: every string beginning with "http" is
+                # handled as a URL, everything else as base64.
+                if image_data.startswith("http"):
+                    # Fetch image from URL
+                    print(f"🌐 Fetching image from: {image_data}")
+                    response = requests.get(image_data, timeout=5)
+                    print(f"✅ HTTP Status Code: {response.status_code}")
+                    if response.status_code == 200:
+                        image_bytes = io.BytesIO(response.content)
+                        return Image.open(image_bytes).convert("RGB")
+                    else:
+                        # No return here: a non-200 response ends at the
+                        # final ``return None``.
+                        print(f"❌ Failed to fetch image: HTTP {response.status_code}")
+                else:
+                    # Handle base64-encoded image
+                    print("📸 Decoding base64 image...")
+                    return Image.open(io.BytesIO(base64.b64decode(image_data))).convert("RGB")
+        # Every Exception raised above is printed rather than re-raised,
+        # so callers only ever observe a None result on failure.
         except Exception as e:
+            print(f"⚠️ Exception in image loading: {e}")
         return None  # Return None if image loading fails
     def __call__(self, data):
         """Main inference function Hugging Face expects."""
+        print("📥 Processing input...")
+        if "inputs" in data:
+            data = data["inputs"]
         text = data.get("text", ["default caption"])  # Default text
         images = data.get("images", [])  # List of images
         # Convert image URLs or base64 strings to PIL images
         pil_images = [self._load_image(img) for img in images if img]
+        pil_images = [img for img in pil_images if img]  # Remove None values
         if not pil_images:
+            return {"error": "❌ No valid images provided. Check URLs or base64 encoding."}
         inputs = self.processor(text=text, images=pil_images, return_tensors="pt")
+        print("🖥️ Running inference...")
         with torch.no_grad():
             outputs = self.model(**inputs)