import base64
import io
import os
from typing import Any, Dict, List

import torch
from PIL import Image
from transformers import AutoFeatureExtractor, EfficientNetForImageClassification


class EndpointHandler:
    """Serve single-image regression predictions from an EfficientNet model.

    The handler is constructed once with a model directory and then called
    with a request dict per prediction. (The class name and call signature
    look like a Hugging Face Inference Endpoints custom handler -- confirm
    against the deployment.)

    The model directory must contain whatever ``from_pretrained`` needs for
    the feature extractor and the EfficientNet backbone, plus a ``model.pt``
    state dict whose classifier is a single-output linear layer.
    """

    def __init__(self, path=""):
        """Load the feature extractor, model and custom weights from *path*.

        Args:
            path: Directory holding the pretrained files and ``model.pt``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load feature extractor
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(path)

        # Load model
        self.model = EfficientNetForImageClassification.from_pretrained(path)

        # Replace the classification head with a regression head so the
        # architecture matches the custom state dict loaded below.
        self.model.classifier = torch.nn.Linear(
            self.model.classifier.in_features, 1
        )

        # os.path.join keeps the checkpoint relative to `path`; plain string
        # formatting turned the default path "" into "/model.pt".
        weights_path = os.path.join(path, "model.pt")
        # NOTE(security): torch.load unpickles the file, so model.pt must come
        # from a trusted source. Consider weights_only=True where the
        # installed torch version supports it.
        state_dict = torch.load(weights_path, map_location=self.device)
        self.model.load_state_dict(state_dict)

        self.model.to(self.device)
        self.model.eval()

    @staticmethod
    def _load_image(payload: Any) -> Image.Image:
        """Turn the request's ``inputs`` value into an RGB PIL image.

        Accepts an already-decoded ``PIL.Image.Image`` or base64-encoded image
        data (``str`` or ``bytes``), optionally prefixed with a
        ``data:...;base64,`` URI header when given as ``str``.

        Raises:
            ValueError: If the payload is missing or empty.
        """
        if isinstance(payload, Image.Image):
            # Some serving layers hand over a decoded image for image/*
            # requests; there is nothing to base64-decode in that case.
            return payload.convert("RGB")

        if payload is None or (
            isinstance(payload, (str, bytes, bytearray)) and not payload
        ):
            raise ValueError(
                'Request is missing "inputs": expected a base64-encoded image.'
            )

        if isinstance(payload, str) and payload.startswith("data:") and "," in payload:
            # Non-strict b64decode would treat the letters of the URI header
            # as payload and corrupt the image, so drop the header first.
            payload = payload.split(",", 1)[1]

        raw = base64.b64decode(payload)
        return Image.open(io.BytesIO(raw)).convert("RGB")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Predict a single regression value for the image in *data*.

        Args:
            data: Request dict; the image is read from ``data["inputs"]``.

        Returns:
            A one-element list ``[{"prediction": <float>}]``.

        Raises:
            ValueError: If ``inputs`` is missing, empty or not valid base64.
        """
        # Get the image data from the request and decode it
        image = self._load_image(data.get("inputs"))

        # Prepare the image for the model
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Make prediction
        with torch.no_grad():
            outputs = self.model(**inputs)

        # One image and a single-output head give exactly one logit, which
        # is what .item() requires. For regression it is used directly.
        prediction = outputs.logits.item()
        return [{"prediction": float(prediction)}]