import base64
import io
import os
from typing import Dict, List, Any

import torch
from PIL import Image
from transformers import AutoFeatureExtractor, EfficientNetForImageClassification

class EndpointHandler:
    """Serve single-value regression predictions from an EfficientNet model.

    The handler loads a feature extractor and an EfficientNet backbone from
    ``path``, swaps the classifier for a one-unit linear layer, and restores
    the fine-tuned weights stored in ``model.pt`` inside the same directory.
    Calling the instance with a request payload returns one prediction.
    """

    def __init__(self, path=""):
        """Load the feature extractor, the model and the custom weights.

        Args:
            path: Directory holding the pretrained configuration/weights and
                the ``model.pt`` state dict.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load feature extractor
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(path)

        # Load model
        self.model = EfficientNetForImageClassification.from_pretrained(path)

        # Replace the classification head with a regression head
        self.model.classifier = torch.nn.Linear(self.model.classifier.in_features, 1)

        # Load custom weights. os.path.join keeps an empty ``path`` relative
        # ("model.pt") instead of producing the root-level "/model.pt".
        weights_path = os.path.join(path, "model.pt")
        # NOTE(review): torch.load unpickles the file; only load trusted
        # artifacts, and consider weights_only=True if the deployed torch
        # version supports it.
        state_dict = torch.load(weights_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

    @staticmethod
    def _decode_base64(payload):
        """Return the raw bytes encoded by a base64 string or bytes object.

        A leading ``data:<mime>;base64,`` header on string payloads is
        removed first; in non-validating mode ``b64decode`` would otherwise
        decode the header's alphabet characters and corrupt the result.

        Raises:
            TypeError: If ``payload`` is not ``str``, ``bytes`` or ``bytearray``.
            ValueError: If the base64 data is malformed (``binascii.Error``).
        """
        if isinstance(payload, str):
            payload = payload.strip()
            header, separator, body = payload.partition(",")
            if separator and header.startswith("data:"):
                payload = body
        elif not isinstance(payload, (bytes, bytearray)):
            raise TypeError(
                f"Unsupported 'inputs' type: {type(payload).__name__}"
            )
        return base64.b64decode(payload)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the regression model on the image carried by ``data``.

        Args:
            data: Request payload. ``data["inputs"]`` must be a base64-encoded
                image (``str`` or ``bytes``, optionally with a data-URI
                header) or an already-decoded PIL image.

        Returns:
            A one-element list ``[{"prediction": <float>}]``.

        Raises:
            ValueError: If ``"inputs"` is missing or empty, or the base64
                data is malformed.
            TypeError: If ``"inputs"`` has an unsupported type.
        """
        # Get the image data from the request
        payload = data.get("inputs")
        if payload is None or (isinstance(payload, (str, bytes, bytearray)) and not payload):
            raise ValueError("Request payload must contain a non-empty 'inputs' field")

        # Decode and open the image. Some serving layers hand over an image
        # that is already decoded; accept it as-is rather than failing.
        if isinstance(payload, Image.Image):
            image = payload.convert('RGB')
        else:
            raw_bytes = self._decode_base64(payload)
            image = Image.open(io.BytesIO(raw_bytes)).convert('RGB')

        # Prepare the image for the model
        inputs = self.feature_extractor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Make prediction
        with torch.no_grad():
            outputs = self.model(**inputs)

        # For regression, the single logit is the prediction itself.
        prediction = outputs.logits.item()

        return [{"prediction": float(prediction)}]