import os
import re

import pytesseract
import torch
from PIL import Image, ImageDraw
from transformers import CLIPProcessor, CLIPModel

from llm import inference, upload_image


cropped_images_dir = "cropped_images"
os.makedirs(cropped_images_dir, exist_ok=True)

# YOLO detection wrapper (with CLIP and OCR helpers)
class YOLOModel:
    def __init__(self, model_path="yolov5s.pt"):
        """
        Initialize the YOLO model. Downloads the YOLOv5 pretrained weights if
        they are not available locally.
        """
        # Workaround for a torch.hub bug where the "forked repo" validation
        # hits the GitHub API and can fail due to rate limiting
        torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
        self.model = torch.hub.load("ultralytics/yolov5", "custom", path=model_path, force_reload=True)
        # self.model2 = YOLOv10.from_pretrained("Ultralytics/Yolov8")
        # print(f'YOLO Model:\n\n{self.model}')
        # self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        
        # # print(f'CLIP Model:\n\n{self.clip_model}')
        # self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        # self.category_brands = {
        #     "electronics": ["Samsung", "Apple", "Sony", "LG", "Panasonic"],
        #     "furniture": ["Ikea", "Ashley", "La-Z-Boy", "Wayfair", "West Elm"],
        #     "appliances": ["Whirlpool", "GE", "Samsung", "LG", "Bosch"],
        #     "vehicles": ["Tesla", "Toyota", "Ford", "Honda", "Chevrolet"],
        #     "chair": ["Ikea", "Ashley", "Wayfair", "La-Z-Boy", "Herman Miller"],
        #     "microwave": ["Samsung", "Panasonic", "Sharp", "LG", "Whirlpool"],
        #     "table": ["Ikea", "Wayfair", "Ashley", "CB2", "West Elm"],
        #     "oven": ["Whirlpool", "GE", "Samsung", "Bosch", "LG"],
        #     "potted plant": ["The Sill", "PlantVine", "Lowe's", "Home Depot", "UrbanStems"],
        #     "couch": ["Ikea", "Ashley", "Wayfair", "La-Z-Boy", "CushionCo"],
        #     "cow": ["Angus", "Hereford", "Jersey", "Holstein", "Charolais"],
        #     "bed": ["Tempur-Pedic", "Ikea", "Sealy", "Serta", "Sleep Number"],
        #     "tv": ["Samsung", "LG", "Sony", "Vizio", "TCL"],
        #     "bin": ["Rubbermaid", "Sterilite", "Hefty", "Glad", "Simplehuman"],
        #     "refrigerator": ["Whirlpool", "GE", "Samsung", "LG", "Bosch"],
        #     "laptop": ["Dell", "HP", "Apple", "Lenovo", "Asus"],
        #     "smartphone": ["Apple", "Samsung", "Google", "OnePlus", "Huawei"],
        #     "camera": ["Canon", "Nikon", "Sony", "Fujifilm", "Panasonic"],
        #     "toaster": ["Breville", "Cuisinart", "Black+Decker", "Hamilton Beach", "Oster"],
        #     "fan": ["Dyson", "Honeywell", "Lasko", "Vornado", "Bionaire"],
        #     "vacuum cleaner": ["Dyson", "Shark", "Roomba", "Hoover", "Bissell"]
        # }


    def predict_clip(self, image, brand_names):
        """
        Predict the most probable brand for a cropped image by scoring the
        candidate brand names against it with CLIP (zero-shot).
        """
        # Lazily load CLIP on first use, since the eager initialization in
        # __init__ is currently commented out; calling this without the
        # models loaded would otherwise raise an AttributeError
        if not hasattr(self, "clip_model"):
            self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
            self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        inputs = self.clip_processor(
            text=brand_names,
            images=image,
            return_tensors="pt",
            padding=True
        )
        outputs = self.clip_model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)  # Convert logits to probabilities
        best_idx = probs.argmax().item()
        return brand_names[best_idx], probs[0, best_idx].item()
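
    # Hedged usage sketch for predict_clip: "crop_0.jpg" and the candidate
    # brand list below are illustrative placeholders, not project fixtures.
    #
    #     model = YOLOModel()
    #     crop = Image.open("crop_0.jpg").convert("RGB")
    #     brand, score = model.predict_clip(crop, ["Ikea", "Wayfair", "Ashley"])
    #     print(f"{brand}: {score:.2f}")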


    def predict_text(self, image):
        """
        Run OCR on a cropped image and return any detected text.
        """
        try:
            # Grayscale generally improves OCR accuracy
            grayscale = image.convert('L')
            text = pytesseract.image_to_string(grayscale)
            return text.strip()
        except Exception as e:
            # Log the error and return an empty string so a single failed
            # OCR call does not abort the whole detection loop
            print(f"Error during text prediction: {e}")
            return ""



    def predict(self, image_path):
        """
        Run YOLO inference on an image, crop each detection, and enrich it
        with LLM-predicted details and OCR text.

        :param image_path: Path to the input image
        :return: List of prediction dicts with category, bounding box, and details
        """
        results = self.model(image_path)
        image = Image.open(image_path).convert("RGB")
        draw = ImageDraw.Draw(image)
        predictions = results.pandas().xyxy[0]  # Get predictions as a pandas DataFrame
        print(f'YOLO predictions:\n\n{predictions}')
        output = []
        for idx, row in predictions.iterrows():
            category = row['name']
            confidence = row['confidence']
            bbox = [row["xmin"], row["ymin"], row["xmax"], row["ymax"]]

            # Crop the detected region
            cropped_image = image.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
            cropped_image_path = os.path.join(cropped_images_dir, f"crop_{idx}.jpg")
            cropped_image.save(cropped_image_path, "JPEG")

            # Upload the crop to the cloud to get a URL to pass to the LLM
            print('Uploading cropped image...')
            image_url = upload_image.upload_image_to_imgbb(cropped_image_path)
            print(f'Image URL received: {image_url}')
            # Query the LLM for the likely brand, model, price, and description
            result_llms = inference.get_name(image_url, category)
            # possible_brands_llm = re.findall(r"-\s*(.+)", possible_brands_mixed)

            # if len(possible_brands_llm)>0:
            #     predicted_brand, clip_confidence = self.predict_clip(cropped_image, possible_brands_llm)
            # else:
            #     predicted_brand, clip_confidence = "Unknown", 0.0
            

            '''
            # Match category to possible brands
            if category in self.category_brands:
                possible_brands = self.category_brands[category]
                print(f'Predicting with CLIP:\n\n')
                predicted_brand, clip_confidence = self.predict_clip(cropped_image, possible_brands)
            else:
                predicted_brand, clip_confidence = "Unknown", 0.0
            '''


            detected_text = self.predict_text(cropped_image)
            print(f'Detected text: {detected_text}')
            print(f'Predicted brand: {result_llms["model"]}')
            # Draw the bounding box and brand label on the image
            draw.rectangle(bbox, outline="red", width=3)
            draw.text(
                (bbox[0], bbox[1] - 10),
                f'{result_llms["brand"]}',
                fill="red"
            )

            # Append result
            output.append({
                "category": category,
                "bbox": bbox,
                "confidence": confidence,
                "category_llm": result_llms["brand"],
                "predicted_brand": result_llms["model"],
                # "clip_confidence": clip_confidence,
                "price": result_llms["price"],
                "details": result_llms["description"],
                "detected_text": detected_text,
                "image_url": image_url
            })

        # Clean up stale crops once, after the detection loop (previously this
        # ran on every iteration): keep only crop_<idx>.jpg files whose index
        # matches a detection from this run
        valid_indices = set(range(len(predictions)))
        for filename in os.listdir(cropped_images_dir):
            if filename.startswith("crop_") and filename.endswith(".jpg"):
                try:
                    file_idx = int(filename.split("_")[1].split(".")[0])
                except ValueError:
                    # Skip files that don't match the crop_<idx>.jpg pattern
                    continue
                if file_idx not in valid_indices:
                    file_path = os.path.join(cropped_images_dir, filename)
                    os.remove(file_path)
                    print(f"Deleted excess file: {filename}")

        return output
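

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original pipeline: "sample.jpg"
    # is an assumed placeholder path, and the llm.inference / llm.upload_image
    # helpers must be configured (e.g. an imgbb API key) for the LLM fields
    # to populate.
    yolo = YOLOModel()
    for prediction in yolo.predict("sample.jpg"):
        print(prediction["category"], prediction["predicted_brand"], prediction["bbox"])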