audio-heka-ai

Paused

App Files Files Community

ariel-eddie committed on Jan 31

Commit

61358d9

verified ·

1 Parent(s): 77545c6

Updated Image task with test model inference

Browse files

Files changed (1) hide show

tasks/image.py +128 -25

tasks/image.py CHANGED Viewed

@@ -1,22 +1,113 @@
 from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
-import numpy as np
 from sklearn.metrics import accuracy_score, precision_score, recall_score
-import random
-import os
 from .utils.evaluation import ImageEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 from dotenv import load_dotenv
-load_dotenv()
 router = APIRouter()
-DESCRIPTION = "Random Baseline"
 ROUTE = "/image"
 def parse_boxes(annotation_string):
     """Parse multiple boxes from a single annotation string.
     Each box has 5 values: class_id, x_center, y_center, width, height"""
@@ -30,6 +121,7 @@ def parse_boxes(annotation_string):
             boxes.append(box)
     return boxes
 def compute_iou(box1, box2):
     """Compute Intersection over Union (IoU) between two YOLO format boxes."""
     # Convert YOLO format (x_center, y_center, width, height) to corners
@@ -59,6 +151,7 @@ def compute_iou(box1, box2):
     return intersection / (union + 1e-6)
 def compute_max_iou(true_boxes, pred_box):
     """Compute maximum IoU between a predicted box and all true boxes"""
     max_iou = 0
@@ -67,9 +160,10 @@ def compute_max_iou(true_boxes, pred_box):
         max_iou = max(max_iou, iou)
     return max_iou
 @router.post(ROUTE, tags=["Image Task"],
-             description=DESCRIPTION)
-async def evaluate_image(request: ImageEvaluationRequest):
     """
     Evaluate image classification and object detection for forest fire smoke.
@@ -90,6 +184,10 @@ async def evaluate_image(request: ImageEvaluationRequest):
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["val"]
     # Start tracking emissions
     tracker.start()
@@ -104,33 +202,38 @@ async def evaluate_image(request: ImageEvaluationRequest):
     true_labels = []
     pred_boxes = []
     true_boxes_list = []  # List of lists, each inner list contains boxes for one image
-    for example in test_dataset:
         # Parse true annotation (YOLO format: class_id x_center y_center width height)
         annotation = example.get("annotations", "").strip()
         has_smoke = len(annotation) > 0
         true_labels.append(int(has_smoke))
-        # Make random classification prediction
-        pred_has_smoke = random.random() > 0.5
         predictions.append(int(pred_has_smoke))
-        # If there's a true box, parse it and make random box prediction
         if has_smoke:
             # Parse all true boxes from the annotation
             image_true_boxes = parse_boxes(annotation)
-            true_boxes_list.append(image_true_boxes)
-            # For baseline, make one random box prediction per image
-            # In a real model, you might want to predict multiple boxes
-            random_box = [
-                random.random(),  # x_center
-                random.random(),  # y_center
-                random.random() * 0.5,  # width (max 0.5)
-                random.random() * 0.5   # height (max 0.5)
-            ]
-            pred_boxes.append(random_box)
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------

+import os
+import torch
+import numpy as np
+from loguru import logger
+from tqdm import tqdm
+from dotenv import load_dotenv
 from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score, precision_score, recall_score
 from .utils.evaluation import ImageEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
+from ultralytics import YOLO
+from ultralytics import RTDETR
+from torch.utils.data import DataLoader
+from torchvision import transforms
 from dotenv import load_dotenv
# Pull settings from a local .env file (python-dotenv) before anything reads them.
load_dotenv()

# Router that the image-task endpoint is registered on.
router = APIRouter()

# Endpoint description and path used by the route decorator.
DESCRIPTION = "Image to detect smoke"
ROUTE = "/image"

# Prefer the GPU, but fall back to the CPU so that code using `device` does not
# fail on hosts where CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def parse_boxes(annotation_string):
    """Parse every bounding box out of a YOLO-style annotation string.

    The string is a whitespace-separated sequence of numbers in groups of
    five: ``class_id x_center y_center width height``.

    Args:
        annotation_string: Raw annotation text. ``None`` or an empty string
            is treated as "no boxes".

    Returns:
        A list of ``[x_center, y_center, width, height]`` lists, one per
        complete group; the class id is dropped. A trailing group with fewer
        than five values is ignored.

    Raises:
        ValueError: If a token cannot be converted to ``float``.
    """
    # A missing annotation means the image has no boxes; do not crash on None.
    if not annotation_string:
        return []

    values = [float(token) for token in annotation_string.split()]

    # Stepping up to len(values) - 4 visits only the starts of complete
    # 5-value groups, so a truncated trailing group is skipped.
    return [values[start + 1:start + 5] for start in range(0, len(values) - 4, 5)]
def compute_iou(box1, box2):
    """Return the Intersection over Union of two YOLO-format boxes.

    Args:
        box1: ``(x_center, y_center, width, height)`` of the first box.
        box2: ``(x_center, y_center, width, height)`` of the second box.

    Returns:
        Intersection area divided by ``union area + 1e-6``; the small
        constant keeps the division defined when the union is zero.
    """
    def as_corners(yolo_box):
        # Centre/size representation -> (left, top, right, bottom).
        cx, cy, w, h = yolo_box
        return np.array([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2])

    left_a, top_a, right_a, bottom_a = as_corners(box1)
    left_b, top_b, right_b, bottom_b = as_corners(box2)

    # Overlap rectangle: innermost edges on each axis, clipped at zero when
    # the boxes do not overlap on that axis.
    inner_left = max(left_a, left_b)
    inner_top = max(top_a, top_b)
    inner_right = min(right_a, right_b)
    inner_bottom = min(bottom_a, bottom_b)
    intersection = max(0, inner_right - inner_left) * max(0, inner_bottom - inner_top)

    # Union = sum of both areas minus the part counted twice.
    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union = area_a + area_b - intersection

    return intersection / (union + 1e-6)
def compute_max_iou(true_boxes, pred_box):
    """Return the best IoU between one predicted box and any true box.

    Args:
        true_boxes: Iterable of ground-truth boxes to compare against.
        pred_box: The predicted box, passed as the second argument to
            ``compute_iou`` for every comparison.

    Returns:
        The largest IoU found, or ``0`` when ``true_boxes`` is empty.
    """
    # Seeding the candidates with 0 gives the empty case its result and keeps
    # the "first maximal value wins" behaviour of a running max.
    return max([0, *(compute_iou(true_box, pred_box) for true_box in true_boxes)])
class ClampTransform:
    """Callable transform that clamps tensor values into a fixed range."""

    def __init__(self, min_val=0.0, max_val=1.0):
        """Store the clamp bounds.

        Args:
            min_val: Lower bound applied to every element.
            max_val: Upper bound applied to every element.
        """
        self.min_val, self.max_val = min_val, max_val

    def __call__(self, tensor):
        """Return ``tensor`` with its values limited to [min_val, max_val]."""
        lower, upper = self.min_val, self.max_val
        return torch.clamp(tensor, min=lower, max=upper)
def collate_fn(batch):
    """Collate dataset items into one batch dictionary.

    Args:
        batch: Sequence of mapping-like items. Each must have an ``'image'``
            entry (presumably a PIL image, per the conversion below — confirm
            against the dataset) and may have an ``'annotations'`` entry.

    Returns:
        A dict with ``'image'`` (all transformed images stacked into a single
        tensor) and ``'annotations'`` (a list with one entry per item, ``''``
        where the item has none).
    """
    raw_images = [sample['image'] for sample in batch]
    labels = [sample.get('annotations', '') for sample in batch]

    # Image -> tensor, clamp values into [0, 1], then resize to 640x640 so
    # every image has the same size and the batch can be stacked.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        ClampTransform(min_val=0.0, max_val=1.0),
        transforms.Resize((640, 640))
    ])
    stacked = torch.stack([preprocess(picture) for picture in raw_images])

    return {'image': stacked, 'annotations': labels}
 def parse_boxes(annotation_string):
     """Parse multiple boxes from a single annotation string.
     Each box has 5 values: class_id, x_center, y_center, width, height"""
             boxes.append(box)
     return boxes
 def compute_iou(box1, box2):
     """Compute Intersection over Union (IoU) between two YOLO format boxes."""
     # Convert YOLO format (x_center, y_center, width, height) to corners
     return intersection / (union + 1e-6)
 def compute_max_iou(true_boxes, pred_box):
     """Compute maximum IoU between a predicted box and all true boxes"""
     max_iou = 0
         max_iou = max(max_iou, iou)
     return max_iou
 @router.post(ROUTE, tags=["Image Task"],
+              description=DESCRIPTION)
+async def evaluate_image(model_path: str = "models/yolo11s_best.pt", request: ImageEvaluationRequest = ImageEvaluationRequest()):
     """
     Evaluate image classification and object detection for forest fire smoke.
     # Split dataset
     train_test = dataset["train"]
     test_dataset = dataset["val"]
+    if("yolo" in model_path):
+        model = YOLO(model_path, task="detect")
+    if("detr" in model_path):
+        model = RTDETR(model_path)
     # Start tracking emissions
     tracker.start()
     true_labels = []
     pred_boxes = []
     true_boxes_list = []  # List of lists, each inner list contains boxes for one image
+    for example in tqdm(test_dataset):
         # Parse true annotation (YOLO format: class_id x_center y_center width height)
         annotation = example.get("annotations", "").strip()
         has_smoke = len(annotation) > 0
         true_labels.append(int(has_smoke))
+        image=example["image"]
+        results = model(image, verbose=False)
+        boxes = results[0].boxes.xywh.tolist()
+        pred_has_smoke = len(boxes) > 0
         predictions.append(int(pred_has_smoke))
         if has_smoke:
+            # If there's a true box, parse it and make box prediction
             # Parse all true boxes from the annotation
             image_true_boxes = parse_boxes(annotation)
+            # Predicted bboxes
+            # Iterate through the results
+            for box in boxes:
+                x, y, w, h = box
+                image_width, image_height = image.size
+                x = x / image_width
+                y = y / image_height
+                w_n = w / image_width
+                h_n = h / image_height
+                formatted_box = [x, y, w_n, h_n]
+                pred_boxes.append(formatted_box)
+                true_boxes_list.append(image_true_boxes)
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------