# Image task notebook template
## Loading the necessary libraries

In [13]:
import random
import sys
from datetime import datetime

import numpy as np
from datasets import load_dataset
from fastapi import APIRouter
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Make the project-local `tasks` package importable; must run before the imports below.
sys.path.append('../')

from tasks.utils.evaluation import ImageEvaluationRequest
from tasks.utils.emissions import tracker, clean_emissions_data, get_space_info
from tasks.image import parse_boxes,compute_iou,compute_max_iou

## Loading the datasets and splitting them

In [4]:
# Evaluation request built with no arguments; it supplies dataset_name here
# (and the test_size / test_seed values reported in the results cell).
request = ImageEvaluationRequest()

# Load the dataset named in the request
dataset = load_dataset(request.dataset_name)

# No re-splitting happens here: the dataset's own "train" and "val" splits are used as-is.
# NOTE: despite its name, `train_test` holds the "train" split.
train_test, test_dataset = dataset["train"], dataset["val"]

README.md:   0%|          | 0.00/7.72k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


train-00000-of-00007.parquet:   0%|          | 0.00/433M [00:00<?, ?B/s]

train-00001-of-00007.parquet:   0%|          | 0.00/434M [00:00<?, ?B/s]

train-00002-of-00007.parquet:   0%|          | 0.00/432M [00:00<?, ?B/s]

train-00003-of-00007.parquet:   0%|          | 0.00/428M [00:00<?, ?B/s]

train-00004-of-00007.parquet:   0%|          | 0.00/431M [00:00<?, ?B/s]

train-00005-of-00007.parquet:   0%|          | 0.00/429M [00:00<?, ?B/s]

train-00006-of-00007.parquet:   0%|          | 0.00/431M [00:00<?, ?B/s]

val-00000-of-00001.parquet:   0%|          | 0.00/407M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/29537 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/4099 [00:00<?, ? examples/s]

## Random Baseline

In [10]:
# Start tracking emissions
# Starts the shared tracker, then opens a task named "inference".
# Keep the model inference between this cell and the cell calling
# tracker.stop_task() so that it is (presumably) what the task measures.
tracker.start()
tracker.start_task("inference")

In [11]:

#--------------------------------------------------------------------------------------------
# YOUR MODEL INFERENCE CODE HERE
# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
#--------------------------------------------------------------------------------------------

# Make random predictions (placeholder for actual model inference).
# A dedicated generator seeded from the request keeps the baseline reproducible
# across "Restart & Run All" without touching the global `random` state.
baseline_rng = random.Random(request.test_seed)

predictions = []      # Predicted class per image (1 = smoke, 0 = no smoke)
true_labels = []      # Ground-truth class per image (1 = smoke, 0 = no smoke)
pred_boxes = []       # One predicted box per image that truly contains smoke
true_boxes_list = []  # List of lists, each inner list contains boxes for one image

for example in test_dataset:
    # Parse true annotation (YOLO format: class_id x_center y_center width height).
    # `or ""` also covers an "annotations" key that is present but set to None,
    # which would otherwise raise AttributeError on .strip().
    annotation = (example.get("annotations") or "").strip()
    has_smoke = len(annotation) > 0
    true_labels.append(int(has_smoke))

    # Make random classification prediction
    pred_has_smoke = baseline_rng.random() > 0.5
    predictions.append(int(pred_has_smoke))

    # Boxes are only collected for images that truly contain smoke, so
    # true_boxes_list and pred_boxes stay aligned index by index.
    if has_smoke:
        # Parse all true boxes from the annotation
        image_true_boxes = parse_boxes(annotation)
        true_boxes_list.append(image_true_boxes)

        # For baseline, make one random box prediction per image
        # In a real model, you might want to predict multiple boxes
        random_box = [
            baseline_rng.random(),        # x_center
            baseline_rng.random(),        # y_center
            baseline_rng.random() * 0.5,  # width (max 0.5)
            baseline_rng.random() * 0.5   # height (max 0.5)
        ]
        pred_boxes.append(random_box)


#--------------------------------------------------------------------------------------------
# YOUR MODEL INFERENCE STOPS HERE
#--------------------------------------------------------------------------------------------

In [None]:
# Stop tracking emissions
# Keeps whatever stop_task() returns; the results cell reads its
# .energy_consumed and .emissions attributes and passes it to clean_emissions_data().
emissions_data = tracker.stop_task()

In [15]:
# Calculate classification metrics.
# zero_division=0 makes the degenerate cases explicit: if the positive class is
# never predicted (precision) or never present (recall), the score is 0 and no
# UndefinedMetricWarning is emitted.
classification_accuracy = accuracy_score(true_labels, predictions)
classification_precision = precision_score(true_labels, predictions, zero_division=0)
classification_recall = recall_score(true_labels, predictions, zero_division=0)

# Calculate mean IoU for object detection (only for images with smoke)
# For each image, we compute the max IoU between the predicted box and all true boxes
ious = [
    compute_max_iou(true_boxes, pred_box)
    for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)
]

# Fall back to 0.0 when no image contained smoke (np.mean of an empty list would be nan).
# `np` comes from the import cell at the top of the notebook.
mean_iou = float(np.mean(ious)) if ious else 0.0

In [18]:

# Assemble the submission payload: classification metrics, detection IoU,
# tracker measurements and the dataset settings used for this run.
# The two tracker readings are scaled by 1000 to match the `_wh` / `_gco2eq`
# key names (presumably kWh -> Wh and kg -> g; confirm against the tracker's units).
results = dict(
    submission_timestamp=datetime.now().isoformat(),
    classification_accuracy=float(classification_accuracy),
    classification_precision=float(classification_precision),
    classification_recall=float(classification_recall),
    mean_iou=mean_iou,
    energy_consumed_wh=emissions_data.energy_consumed * 1000,
    emissions_gco2eq=emissions_data.emissions * 1000,
    emissions_data=clean_emissions_data(emissions_data),
    dataset_config=dict(
        dataset_name=request.dataset_name,
        test_size=request.test_size,
        test_seed=request.test_seed,
    ),
)
results

{'submission_timestamp': '2025-01-22T15:57:37.288173',
 'classification_accuracy': 0.5001692620176033,
 'classification_precision': 0.8397129186602871,
 'classification_recall': 0.4972677595628415,
 'mean_iou': 0.002819781629108398,
 'energy_consumed_wh': 0.779355299496116,
 'emissions_gco2eq': 0.043674291628462855,
 'emissions_data': {'run_id': '4e750cd5-60f0-444c-baee-b5f7b31f784b',
  'duration': 51.72819679998793,
  'emissions': 4.3674291628462856e-05,
  'emissions_rate': 8.445163379568943e-07,
  'cpu_power': 42.5,
  'gpu_power': 0.0,
  'ram_power': 11.755242347717285,
  'cpu_energy': 0.0006104993474311617,
  'gpu_energy': 0,
  'ram_energy': 0.00016885595206495442,
  'energy_consumed': 0.0007793552994961161,
  'country_name': 'France',
  'country_iso_code': 'FRA',
  'region': 'île-de-france',
  'cloud_provider': '',
  'cloud_region': '',
  'os': 'Windows-11-10.0.22631-SP0',
  'python_version': '3.12.7',
  'codecarbon_version': '3.0.0_rc0',
  'cpu_count': 12,
  'cpu_model': '13th Gen