{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Image task notebook template\n",
    "\n",
    "This template loads the dataset named by `ImageEvaluationRequest`, runs a random baseline (smoke / no-smoke classification plus one random box per annotated image) while energy consumption and emissions are tracked, and then computes classification metrics and the mean IoU.\n",
    "\n",
    "Replace the random baseline in the inference cell with your own model.\n",
    "\n",
    "## Loading the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import sys\n",
    "from datetime import datetime\n",
    "\n",
    "import numpy as np\n",
    "from datasets import load_dataset\n",
    "from fastapi import APIRouter\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
    "\n",
    "# Add the parent directory to the import path so the local `tasks` package resolves\n",
    "sys.path.append('../')\n",
    "\n",
    "from tasks.utils.evaluation import ImageEvaluationRequest\n",
    "from tasks.utils.emissions import tracker, clean_emissions_data, get_space_info\n",
    "from tasks.image import parse_boxes,compute_iou,compute_max_iou"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the dataset and selecting its splits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "request = ImageEvaluationRequest()\n",
    "\n",
    "# Load and prepare the dataset\n",
    "dataset = load_dataset(request.dataset_name)\n",
    "\n",
    "# Use the splits that ship with the dataset (no re-splitting is done here)\n",
    "train_test = dataset[\"train\"]  # training split; not used by the random baseline\n",
    "test_dataset = dataset[\"val\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start tracking emissions\n",
    "tracker.start()\n",
    "tracker.start_task(\"inference\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE CODE HERE\n",
    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
    "#--------------------------------------------------------------------------------------------\n",
    "\n",
    "# Make random predictions (placeholder for actual model inference)\n",
    "\n",
    "# Seed the generator so that re-running this cell reproduces the same baseline\n",
    "random.seed(request.test_seed)\n",
    "\n",
    "predictions = []\n",
    "true_labels = []\n",
    "pred_boxes = []\n",
    "true_boxes_list = []  # List of lists, each inner list contains boxes for one image\n",
    "\n",
    "for example in test_dataset:\n",
    "    # Parse true annotation (YOLO format: class_id x_center y_center width height)\n",
    "    # `or \"\"` also covers an annotation field that is present but None\n",
    "    annotation = (example.get(\"annotations\") or \"\").strip()\n",
    "    has_smoke = len(annotation) > 0\n",
    "    true_labels.append(int(has_smoke))\n",
    "\n",
    "    # Make random classification prediction\n",
    "    pred_has_smoke = random.random() > 0.5\n",
    "    predictions.append(int(pred_has_smoke))\n",
    "\n",
    "    # If there's a true box, parse it and make random box prediction\n",
    "    if has_smoke:\n",
    "        # Parse all true boxes from the annotation\n",
    "        image_true_boxes = parse_boxes(annotation)\n",
    "        true_boxes_list.append(image_true_boxes)\n",
    "\n",
    "        # For baseline, make one random box prediction per image\n",
    "        # In a real model, you might want to predict multiple boxes\n",
    "        random_box = [\n",
    "            random.random(),        # x_center\n",
    "            random.random(),        # y_center\n",
    "            random.random() * 0.5,  # width (max 0.5)\n",
    "            random.random() * 0.5   # height (max 0.5)\n",
    "        ]\n",
    "        pred_boxes.append(random_box)\n",
    "\n",
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE STOPS HERE\n",
    "#--------------------------------------------------------------------------------------------"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop tracking emissions\n",
    "emissions_data = tracker.stop_task()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation\n",
    "\n",
    "Classification metrics are computed over every image. The mean IoU is computed only over images that have a ground-truth annotation, taking for each image the best IoU between the predicted box and any true box."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate classification metrics\n",
    "# zero_division=0 returns 0 (without a warning) when a metric is undefined,\n",
    "# e.g. when the model never predicts the positive class\n",
    "classification_accuracy = accuracy_score(true_labels, predictions)\n",
    "classification_precision = precision_score(true_labels, predictions, zero_division=0)\n",
    "classification_recall = recall_score(true_labels, predictions, zero_division=0)\n",
    "\n",
    "# zip() would silently drop trailing items, so make sure both lists line up\n",
    "assert len(true_boxes_list) == len(pred_boxes), (\n",
    "    \"expected exactly one predicted box per annotated image\"\n",
    ")\n",
    "\n",
    "# Calculate mean IoU for object detection (only for images with smoke)\n",
    "# For each image, we compute the max IoU between the predicted box and all true boxes\n",
    "ious = [\n",
    "    compute_max_iou(true_boxes, pred_box)\n",
    "    for true_boxes, pred_box in zip(true_boxes_list, pred_boxes)\n",
    "]\n",
    "\n",
    "mean_iou = float(np.mean(ious)) if ious else 0.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results\n",
    "\n",
    "For reference, an earlier unseeded run of the random baseline on `pyronear/pyro-sdis` reported: classification accuracy ~0.500, precision ~0.840, recall ~0.497, mean IoU ~0.003, for ~0.78 Wh of energy and ~0.044 gCO2eq of emissions (~52 s on a laptop CPU)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare results dictionary\n",
    "# energy_consumed and emissions are scaled by 1000 for the *_wh / *_gco2eq fields\n",
    "results = {\n",
    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
    "    \"classification_accuracy\": float(classification_accuracy),\n",
    "    \"classification_precision\": float(classification_precision),\n",
    "    \"classification_recall\": float(classification_recall),\n",
    "    \"mean_iou\": mean_iou,\n",
    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
    "    \"dataset_config\": {\n",
    "        \"dataset_name\": request.dataset_name,\n",
    "        \"test_size\": request.test_size,\n",
    "        \"test_seed\": request.test_seed\n",
    "    }\n",
    "}\n",
    "results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}