submission-bert

Sleeping

File size: 13,503 Bytes

998e8ac

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Image task notebook template\n",
    "## Loading the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastapi import APIRouter\n",
    "from datetime import datetime\n",
    "from datasets import load_dataset\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score\n",
    "\n",
    "import random\n",
    "\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "from tasks.utils.evaluation import ImageEvaluationRequest\n",
    "from tasks.utils.emissions import tracker, clean_emissions_data, get_space_info\n",
    "from tasks.image import parse_boxes,compute_iou,compute_max_iou"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the datasets and splitting them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4f62b23ca587477d9f37430e687bf951",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/7.72k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--pyronear--pyro-sdis. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
      "  warnings.warn(message)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "70735dd748e343119b5a7cd966dcd0f0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00000-of-00007.parquet:   0%|          | 0.00/433M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "903c3227c24649f1a0424e039d74d303",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00001-of-00007.parquet:   0%|          | 0.00/434M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8795b7696f124715b9d52287d5cd4ee0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00002-of-00007.parquet:   0%|          | 0.00/432M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4b6c1240bf024d61bf913584d13834f5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00003-of-00007.parquet:   0%|          | 0.00/428M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd5f8172a31f4fd79d489db96ede9c21",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00004-of-00007.parquet:   0%|          | 0.00/431M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "416af82dba3a4ab7ad13190703c90757",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00005-of-00007.parquet:   0%|          | 0.00/429M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6819ad85508641a1a64bea34303446ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train-00006-of-00007.parquet:   0%|          | 0.00/431M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "90a7f85c802b4330b502c8bbd3cca7f9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "val-00000-of-00001.parquet:   0%|          | 0.00/407M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b93f2f19aafb43e2b8db0fd7bb3ebd34",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/29537 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c14c0f2cde184c959970dfccaa26b2d2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating val split:   0%|          | 0/4099 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "request = ImageEvaluationRequest()\n",
    "\n",
    "# Load and prepare the dataset\n",
    "dataset = load_dataset(request.dataset_name)\n",
    "\n",
    "# Split dataset\n",
    "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
    "test_dataset = train_test[\"test\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start tracking emissions\n",
    "tracker.start()\n",
    "tracker.start_task(\"inference\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE CODE HERE\n",
    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
    "#--------------------------------------------------------------------------------------------   \n",
    "\n",
    "# Make random predictions (placeholder for actual model inference)\n",
    "\n",
    "predictions = []\n",
    "true_labels = []\n",
    "pred_boxes = []\n",
    "true_boxes_list = []  # List of lists, each inner list contains boxes for one image\n",
    "\n",
    "for example in test_dataset:\n",
    "    # Parse true annotation (YOLO format: class_id x_center y_center width height)\n",
    "    annotation = example.get(\"annotations\", \"\").strip()\n",
    "    has_smoke = len(annotation) > 0\n",
    "    true_labels.append(int(has_smoke))\n",
    "    \n",
    "    # Make random classification prediction\n",
    "    pred_has_smoke = random.random() > 0.5\n",
    "    predictions.append(int(pred_has_smoke))\n",
    "    \n",
    "    # If there's a true box, parse it and make random box prediction\n",
    "    if has_smoke:\n",
    "        # Parse all true boxes from the annotation\n",
    "        image_true_boxes = parse_boxes(annotation)\n",
    "        true_boxes_list.append(image_true_boxes)\n",
    "        \n",
    "        # For baseline, make one random box prediction per image\n",
    "        # In a real model, you might want to predict multiple boxes\n",
    "        random_box = [\n",
    "            random.random(),  # x_center\n",
    "            random.random(),  # y_center\n",
    "            random.random() * 0.5,  # width (max 0.5)\n",
    "            random.random() * 0.5   # height (max 0.5)\n",
    "        ]\n",
    "        pred_boxes.append(random_box)\n",
    "\n",
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE STOPS HERE\n",
    "#--------------------------------------------------------------------------------------------   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop tracking emissions\n",
    "emissions_data = tracker.stop_task()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Calculate classification metrics\n",
    "classification_accuracy = accuracy_score(true_labels, predictions)\n",
    "classification_precision = precision_score(true_labels, predictions)\n",
    "classification_recall = recall_score(true_labels, predictions)\n",
    "\n",
    "# Calculate mean IoU for object detection (only for images with smoke)\n",
    "# For each image, we compute the max IoU between the predicted box and all true boxes\n",
    "ious = []\n",
    "for true_boxes, pred_box in zip(true_boxes_list, pred_boxes):\n",
    "    max_iou = compute_max_iou(true_boxes, pred_box)\n",
    "    ious.append(max_iou)\n",
    "\n",
    "mean_iou = float(np.mean(ious)) if ious else 0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'submission_timestamp': '2025-01-22T15:57:37.288173',\n",
       " 'classification_accuracy': 0.5001692620176033,\n",
       " 'classification_precision': 0.8397129186602871,\n",
       " 'classification_recall': 0.4972677595628415,\n",
       " 'mean_iou': 0.002819781629108398,\n",
       " 'energy_consumed_wh': 0.779355299496116,\n",
       " 'emissions_gco2eq': 0.043674291628462855,\n",
       " 'emissions_data': {'run_id': '4e750cd5-60f0-444c-baee-b5f7b31f784b',\n",
       "  'duration': 51.72819679998793,\n",
       "  'emissions': 4.3674291628462856e-05,\n",
       "  'emissions_rate': 8.445163379568943e-07,\n",
       "  'cpu_power': 42.5,\n",
       "  'gpu_power': 0.0,\n",
       "  'ram_power': 11.755242347717285,\n",
       "  'cpu_energy': 0.0006104993474311617,\n",
       "  'gpu_energy': 0,\n",
       "  'ram_energy': 0.00016885595206495442,\n",
       "  'energy_consumed': 0.0007793552994961161,\n",
       "  'country_name': 'France',\n",
       "  'country_iso_code': 'FRA',\n",
       "  'region': 'île-de-france',\n",
       "  'cloud_provider': '',\n",
       "  'cloud_region': '',\n",
       "  'os': 'Windows-11-10.0.22631-SP0',\n",
       "  'python_version': '3.12.7',\n",
       "  'codecarbon_version': '3.0.0_rc0',\n",
       "  'cpu_count': 12,\n",
       "  'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
       "  'gpu_count': None,\n",
       "  'gpu_model': None,\n",
       "  'ram_total_size': 31.347312927246094,\n",
       "  'tracking_mode': 'machine',\n",
       "  'on_cloud': 'N',\n",
       "  'pue': 1.0},\n",
       " 'dataset_config': {'dataset_name': 'pyronear/pyro-sdis',\n",
       "  'test_size': 0.2,\n",
       "  'test_seed': 42}}"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# Prepare results dictionary\n",
    "results = {\n",
    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
    "    \"classification_accuracy\": float(classification_accuracy),\n",
    "    \"classification_precision\": float(classification_precision),\n",
    "    \"classification_recall\": float(classification_recall),\n",
    "    \"mean_iou\": mean_iou,\n",
    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
    "    \"dataset_config\": {\n",
    "        \"dataset_name\": request.dataset_name,\n",
    "        \"test_size\": request.test_size,\n",
    "        \"test_seed\": request.test_seed\n",
    "    }\n",
    "}\n",
    "results"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}