{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text task notebook template\n",
    "## Loading the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n",
      "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n",
      "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n",
      "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
      "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n",
      " Windows OS detected: Please install Intel Power Gadget to measure CPU\n",
      "\n",
      "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n",
      "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n",
      "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. Falling back on CPU constant mode.\n",
      "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n",
      "[codecarbon INFO @ 19:48:11] No GPU found.\n",
      "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n",
      "[codecarbon INFO @ 19:48:11]   Platform system: Windows-11-10.0.22631-SP0\n",
      "[codecarbon INFO @ 19:48:11]   Python version: 3.12.7\n",
      "[codecarbon INFO @ 19:48:11]   CodeCarbon version: 3.0.0_rc0\n",
      "[codecarbon INFO @ 19:48:11]   Available RAM : 31.347 GB\n",
      "[codecarbon INFO @ 19:48:11]   CPU count: 12\n",
      "[codecarbon INFO @ 19:48:11]   CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n",
      "[codecarbon INFO @ 19:48:11]   GPU count: None\n",
      "[codecarbon INFO @ 19:48:11]   GPU model: None\n",
      "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n"
     ]
    }
   ],
   "source": [
    "from fastapi import APIRouter\n",
    "from datetime import datetime\n",
    "from datasets import load_dataset\n",
    "from sklearn.metrics import accuracy_score\n",
    "import random\n",
    "\n",
    "import sys\n",
    "sys.path.append('../tasks')\n",
    "\n",
    "from utils.evaluation import TextEvaluationRequest\n",
    "from utils.emissions import tracker, clean_emissions_data, get_space_info\n",
    "\n",
    "\n",
    "# Define the label mapping\n",
    "LABEL_MAPPING = {\n",
    "    \"0_not_relevant\": 0,\n",
    "    \"1_not_happening\": 1,\n",
    "    \"2_not_human\": 2,\n",
    "    \"3_not_bad\": 3,\n",
    "    \"4_solutions_harmful_unnecessary\": 4,\n",
    "    \"5_science_unreliable\": 5,\n",
    "    \"6_proponents_biased\": 6,\n",
    "    \"7_fossil_fuels_needed\": 7\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading the datasets and splitting them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "668da7bf85434e098b95c3ec447d78fe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
      "  warnings.warn(message)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5b68d43359eb429395da8be7d4b15556",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "train.parquet:   0%|          | 0.00/1.21M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "140a304773914e9db8f698eabeb40298",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split:   0%|          | 0/6091 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6d04e8ab1906400e8e0029949dc523a5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map:   0%|          | 0/6091 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "request = TextEvaluationRequest()\n",
    "\n",
    "# Load and prepare the dataset\n",
    "dataset = load_dataset(request.dataset_name)\n",
    "\n",
    "# Convert string labels to integers\n",
    "dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n",
    "\n",
    "# Split dataset\n",
    "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n",
    "test_dataset = train_test[\"test\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Random Baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start tracking emissions\n",
    "tracker.start()\n",
    "tracker.start_task(\"inference\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1,\n",
       " 7,\n",
       " 6,\n",
       " 6,\n",
       " 2,\n",
       " 0,\n",
       " 1,\n",
       " 7,\n",
       " 3,\n",
       " 6,\n",
       " 6,\n",
       " 3,\n",
       " 6,\n",
       " 6,\n",
       " 5,\n",
       " 0,\n",
       " 2,\n",
       " 6,\n",
       " 2,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 1,\n",
       " 3,\n",
       " 6,\n",
       " 4,\n",
       " 2,\n",
       " 1,\n",
       " 4,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 1,\n",
       " 5,\n",
       " 5,\n",
       " 1,\n",
       " 2,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 3,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 0,\n",
       " 3,\n",
       " 3,\n",
       " 3,\n",
       " 4,\n",
       " 1,\n",
       " 4,\n",
       " 4,\n",
       " 1,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 2,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 4,\n",
       " 2,\n",
       " 0,\n",
       " 1,\n",
       " 6,\n",
       " 3,\n",
       " 2,\n",
       " 5,\n",
       " 5,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 0,\n",
       " 1,\n",
       " 5,\n",
       " 5,\n",
       " 7,\n",
       " 4,\n",
       " 6,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 2,\n",
       " 5,\n",
       " 3,\n",
       " 3,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 4,\n",
       " 5,\n",
       " 7,\n",
       " 0,\n",
       " 3,\n",
       " 1,\n",
       " 4,\n",
       " 6,\n",
       " 0,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 2,\n",
       " 2,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 3,\n",
       " 4,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 3,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 6,\n",
       " 4,\n",
       " 5,\n",
       " 4,\n",
       " 3,\n",
       " 6,\n",
       " 0,\n",
       " 4,\n",
       " 0,\n",
       " 1,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 3,\n",
       " 3,\n",
       " 0,\n",
       " 1,\n",
       " 2,\n",
       " 4,\n",
       " 4,\n",
       " 3,\n",
       " 1,\n",
       " 2,\n",
       " 4,\n",
       " 3,\n",
       " 0,\n",
       " 5,\n",
       " 3,\n",
       " 6,\n",
       " 3,\n",
       " 6,\n",
       " 1,\n",
       " 3,\n",
       " 4,\n",
       " 5,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 4,\n",
       " 4,\n",
       " 5,\n",
       " 3,\n",
       " 1,\n",
       " 7,\n",
       " 4,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 0,\n",
       " 5,\n",
       " 3,\n",
       " 6,\n",
       " 3,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 4,\n",
       " 1,\n",
       " 2,\n",
       " 6,\n",
       " 3,\n",
       " 4,\n",
       " 4,\n",
       " 5,\n",
       " 1,\n",
       " 5,\n",
       " 4,\n",
       " 0,\n",
       " 1,\n",
       " 7,\n",
       " 3,\n",
       " 6,\n",
       " 0,\n",
       " 7,\n",
       " 4,\n",
       " 6,\n",
       " 3,\n",
       " 0,\n",
       " 0,\n",
       " 4,\n",
       " 6,\n",
       " 6,\n",
       " 4,\n",
       " 0,\n",
       " 5,\n",
       " 7,\n",
       " 5,\n",
       " 1,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 4,\n",
       " 5,\n",
       " 1,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 3,\n",
       " 0,\n",
       " 0,\n",
       " 6,\n",
       " 6,\n",
       " 2,\n",
       " 0,\n",
       " 7,\n",
       " 4,\n",
       " 5,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 4,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 6,\n",
       " 1,\n",
       " 3,\n",
       " 5,\n",
       " 5,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 3,\n",
       " 7,\n",
       " 4,\n",
       " 3,\n",
       " 5,\n",
       " 5,\n",
       " 7,\n",
       " 2,\n",
       " 6,\n",
       " 1,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 0,\n",
       " 1,\n",
       " 7,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 5,\n",
       " 6,\n",
       " 3,\n",
       " 2,\n",
       " 3,\n",
       " 0,\n",
       " 4,\n",
       " 3,\n",
       " 5,\n",
       " 6,\n",
       " 0,\n",
       " 0,\n",
       " 6,\n",
       " 6,\n",
       " 1,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 5,\n",
       " 7,\n",
       " 6,\n",
       " 3,\n",
       " 5,\n",
       " 6,\n",
       " 0,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 1,\n",
       " 2,\n",
       " 1,\n",
       " 5,\n",
       " 3,\n",
       " 0,\n",
       " 3,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 7,\n",
       " 0,\n",
       " 1,\n",
       " 0,\n",
       " 4,\n",
       " 1,\n",
       " 1,\n",
       " 0,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 7,\n",
       " 6,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 4,\n",
       " 3,\n",
       " 4,\n",
       " 3,\n",
       " 3,\n",
       " 2,\n",
       " 5,\n",
       " 1,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 3,\n",
       " 2,\n",
       " 6,\n",
       " 4,\n",
       " 4,\n",
       " 1,\n",
       " 2,\n",
       " 6,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 1,\n",
       " 3,\n",
       " 5,\n",
       " 2,\n",
       " 6,\n",
       " 4,\n",
       " 6,\n",
       " 7,\n",
       " 0,\n",
       " 5,\n",
       " 1,\n",
       " 6,\n",
       " 5,\n",
       " 3,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 3,\n",
       " 0,\n",
       " 0,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 1,\n",
       " 5,\n",
       " 1,\n",
       " 2,\n",
       " 6,\n",
       " 2,\n",
       " 6,\n",
       " 0,\n",
       " 2,\n",
       " 1,\n",
       " 5,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 0,\n",
       " 5,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 1,\n",
       " 0,\n",
       " 5,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 5,\n",
       " 1,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 2,\n",
       " 3,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 2,\n",
       " 6,\n",
       " 0,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 0,\n",
       " 7,\n",
       " 0,\n",
       " 6,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 5,\n",
       " 5,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 7,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 1,\n",
       " 4,\n",
       " 7,\n",
       " 5,\n",
       " 4,\n",
       " 0,\n",
       " 0,\n",
       " 7,\n",
       " 0,\n",
       " 0,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 5,\n",
       " 3,\n",
       " 0,\n",
       " 3,\n",
       " 6,\n",
       " 5,\n",
       " 7,\n",
       " 2,\n",
       " 6,\n",
       " 7,\n",
       " 5,\n",
       " 2,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 7,\n",
       " 5,\n",
       " 4,\n",
       " 1,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 4,\n",
       " 0,\n",
       " 6,\n",
       " 3,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 4,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 3,\n",
       " 4,\n",
       " 3,\n",
       " 5,\n",
       " 4,\n",
       " 4,\n",
       " 5,\n",
       " 0,\n",
       " 1,\n",
       " 3,\n",
       " 7,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 3,\n",
       " 3,\n",
       " 3,\n",
       " 5,\n",
       " 3,\n",
       " 3,\n",
       " 4,\n",
       " 0,\n",
       " 1,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 7,\n",
       " 5,\n",
       " 0,\n",
       " 0,\n",
       " 5,\n",
       " 2,\n",
       " 6,\n",
       " 2,\n",
       " 6,\n",
       " 7,\n",
       " 6,\n",
       " 5,\n",
       " 7,\n",
       " 5,\n",
       " 7,\n",
       " 1,\n",
       " 6,\n",
       " 6,\n",
       " 0,\n",
       " 4,\n",
       " 7,\n",
       " 3,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 5,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 0,\n",
       " 0,\n",
       " 3,\n",
       " 3,\n",
       " 7,\n",
       " 3,\n",
       " 0,\n",
       " 1,\n",
       " 1,\n",
       " 6,\n",
       " 0,\n",
       " 0,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 6,\n",
       " 7,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 4,\n",
       " 5,\n",
       " 4,\n",
       " 4,\n",
       " 3,\n",
       " 6,\n",
       " 5,\n",
       " 2,\n",
       " 0,\n",
       " 6,\n",
       " 0,\n",
       " 6,\n",
       " 4,\n",
       " 3,\n",
       " 5,\n",
       " 7,\n",
       " 7,\n",
       " 5,\n",
       " 5,\n",
       " 1,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 6,\n",
       " 7,\n",
       " 6,\n",
       " 5,\n",
       " 2,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 4,\n",
       " 7,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " 0,\n",
       " 6,\n",
       " 0,\n",
       " 2,\n",
       " 6,\n",
       " 6,\n",
       " 2,\n",
       " 3,\n",
       " 0,\n",
       " 5,\n",
       " 0,\n",
       " 5,\n",
       " 7,\n",
       " 2,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 4,\n",
       " 0,\n",
       " 7,\n",
       " 1,\n",
       " 4,\n",
       " 5,\n",
       " 0,\n",
       " 5,\n",
       " 5,\n",
       " 2,\n",
       " 0,\n",
       " 2,\n",
       " 5,\n",
       " 5,\n",
       " 6,\n",
       " 3,\n",
       " 4,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 5,\n",
       " 0,\n",
       " 7,\n",
       " 2,\n",
       " 3,\n",
       " 7,\n",
       " 2,\n",
       " 4,\n",
       " 0,\n",
       " 5,\n",
       " 7,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 6,\n",
       " 4,\n",
       " 3,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 0,\n",
       " 3,\n",
       " 4,\n",
       " 3,\n",
       " 5,\n",
       " 2,\n",
       " 4,\n",
       " 0,\n",
       " 3,\n",
       " 6,\n",
       " 1,\n",
       " 3,\n",
       " 1,\n",
       " 4,\n",
       " 3,\n",
       " 3,\n",
       " 3,\n",
       " 0,\n",
       " 7,\n",
       " 6,\n",
       " 2,\n",
       " 4,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 1,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 4,\n",
       " 3,\n",
       " 0,\n",
       " 7,\n",
       " 3,\n",
       " 1,\n",
       " 2,\n",
       " 1,\n",
       " 6,\n",
       " 4,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 1,\n",
       " 5,\n",
       " 1,\n",
       " 6,\n",
       " 3,\n",
       " 0,\n",
       " 2,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 0,\n",
       " 1,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 5,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 3,\n",
       " 4,\n",
       " 1,\n",
       " 6,\n",
       " 2,\n",
       " 4,\n",
       " 4,\n",
       " 6,\n",
       " 4,\n",
       " 5,\n",
       " 7,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 4,\n",
       " 7,\n",
       " 4,\n",
       " 3,\n",
       " 3,\n",
       " 6,\n",
       " 1,\n",
       " 2,\n",
       " 0,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 5,\n",
       " 6,\n",
       " 5,\n",
       " 7,\n",
       " 5,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 2,\n",
       " 1,\n",
       " 6,\n",
       " 5,\n",
       " 7,\n",
       " 0,\n",
       " 0,\n",
       " 5,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 7,\n",
       " 5,\n",
       " 2,\n",
       " 5,\n",
       " 4,\n",
       " 2,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 3,\n",
       " 6,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 6,\n",
       " 0,\n",
       " 1,\n",
       " 3,\n",
       " 3,\n",
       " 6,\n",
       " 4,\n",
       " 6,\n",
       " 4,\n",
       " 6,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 2,\n",
       " 6,\n",
       " 6,\n",
       " 2,\n",
       " 4,\n",
       " 3,\n",
       " 3,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 1,\n",
       " 1,\n",
       " 7,\n",
       " 7,\n",
       " 6,\n",
       " 1,\n",
       " 7,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 4,\n",
       " 2,\n",
       " 2,\n",
       " 3,\n",
       " 0,\n",
       " 1,\n",
       " 4,\n",
       " 0,\n",
       " 4,\n",
       " 6,\n",
       " 5,\n",
       " 3,\n",
       " 2,\n",
       " 3,\n",
       " 2,\n",
       " 3,\n",
       " 6,\n",
       " 2,\n",
       " 1,\n",
       " 4,\n",
       " 7,\n",
       " 6,\n",
       " 4,\n",
       " 5,\n",
       " 6,\n",
       " 7,\n",
       " 7,\n",
       " 2,\n",
       " 0,\n",
       " 5,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 6,\n",
       " 6,\n",
       " 5,\n",
       " 4,\n",
       " 4,\n",
       " 7,\n",
       " 0,\n",
       " 5,\n",
       " 1,\n",
       " 7,\n",
       " 0,\n",
       " 3,\n",
       " 1,\n",
       " 7,\n",
       " 0,\n",
       " 1,\n",
       " 4,\n",
       " 7,\n",
       " 5,\n",
       " 0,\n",
       " 4,\n",
       " 0,\n",
       " 0,\n",
       " 1,\n",
       " 0,\n",
       " 6,\n",
       " 4,\n",
       " 0,\n",
       " 5,\n",
       " 4,\n",
       " 6,\n",
       " 6,\n",
       " 7,\n",
       " 2,\n",
       " 6,\n",
       " 2,\n",
       " 6,\n",
       " 0,\n",
       " 3,\n",
       " 2,\n",
       " 2,\n",
       " 1,\n",
       " 5,\n",
       " 4,\n",
       " 7,\n",
       " 6,\n",
       " 6,\n",
       " 2,\n",
       " 5,\n",
       " 5,\n",
       " 5,\n",
       " 0,\n",
       " 3,\n",
       " 5,\n",
       " 4,\n",
       " 5,\n",
       " 7,\n",
       " 5,\n",
       " 0,\n",
       " 5,\n",
       " 0,\n",
       " 0,\n",
       " 2,\n",
       " 0,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 2,\n",
       " 4,\n",
       " 3,\n",
       " 4,\n",
       " 1,\n",
       " 7,\n",
       " 2,\n",
       " 1,\n",
       " 0,\n",
       " 3,\n",
       " 0,\n",
       " 3,\n",
       " 1,\n",
       " 1,\n",
       " 0,\n",
       " 5,\n",
       " 3,\n",
       " 1,\n",
       " 2,\n",
       " 5,\n",
       " 6,\n",
       " 7,\n",
       " 6,\n",
       " 7,\n",
       " 0,\n",
       " 2,\n",
       " 6,\n",
       " 3,\n",
       " 1,\n",
       " 5,\n",
       " 4,\n",
       " 2,\n",
       " 4,\n",
       " 6,\n",
       " 5,\n",
       " 2,\n",
       " 7,\n",
       " ...]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE CODE HERE\n",
    "# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
    "#--------------------------------------------------------------------------------------------   \n",
    "\n",
    "# Make random predictions (placeholder for actual model inference)\n",
    "true_labels = test_dataset[\"label\"]\n",
    "predictions = [random.randint(0, 7) for _ in range(len(true_labels))]\n",
    "\n",
    "predictions\n",
    "\n",
    "#--------------------------------------------------------------------------------------------\n",
    "# YOUR MODEL INFERENCE STOPS HERE\n",
    "#--------------------------------------------------------------------------------------------   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n",
      "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. RAM Power : 11.755242347717285 W\n",
      "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n",
      "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n",
      "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stop tracking emissions\n",
    "emissions_data = tracker.stop_task()\n",
    "emissions_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10090237899917966"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Calculate accuracy\n",
    "accuracy = accuracy_score(true_labels, predictions)\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n",
       " 'accuracy': 0.10090237899917966,\n",
       " 'energy_consumed_wh': 0.7195645902801733,\n",
       " 'emissions_gco2eq': 0.040323680074710634,\n",
       " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n",
       "  'duration': 47.736408500000834,\n",
       "  'emissions': 4.032368007471064e-05,\n",
       "  'emissions_rate': 8.444466886328872e-07,\n",
       "  'cpu_power': 42.5,\n",
       "  'gpu_power': 0.0,\n",
       "  'ram_power': 11.755242347717285,\n",
       "  'cpu_energy': 0.0005636615353475565,\n",
       "  'gpu_energy': 0,\n",
       "  'ram_energy': 0.00015590305493261682,\n",
       "  'energy_consumed': 0.0007195645902801733,\n",
       "  'country_name': 'France',\n",
       "  'country_iso_code': 'FRA',\n",
       "  'region': 'île-de-france',\n",
       "  'cloud_provider': '',\n",
       "  'cloud_region': '',\n",
       "  'os': 'Windows-11-10.0.22631-SP0',\n",
       "  'python_version': '3.12.7',\n",
       "  'codecarbon_version': '3.0.0_rc0',\n",
       "  'cpu_count': 12,\n",
       "  'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n",
       "  'gpu_count': None,\n",
       "  'gpu_model': None,\n",
       "  'ram_total_size': 31.347312927246094,\n",
       "  'tracking_mode': 'machine',\n",
       "  'on_cloud': 'N',\n",
       "  'pue': 1.0},\n",
       " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n",
       "  'test_size': 0.2,\n",
       "  'test_seed': 42}}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Prepare results dictionary\n",
    "results = {\n",
    "    \"submission_timestamp\": datetime.now().isoformat(),\n",
    "    \"accuracy\": float(accuracy),\n",
    "    \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n",
    "    \"emissions_gco2eq\": emissions_data.emissions * 1000,\n",
    "    \"emissions_data\": clean_emissions_data(emissions_data),\n",
    "    \"dataset_config\": {\n",
    "        \"dataset_name\": request.dataset_name,\n",
    "        \"test_size\": request.test_size,\n",
    "        \"test_seed\": request.test_seed\n",
    "    }\n",
    "}\n",
    "\n",
    "results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Development of the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "90f50ab19698484489f36976745efad3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\models--facebook--bart-large-mnli. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
      "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
      "  warnings.warn(message)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6e3974d8ff284603821f7beca9bd353d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bc29cb379c644b00b1bdf61d5426d99d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "635503cf819747c9a83f22aa4f2f11db",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3a5f53e451e8483ca7c33f42245abd13",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "84f922d1b68a4a0faa5e920d004efca0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Device set to use cpu\n"
     ]
    }
   ],
   "source": [
    "from transformers import pipeline\n",
    "classifier = pipeline(\"zero-shot-classification\",\n",
    "                      model=\"facebook/bart-large-mnli\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "sequence_to_classify = \"one day I will see the world\"\n",
    "\n",
    "candidate_labels = [\n",
    "    \"Not related to climate change disinformation\",\n",
    "    \"Climate change is not real and not happening\",\n",
    "    \"Climate change is not human-induced\",\n",
    "    \"Climate change impacts are not that bad\",\n",
    "    \"Climate change solutions are harmful and unnecessary\",\n",
    "    \"Climate change science is unreliable\",\n",
    "    \"Climate change proponents are biased\",\n",
    "    \"Fossil fuels are needed to address climate change\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'sequence': 'one day I will see the world',\n",
       " 'labels': ['Fossil fuels are needed to address climate change',\n",
       "  'Climate change science is unreliable',\n",
       "  'Not related to climate change disinformation',\n",
       "  'Climate change proponents are biased',\n",
       "  'Climate change impacts are not that bad',\n",
       "  'Climate change solutions are harmful and unnecessary',\n",
       "  'Climate change is not human-induced',\n",
       "  'Climate change is not real and not happening'],\n",
       " 'scores': [0.16242119669914246,\n",
       "  0.15683825314044952,\n",
       "  0.1564282774925232,\n",
       "  0.14603719115257263,\n",
       "  0.12794046103954315,\n",
       "  0.10180754214525223,\n",
       "  0.0936085507273674,\n",
       "  0.0549185685813427]}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classifier(sequence_to_classify, candidate_labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[codecarbon WARNING @ 11:00:07] Already started tracking\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5d66a13f76a4411d95b62d4a73012495",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "0it [00:00, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[codecarbon WARNING @ 11:05:57] Background scheduler didn't run for a long period (349s), results might be inaccurate\n",
      "[codecarbon INFO @ 11:05:57] Energy consumed for RAM : 0.018069 kWh. RAM Power : 11.755242347717285 W\n",
      "[codecarbon INFO @ 11:05:57] Delta energy consumed for CPU with constant : 0.004122 kWh, power : 42.5 W\n",
      "[codecarbon INFO @ 11:05:57] Energy consumed for All CPU : 0.065327 kWh\n",
      "[codecarbon INFO @ 11:05:57] 0.083395 kWh of electricity used since the beginning.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "EmissionsData(timestamp='2025-01-22T11:05:57', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=349.19709450000664, emissions=0.0002949120266226386, emissions_rate=8.445461750018632e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.004122396676597424, gpu_energy=0, ram_energy=0.0011402244733631148, energy_consumed=0.005262621149960539, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start tracking emissions\n",
    "tracker.start()\n",
    "tracker.start_task(\"inference\")\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "predictions = []\n",
    "\n",
    "\n",
    "\n",
    "# Option 1: Simple loop approach\n",
    "\n",
    "for i, text in tqdm(enumerate(test_dataset[\"quote\"])):\n",
    "\n",
    "    result = classifier(text, candidate_labels)\n",
    "\n",
    "    # Get index of highest scoring label\n",
    "\n",
    "    pred_label = candidate_labels.index(result[\"labels\"][0])\n",
    "\n",
    "    predictions.append(pred_label)\n",
    "    if i == 100:\n",
    "        break\n",
    "\n",
    "\n",
    "# Stop tracking emissions\n",
    "emissions_data = tracker.stop_task()\n",
    "emissions_data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Calculate accuracy\n",
    "accuracy = accuracy_score(true_labels[:100], predictions[:100])\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}