{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Text task notebook template\n", "## Loading the necessary libraries" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[codecarbon WARNING @ 19:48:07] Multiple instances of codecarbon are allowed to run at the same time.\n", "[codecarbon INFO @ 19:48:07] [setup] RAM Tracking...\n", "[codecarbon INFO @ 19:48:07] [setup] CPU Tracking...\n", "[codecarbon WARNING @ 19:48:09] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n", "[codecarbon WARNING @ 19:48:09] No CPU tracking mode found. Falling back on CPU constant mode. \n", " Windows OS detected: Please install Intel Power Gadget to measure CPU\n", "\n", "[codecarbon WARNING @ 19:48:11] We saw that you have a 13th Gen Intel(R) Core(TM) i7-1365U but we don't know it. Please contact us.\n", "[codecarbon INFO @ 19:48:11] CPU Model on constant consumption mode: 13th Gen Intel(R) Core(TM) i7-1365U\n", "[codecarbon WARNING @ 19:48:11] No CPU tracking mode found. 
Falling back on CPU constant mode.\n", "[codecarbon INFO @ 19:48:11] [setup] GPU Tracking...\n", "[codecarbon INFO @ 19:48:11] No GPU found.\n", "[codecarbon INFO @ 19:48:11] >>> Tracker's metadata:\n", "[codecarbon INFO @ 19:48:11] Platform system: Windows-11-10.0.22631-SP0\n", "[codecarbon INFO @ 19:48:11] Python version: 3.12.7\n", "[codecarbon INFO @ 19:48:11] CodeCarbon version: 3.0.0_rc0\n", "[codecarbon INFO @ 19:48:11] Available RAM : 31.347 GB\n", "[codecarbon INFO @ 19:48:11] CPU count: 12\n", "[codecarbon INFO @ 19:48:11] CPU model: 13th Gen Intel(R) Core(TM) i7-1365U\n", "[codecarbon INFO @ 19:48:11] GPU count: None\n", "[codecarbon INFO @ 19:48:11] GPU model: None\n", "[codecarbon INFO @ 19:48:11] Saving emissions data to file c:\\git\\submission-template\\notebooks\\emissions.csv\n" ] } ], "source": [ "from fastapi import APIRouter\n", "from datetime import datetime\n", "from datasets import load_dataset\n", "from sklearn.metrics import accuracy_score\n", "import random\n", "\n", "import sys\n", "sys.path.append('../tasks')\n", "\n", "from utils.evaluation import TextEvaluationRequest\n", "from utils.emissions import tracker, clean_emissions_data, get_space_info\n", "\n", "\n", "# Define the label mapping\n", "LABEL_MAPPING = {\n", " \"0_not_relevant\": 0,\n", " \"1_not_happening\": 1,\n", " \"2_not_human\": 2,\n", " \"3_not_bad\": 3,\n", " \"4_solutions_harmful_unnecessary\": 4,\n", " \"5_science_unreliable\": 5,\n", " \"6_proponents_biased\": 6,\n", " \"7_fossil_fuels_needed\": 7\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading the datasets and splitting them" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "668da7bf85434e098b95c3ec447d78fe", "version_major": 2, "version_minor": 0 }, "text/plain": [ "README.md: 0%| | 0.00/5.18k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", 
"output_type": "stream", "text": [ "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\datasets--QuotaClimat--frugalaichallenge-text-train. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", " warnings.warn(message)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5b68d43359eb429395da8be7d4b15556", "version_major": 2, "version_minor": 0 }, "text/plain": [ "train.parquet: 0%| | 0.00/1.21M [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "140a304773914e9db8f698eabeb40298", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating train split: 0%| | 0/6091 [00:00<?, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6d04e8ab1906400e8e0029949dc523a5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/6091 [00:00<?, ? 
examples/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "request = TextEvaluationRequest()\n", "\n", "# Load and prepare the dataset\n", "dataset = load_dataset(request.dataset_name)\n", "\n", "# Convert string labels to integers\n", "dataset = dataset.map(lambda x: {\"label\": LABEL_MAPPING[x[\"label\"]]})\n", "\n", "# Split dataset\n", "train_test = dataset[\"train\"].train_test_split(test_size=request.test_size, seed=request.test_seed)\n", "test_dataset = train_test[\"test\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Random Baseline" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# Start tracking emissions\n", "tracker.start()\n", "tracker.start_task(\"inference\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1,\n", " 7,\n", " 6,\n", " 6,\n", " 2,\n", " 0,\n", " 1,\n", " 7,\n", " 3,\n", " 6,\n", " 6,\n", " 3,\n", " 6,\n", " 6,\n", " 5,\n", " 0,\n", " 2,\n", " 6,\n", " 2,\n", " 6,\n", " 5,\n", " 4,\n", " 1,\n", " 3,\n", " 6,\n", " 4,\n", " 2,\n", " 1,\n", " 4,\n", " 0,\n", " 3,\n", " 4,\n", " 1,\n", " 5,\n", " 5,\n", " 1,\n", " 2,\n", " 7,\n", " 6,\n", " 1,\n", " 3,\n", " 1,\n", " 7,\n", " 7,\n", " 0,\n", " 0,\n", " 3,\n", " 3,\n", " 3,\n", " 4,\n", " 1,\n", " 4,\n", " 4,\n", " 1,\n", " 4,\n", " 5,\n", " 6,\n", " 1,\n", " 2,\n", " 2,\n", " 2,\n", " 5,\n", " 2,\n", " 7,\n", " 2,\n", " 7,\n", " 7,\n", " 6,\n", " 4,\n", " 2,\n", " 0,\n", " 1,\n", " 6,\n", " 3,\n", " 2,\n", " 5,\n", " 5,\n", " 2,\n", " 0,\n", " 7,\n", " 0,\n", " 1,\n", " 5,\n", " 5,\n", " 7,\n", " 4,\n", " 6,\n", " 7,\n", " 1,\n", " 7,\n", " 1,\n", " 0,\n", " 3,\n", " 4,\n", " 2,\n", " 5,\n", " 3,\n", " 3,\n", " 3,\n", " 2,\n", " 2,\n", " 1,\n", " 0,\n", " 4,\n", " 5,\n", " 7,\n", " 0,\n", " 3,\n", " 1,\n", " 4,\n", " 6,\n", " 0,\n", " 7,\n", " 1,\n", " 1,\n", " 2,\n", " 2,\n", " 4,\n", " 0,\n", " 4,\n", " 3,\n", " 4,\n", " 4,\n", " 2,\n", " 
2,\n", " 3,\n", " 3,\n", " 7,\n", " 4,\n", " 7,\n", " 6,\n", " 4,\n", " 5,\n", " 4,\n", " 3,\n", " 6,\n", " 0,\n", " 4,\n", " 0,\n", " 1,\n", " 3,\n", " 6,\n", " 7,\n", " 3,\n", " 3,\n", " 0,\n", " 1,\n", " 2,\n", " 4,\n", " 4,\n", " 3,\n", " 1,\n", " 2,\n", " 4,\n", " 3,\n", " 0,\n", " 5,\n", " 3,\n", " 6,\n", " 3,\n", " 6,\n", " 1,\n", " 3,\n", " 4,\n", " 5,\n", " 4,\n", " 0,\n", " 7,\n", " 3,\n", " 6,\n", " 7,\n", " 4,\n", " 4,\n", " 5,\n", " 3,\n", " 1,\n", " 7,\n", " 4,\n", " 1,\n", " 0,\n", " 3,\n", " 0,\n", " 5,\n", " 3,\n", " 6,\n", " 3,\n", " 0,\n", " 7,\n", " 2,\n", " 0,\n", " 4,\n", " 1,\n", " 2,\n", " 6,\n", " 3,\n", " 4,\n", " 4,\n", " 5,\n", " 1,\n", " 5,\n", " 4,\n", " 0,\n", " 1,\n", " 7,\n", " 3,\n", " 6,\n", " 0,\n", " 7,\n", " 4,\n", " 6,\n", " 3,\n", " 0,\n", " 0,\n", " 4,\n", " 6,\n", " 6,\n", " 4,\n", " 0,\n", " 5,\n", " 7,\n", " 5,\n", " 1,\n", " 3,\n", " 6,\n", " 2,\n", " 3,\n", " 2,\n", " 4,\n", " 5,\n", " 1,\n", " 5,\n", " 0,\n", " 3,\n", " 3,\n", " 0,\n", " 0,\n", " 6,\n", " 6,\n", " 2,\n", " 0,\n", " 7,\n", " 4,\n", " 5,\n", " 7,\n", " 1,\n", " 0,\n", " 4,\n", " 5,\n", " 1,\n", " 7,\n", " 0,\n", " 7,\n", " 2,\n", " 6,\n", " 1,\n", " 3,\n", " 5,\n", " 5,\n", " 6,\n", " 5,\n", " 4,\n", " 3,\n", " 7,\n", " 4,\n", " 3,\n", " 5,\n", " 5,\n", " 7,\n", " 2,\n", " 6,\n", " 1,\n", " 5,\n", " 0,\n", " 3,\n", " 4,\n", " 2,\n", " 3,\n", " 7,\n", " 0,\n", " 1,\n", " 7,\n", " 6,\n", " 7,\n", " 7,\n", " 5,\n", " 6,\n", " 3,\n", " 2,\n", " 3,\n", " 0,\n", " 4,\n", " 3,\n", " 5,\n", " 6,\n", " 0,\n", " 0,\n", " 6,\n", " 6,\n", " 1,\n", " 4,\n", " 0,\n", " 4,\n", " 2,\n", " 7,\n", " 5,\n", " 7,\n", " 6,\n", " 3,\n", " 5,\n", " 6,\n", " 0,\n", " 4,\n", " 5,\n", " 6,\n", " 1,\n", " 2,\n", " 1,\n", " 5,\n", " 3,\n", " 0,\n", " 3,\n", " 7,\n", " 1,\n", " 0,\n", " 7,\n", " 0,\n", " 1,\n", " 0,\n", " 4,\n", " 1,\n", " 1,\n", " 0,\n", " 7,\n", " 1,\n", " 0,\n", " 7,\n", " 6,\n", " 2,\n", " 3,\n", " 7,\n", " 4,\n", " 3,\n", " 4,\n", " 3,\n", " 3,\n", " 2,\n", " 
5,\n", " 1,\n", " 5,\n", " 1,\n", " 7,\n", " 3,\n", " 2,\n", " 6,\n", " 4,\n", " 4,\n", " 1,\n", " 2,\n", " 6,\n", " 7,\n", " 2,\n", " 7,\n", " 1,\n", " 3,\n", " 5,\n", " 2,\n", " 6,\n", " 4,\n", " 6,\n", " 7,\n", " 0,\n", " 5,\n", " 1,\n", " 6,\n", " 5,\n", " 3,\n", " 6,\n", " 5,\n", " 4,\n", " 7,\n", " 6,\n", " 5,\n", " 4,\n", " 3,\n", " 0,\n", " 0,\n", " 1,\n", " 7,\n", " 7,\n", " 6,\n", " 1,\n", " 4,\n", " 5,\n", " 6,\n", " 1,\n", " 5,\n", " 1,\n", " 2,\n", " 6,\n", " 2,\n", " 6,\n", " 0,\n", " 2,\n", " 1,\n", " 5,\n", " 5,\n", " 1,\n", " 7,\n", " 0,\n", " 5,\n", " 5,\n", " 1,\n", " 7,\n", " 7,\n", " 2,\n", " 1,\n", " 0,\n", " 1,\n", " 0,\n", " 5,\n", " 4,\n", " 2,\n", " 7,\n", " 4,\n", " 3,\n", " 6,\n", " 7,\n", " 5,\n", " 1,\n", " 0,\n", " 7,\n", " 2,\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 0,\n", " 3,\n", " 2,\n", " 6,\n", " 0,\n", " 5,\n", " 4,\n", " 7,\n", " 1,\n", " 1,\n", " 0,\n", " 7,\n", " 0,\n", " 6,\n", " 7,\n", " 6,\n", " 1,\n", " 5,\n", " 5,\n", " 7,\n", " 6,\n", " 1,\n", " 7,\n", " 6,\n", " 5,\n", " 4,\n", " 1,\n", " 4,\n", " 7,\n", " 5,\n", " 4,\n", " 0,\n", " 0,\n", " 7,\n", " 0,\n", " 0,\n", " 3,\n", " 6,\n", " 2,\n", " 5,\n", " 3,\n", " 0,\n", " 3,\n", " 6,\n", " 5,\n", " 7,\n", " 2,\n", " 6,\n", " 7,\n", " 5,\n", " 2,\n", " 3,\n", " 6,\n", " 7,\n", " 7,\n", " 7,\n", " 6,\n", " 1,\n", " 7,\n", " 4,\n", " 2,\n", " 7,\n", " 5,\n", " 4,\n", " 1,\n", " 2,\n", " 3,\n", " 7,\n", " 0,\n", " 2,\n", " 7,\n", " 6,\n", " 1,\n", " 4,\n", " 0,\n", " 6,\n", " 3,\n", " 1,\n", " 0,\n", " 3,\n", " 4,\n", " 7,\n", " 7,\n", " 4,\n", " 2,\n", " 1,\n", " 0,\n", " 5,\n", " 1,\n", " 7,\n", " 4,\n", " 6,\n", " 7,\n", " 7,\n", " 3,\n", " 4,\n", " 3,\n", " 5,\n", " 4,\n", " 4,\n", " 5,\n", " 0,\n", " 1,\n", " 3,\n", " 7,\n", " 5,\n", " 4,\n", " 7,\n", " 3,\n", " 3,\n", " 3,\n", " 5,\n", " 3,\n", " 3,\n", " 4,\n", " 0,\n", " 1,\n", " 7,\n", " 4,\n", " 7,\n", " 7,\n", " 5,\n", " 0,\n", " 0,\n", " 5,\n", " 2,\n", " 6,\n", " 2,\n", " 6,\n", " 7,\n", " 6,\n", " 5,\n", " 
7,\n", " 5,\n", " 7,\n", " 1,\n", " 6,\n", " 6,\n", " 0,\n", " 4,\n", " 7,\n", " 3,\n", " 0,\n", " 0,\n", " 2,\n", " 5,\n", " 2,\n", " 3,\n", " 7,\n", " 1,\n", " 0,\n", " 3,\n", " 0,\n", " 0,\n", " 3,\n", " 3,\n", " 7,\n", " 3,\n", " 0,\n", " 1,\n", " 1,\n", " 6,\n", " 0,\n", " 0,\n", " 5,\n", " 0,\n", " 3,\n", " 4,\n", " 6,\n", " 7,\n", " 4,\n", " 0,\n", " 4,\n", " 4,\n", " 5,\n", " 4,\n", " 4,\n", " 3,\n", " 6,\n", " 5,\n", " 2,\n", " 0,\n", " 6,\n", " 0,\n", " 6,\n", " 4,\n", " 3,\n", " 5,\n", " 7,\n", " 7,\n", " 5,\n", " 5,\n", " 1,\n", " 5,\n", " 2,\n", " 7,\n", " 7,\n", " 6,\n", " 6,\n", " 7,\n", " 6,\n", " 5,\n", " 2,\n", " 4,\n", " 0,\n", " 4,\n", " 4,\n", " 7,\n", " 5,\n", " 2,\n", " 7,\n", " 0,\n", " 6,\n", " 0,\n", " 2,\n", " 6,\n", " 6,\n", " 2,\n", " 3,\n", " 0,\n", " 5,\n", " 0,\n", " 5,\n", " 7,\n", " 2,\n", " 7,\n", " 4,\n", " 7,\n", " 4,\n", " 0,\n", " 7,\n", " 1,\n", " 4,\n", " 5,\n", " 0,\n", " 5,\n", " 5,\n", " 2,\n", " 0,\n", " 2,\n", " 5,\n", " 5,\n", " 6,\n", " 3,\n", " 4,\n", " 1,\n", " 7,\n", " 7,\n", " 2,\n", " 3,\n", " 2,\n", " 5,\n", " 0,\n", " 7,\n", " 2,\n", " 3,\n", " 7,\n", " 2,\n", " 4,\n", " 0,\n", " 5,\n", " 7,\n", " 3,\n", " 6,\n", " 7,\n", " 6,\n", " 4,\n", " 3,\n", " 6,\n", " 5,\n", " 4,\n", " 0,\n", " 3,\n", " 4,\n", " 3,\n", " 5,\n", " 2,\n", " 4,\n", " 0,\n", " 3,\n", " 6,\n", " 1,\n", " 3,\n", " 1,\n", " 4,\n", " 3,\n", " 3,\n", " 3,\n", " 0,\n", " 7,\n", " 6,\n", " 2,\n", " 4,\n", " 6,\n", " 5,\n", " 4,\n", " 1,\n", " 7,\n", " 6,\n", " 1,\n", " 4,\n", " 3,\n", " 0,\n", " 7,\n", " 3,\n", " 1,\n", " 2,\n", " 1,\n", " 6,\n", " 4,\n", " 7,\n", " 1,\n", " 7,\n", " 1,\n", " 5,\n", " 1,\n", " 6,\n", " 3,\n", " 0,\n", " 2,\n", " 6,\n", " 7,\n", " 7,\n", " 0,\n", " 1,\n", " 4,\n", " 0,\n", " 4,\n", " 5,\n", " 3,\n", " 6,\n", " 2,\n", " 3,\n", " 4,\n", " 1,\n", " 6,\n", " 2,\n", " 4,\n", " 4,\n", " 6,\n", " 4,\n", " 5,\n", " 7,\n", " 1,\n", " 7,\n", " 7,\n", " 4,\n", " 7,\n", " 4,\n", " 3,\n", " 3,\n", " 6,\n", " 1,\n", " 2,\n", " 
0,\n", " 0,\n", " 0,\n", " 2,\n", " 5,\n", " 6,\n", " 5,\n", " 7,\n", " 5,\n", " 7,\n", " 1,\n", " 1,\n", " 2,\n", " 1,\n", " 6,\n", " 5,\n", " 7,\n", " 0,\n", " 0,\n", " 5,\n", " 5,\n", " 0,\n", " 3,\n", " 7,\n", " 5,\n", " 2,\n", " 5,\n", " 4,\n", " 2,\n", " 3,\n", " 6,\n", " 2,\n", " 3,\n", " 6,\n", " 0,\n", " 0,\n", " 2,\n", " 6,\n", " 0,\n", " 1,\n", " 3,\n", " 3,\n", " 6,\n", " 4,\n", " 6,\n", " 4,\n", " 6,\n", " 0,\n", " 0,\n", " 2,\n", " 3,\n", " 6,\n", " 2,\n", " 2,\n", " 6,\n", " 6,\n", " 2,\n", " 4,\n", " 3,\n", " 3,\n", " 6,\n", " 7,\n", " 7,\n", " 1,\n", " 1,\n", " 7,\n", " 7,\n", " 6,\n", " 1,\n", " 7,\n", " 0,\n", " 0,\n", " 2,\n", " 4,\n", " 2,\n", " 2,\n", " 3,\n", " 0,\n", " 1,\n", " 4,\n", " 0,\n", " 4,\n", " 6,\n", " 5,\n", " 3,\n", " 2,\n", " 3,\n", " 2,\n", " 3,\n", " 6,\n", " 2,\n", " 1,\n", " 4,\n", " 7,\n", " 6,\n", " 4,\n", " 5,\n", " 6,\n", " 7,\n", " 7,\n", " 2,\n", " 0,\n", " 5,\n", " 5,\n", " 0,\n", " 3,\n", " 6,\n", " 6,\n", " 5,\n", " 4,\n", " 4,\n", " 7,\n", " 0,\n", " 5,\n", " 1,\n", " 7,\n", " 0,\n", " 3,\n", " 1,\n", " 7,\n", " 0,\n", " 1,\n", " 4,\n", " 7,\n", " 5,\n", " 0,\n", " 4,\n", " 0,\n", " 0,\n", " 1,\n", " 0,\n", " 6,\n", " 4,\n", " 0,\n", " 5,\n", " 4,\n", " 6,\n", " 6,\n", " 7,\n", " 2,\n", " 6,\n", " 2,\n", " 6,\n", " 0,\n", " 3,\n", " 2,\n", " 2,\n", " 1,\n", " 5,\n", " 4,\n", " 7,\n", " 6,\n", " 6,\n", " 2,\n", " 5,\n", " 5,\n", " 5,\n", " 0,\n", " 3,\n", " 5,\n", " 4,\n", " 5,\n", " 7,\n", " 5,\n", " 0,\n", " 5,\n", " 0,\n", " 0,\n", " 2,\n", " 0,\n", " 2,\n", " 1,\n", " 0,\n", " 2,\n", " 4,\n", " 3,\n", " 4,\n", " 1,\n", " 7,\n", " 2,\n", " 1,\n", " 0,\n", " 3,\n", " 0,\n", " 3,\n", " 1,\n", " 1,\n", " 0,\n", " 5,\n", " 3,\n", " 1,\n", " 2,\n", " 5,\n", " 6,\n", " 7,\n", " 6,\n", " 7,\n", " 0,\n", " 2,\n", " 6,\n", " 3,\n", " 1,\n", " 5,\n", " 4,\n", " 2,\n", " 4,\n", " 6,\n", " 5,\n", " 2,\n", " 7,\n", " ...]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", 
"#--------------------------------------------------------------------------------------------\n",
"# YOUR MODEL INFERENCE CODE HERE\n",
"# Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.\n",
"#-------------------------------------------------------------------------------------------- \n",
"\n",
"# Make random predictions (placeholder for actual model inference).\n",
"# A dedicated generator seeded with the request's split seed keeps the baseline\n",
"# reproducible across runs without touching the global `random` state.\n",
"true_labels = test_dataset[\"label\"]\n",
"baseline_rng = random.Random(request.test_seed)\n",
"\n",
"# One class id per test example; the range is derived from LABEL_MAPPING so the\n",
"# baseline stays valid if the label set changes.\n",
"predictions = [baseline_rng.randrange(len(LABEL_MAPPING)) for _ in range(len(true_labels))]\n",
"\n",
"# Preview only the first few predictions instead of dumping the whole list.\n",
"predictions[:10]\n",
"\n",
"#--------------------------------------------------------------------------------------------\n",
"# YOUR MODEL INFERENCE STOPS HERE\n",
"#-------------------------------------------------------------------------------------------- "
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[codecarbon WARNING @ 19:53:32] Background scheduler didn't run for a long period (47s), results might be inaccurate\n", "[codecarbon INFO @ 19:53:32] Energy consumed for RAM : 0.000156 kWh. 
RAM Power : 11.755242347717285 W\n", "[codecarbon INFO @ 19:53:32] Delta energy consumed for CPU with constant : 0.000564 kWh, power : 42.5 W\n", "[codecarbon INFO @ 19:53:32] Energy consumed for All CPU : 0.000564 kWh\n", "[codecarbon INFO @ 19:53:32] 0.000720 kWh of electricity used since the beginning.\n" ] }, { "data": { "text/plain": [ "EmissionsData(timestamp='2025-01-21T19:53:32', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=47.736408500000834, emissions=4.032368007471064e-05, emissions_rate=8.444466886328872e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.0005636615353475565, gpu_energy=0, ram_energy=0.00015590305493261682, energy_consumed=0.0007195645902801733, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Stop tracking emissions\n", "emissions_data = tracker.stop_task()\n", "emissions_data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.10090237899917966" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Calculate accuracy\n", "accuracy = accuracy_score(true_labels, predictions)\n", "accuracy" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'submission_timestamp': '2025-01-21T19:53:46.639165',\n", " 'accuracy': 0.10090237899917966,\n", " 'energy_consumed_wh': 0.7195645902801733,\n", " 'emissions_gco2eq': 
0.040323680074710634,\n", " 'emissions_data': {'run_id': '908f2e7e-4bb2-4991-a0f6-56bf8d7eda21',\n", " 'duration': 47.736408500000834,\n", " 'emissions': 4.032368007471064e-05,\n", " 'emissions_rate': 8.444466886328872e-07,\n", " 'cpu_power': 42.5,\n", " 'gpu_power': 0.0,\n", " 'ram_power': 11.755242347717285,\n", " 'cpu_energy': 0.0005636615353475565,\n", " 'gpu_energy': 0,\n", " 'ram_energy': 0.00015590305493261682,\n", " 'energy_consumed': 0.0007195645902801733,\n", " 'country_name': 'France',\n", " 'country_iso_code': 'FRA',\n", " 'region': 'île-de-france',\n", " 'cloud_provider': '',\n", " 'cloud_region': '',\n", " 'os': 'Windows-11-10.0.22631-SP0',\n", " 'python_version': '3.12.7',\n", " 'codecarbon_version': '3.0.0_rc0',\n", " 'cpu_count': 12,\n", " 'cpu_model': '13th Gen Intel(R) Core(TM) i7-1365U',\n", " 'gpu_count': None,\n", " 'gpu_model': None,\n", " 'ram_total_size': 31.347312927246094,\n", " 'tracking_mode': 'machine',\n", " 'on_cloud': 'N',\n", " 'pue': 1.0},\n", " 'dataset_config': {'dataset_name': 'QuotaClimat/frugalaichallenge-text-train',\n", " 'test_size': 0.2,\n", " 'test_seed': 42}}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Prepare results dictionary\n", "results = {\n", " \"submission_timestamp\": datetime.now().isoformat(),\n", " \"accuracy\": float(accuracy),\n", " \"energy_consumed_wh\": emissions_data.energy_consumed * 1000,\n", " \"emissions_gco2eq\": emissions_data.emissions * 1000,\n", " \"emissions_data\": clean_emissions_data(emissions_data),\n", " \"dataset_config\": {\n", " \"dataset_name\": request.dataset_name,\n", " \"test_size\": request.test_size,\n", " \"test_seed\": request.test_seed\n", " }\n", "}\n", "\n", "results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Development of the model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": 
"90f50ab19698484489f36976745efad3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "config.json: 0%| | 0.00/1.15k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\theo.alvesdacosta\\AppData\\Local\\anaconda3\\Lib\\site-packages\\huggingface_hub\\file_download.py:139: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\theo.alvesdacosta\\.cache\\huggingface\\hub\\models--facebook--bart-large-mnli. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n", "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. 
In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n", " warnings.warn(message)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6e3974d8ff284603821f7beca9bd353d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "model.safetensors: 0%| | 0.00/1.63G [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bc29cb379c644b00b1bdf61d5426d99d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer_config.json: 0%| | 0.00/26.0 [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "635503cf819747c9a83f22aa4f2f11db", "version_major": 2, "version_minor": 0 }, "text/plain": [ "vocab.json: 0%| | 0.00/899k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3a5f53e451e8483ca7c33f42245abd13", "version_major": 2, "version_minor": 0 }, "text/plain": [ "merges.txt: 0%| | 0.00/456k [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "84f922d1b68a4a0faa5e920d004efca0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tokenizer.json: 0%| | 0.00/1.36M [00:00<?, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Device set to use cpu\n" ] } ], "source": [ "from transformers import pipeline\n", "classifier = pipeline(\"zero-shot-classification\",\n", " model=\"facebook/bart-large-mnli\")\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "sequence_to_classify = \"one day I will see the world\"\n", "\n", "candidate_labels = [\n", " \"Not related to climate change 
disinformation\",\n", " \"Climate change is not real and not happening\",\n", " \"Climate change is not human-induced\",\n", " \"Climate change impacts are not that bad\",\n", " \"Climate change solutions are harmful and unnecessary\",\n", " \"Climate change science is unreliable\",\n", " \"Climate change proponents are biased\",\n", " \"Fossil fuels are needed to address climate change\"\n", "]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'sequence': 'one day I will see the world',\n", " 'labels': ['Fossil fuels are needed to address climate change',\n", " 'Climate change science is unreliable',\n", " 'Not related to climate change disinformation',\n", " 'Climate change proponents are biased',\n", " 'Climate change impacts are not that bad',\n", " 'Climate change solutions are harmful and unnecessary',\n", " 'Climate change is not human-induced',\n", " 'Climate change is not real and not happening'],\n", " 'scores': [0.16242119669914246,\n", " 0.15683825314044952,\n", " 0.1564282774925232,\n", " 0.14603719115257263,\n", " 0.12794046103954315,\n", " 0.10180754214525223,\n", " 0.0936085507273674,\n", " 0.0549185685813427]}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier(sequence_to_classify, candidate_labels)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[codecarbon WARNING @ 11:00:07] Already started tracking\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "5d66a13f76a4411d95b62d4a73012495", "version_major": 2, "version_minor": 0 }, "text/plain": [ "0it [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[codecarbon WARNING @ 11:05:57] Background scheduler didn't run for a long period (349s), results might be inaccurate\n", "[codecarbon INFO @ 
11:05:57] Energy consumed for RAM : 0.018069 kWh. RAM Power : 11.755242347717285 W\n", "[codecarbon INFO @ 11:05:57] Delta energy consumed for CPU with constant : 0.004122 kWh, power : 42.5 W\n", "[codecarbon INFO @ 11:05:57] Energy consumed for All CPU : 0.065327 kWh\n", "[codecarbon INFO @ 11:05:57] 0.083395 kWh of electricity used since the beginning.\n" ] }, { "data": { "text/plain": [ "EmissionsData(timestamp='2025-01-22T11:05:57', project_name='codecarbon', run_id='908f2e7e-4bb2-4991-a0f6-56bf8d7eda21', experiment_id='5b0fa12a-3dd7-45bb-9766-cc326314d9f1', duration=349.19709450000664, emissions=0.0002949120266226386, emissions_rate=8.445461750018632e-07, cpu_power=42.5, gpu_power=0.0, ram_power=11.755242347717285, cpu_energy=0.004122396676597424, gpu_energy=0, ram_energy=0.0011402244733631148, energy_consumed=0.005262621149960539, country_name='France', country_iso_code='FRA', region='île-de-france', cloud_provider='', cloud_region='', os='Windows-11-10.0.22631-SP0', python_version='3.12.7', codecarbon_version='3.0.0_rc0', cpu_count=12, cpu_model='13th Gen Intel(R) Core(TM) i7-1365U', gpu_count=None, gpu_model=None, longitude=2.3494, latitude=48.8558, ram_total_size=31.347312927246094, tracking_mode='machine', on_cloud='N', pue=1.0)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [
"from tqdm.auto import tqdm\n",
"\n",
"# Number of test quotes to classify. Zero-shot inference with BART-large on CPU is slow,\n",
"# so only the first N_SAMPLES quotes are run through the model.\n",
"N_SAMPLES = 100\n",
"\n",
"# Start tracking emissions.\n",
"# tracker.start() logs an 'Already started tracking' warning when the baseline section\n",
"# above has already started the tracker; it is kept so this cell also works on its own.\n",
"tracker.start()\n",
"tracker.start_task(\"inference\")\n",
"\n",
"# Slicing the dataset returns a dict of plain lists, so `quotes` has a known length and\n",
"# tqdm can show a real progress bar instead of an open-ended '0it' counter.\n",
"quotes = test_dataset[:N_SAMPLES][\"quote\"]\n",
"\n",
"predictions = []\n",
"for text in tqdm(quotes, desc=\"zero-shot inference\"):\n",
"    result = classifier(text, candidate_labels)\n",
"    # The pipeline returns labels sorted by descending score, so the first one is the\n",
"    # prediction. candidate_labels is ordered like LABEL_MAPPING, so its position in\n",
"    # that list is the integer class id.\n",
"    predictions.append(candidate_labels.index(result[\"labels\"][0]))\n",
"\n",
"# Stop tracking emissions\n",
"emissions_data = tracker.stop_task()\n",
"emissions_data\n"
] }, { 
"cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Calculate accuracy on the subset that was actually classified.\n",
"# Only the first len(predictions) test quotes went through the classifier, so the\n",
"# reference labels are truncated to that same length rather than a hard-coded 100.\n",
"n_scored = len(predictions)\n",
"accuracy = accuracy_score(true_labels[:n_scored], predictions)\n",
"accuracy"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }