none commited on
Commit
82c0cdf
·
1 Parent(s): ebb4897
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.so filter=lfs diff=lfs merge=lfs -text
37
+ *.whl filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1 +1,2 @@
1
  apps.json
 
 
1
  apps.json
2
+ kaggle.json
Dockerfile CHANGED
@@ -1,13 +1,13 @@
1
- # build with: docker build . --tag sctg/roco-idefics3:0.0.15 --tag sctg/roco-idefics3:latest --push
2
  # run with
3
- # docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.15 bash -i /start.sh sleep infinity
4
- # docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -it sctg/roco-idefics3:0.0.15 bash -i /start.sh python /learn.py hf_...
5
  FROM cloudflare/cloudflared:latest as cloudflared
6
- FROM nvidia/cuda:12.6.2-devel-ubuntu22.04
7
  ARG NODE_MAJOR="20"
8
  RUN /usr/sbin/addgroup --gid 42420 ovh
9
  RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
10
- RUN apt update -y && apt-get install -y curl git git-lfs screen sudo \
11
  && mkdir -p /etc/apt/keyrings \
12
  && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
13
  && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
@@ -22,12 +22,14 @@ RUN echo "ovh ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
22
  COPY --from=cloudflared /usr/local/bin/cloudflared /usr/local/bin/cloudflared
23
  USER 42420
24
  RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > /workspace/miniconda.sh
 
25
  RUN /bin/bash /workspace/miniconda.sh -b -p /workspace/.miniconda3
26
  RUN . /workspace/.miniconda3/bin/activate && conda init --all
27
  RUN . /workspace/.miniconda3/bin/activate \
28
  && pip install -U "safetensors>=0.4.5" \
29
  && pip install -U tensorflow \
30
- && pip install -U 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl' \
 
31
  && pip install -U git+https://github.com/huggingface/transformers.git\
32
  && pip install huggingface_hub[cli] accelerate datasets peft\
33
  && pip install -U Pillow \
@@ -39,12 +41,18 @@ RUN . /workspace/.miniconda3/bin/activate \
39
  && pip install unsloth\
40
  && pip install gradio \
41
  && pip uninstall unsloth -y \
42
- && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
 
 
 
 
 
43
 
44
  RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
45
  RUN rm -f /workspace/miniconda.sh
46
  RUN mkdir -p /workspace/.config/github-copilot/
47
  COPY apps.json.enc /workspace/.config/github-copilot/apps.json.enc
 
48
  COPY CFTOKEN.enc /workspace/.config/CFTOKEN.enc
49
  # Mandatory to run the jobs in rootless mode
50
  # USER root
 
1
+ # build with: docker build . --tag sctg/roco-idefics3:0.0.18 --tag sctg/roco-idefics3:latest --push
2
  # run with
3
+ # docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -e WANDB_API_KEY=wdkfjzfjz -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.18 bash -i /start.sh sleep infinity
4
+ # docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -it sctg/roco-idefics3:0.0.18 bash -i /start.sh python /learn.py hf_...
5
  FROM cloudflare/cloudflared:latest as cloudflared
6
+ FROM nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04
7
  ARG NODE_MAJOR="20"
8
  RUN /usr/sbin/addgroup --gid 42420 ovh
9
  RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
10
+ RUN apt update -y && apt-get install -y cmake curl git git-lfs screen sudo \
11
  && mkdir -p /etc/apt/keyrings \
12
  && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
13
  && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
 
22
  COPY --from=cloudflared /usr/local/bin/cloudflared /usr/local/bin/cloudflared
23
  USER 42420
24
  RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > /workspace/miniconda.sh
25
+ COPY bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl
26
  RUN /bin/bash /workspace/miniconda.sh -b -p /workspace/.miniconda3
27
  RUN . /workspace/.miniconda3/bin/activate && conda init --all
28
  RUN . /workspace/.miniconda3/bin/activate \
29
  && pip install -U "safetensors>=0.4.5" \
30
  && pip install -U tensorflow \
31
+ && pip install -U tf-keras \
32
+ && pip install -U /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl \
33
  && pip install -U git+https://github.com/huggingface/transformers.git\
34
  && pip install huggingface_hub[cli] accelerate datasets peft\
35
  && pip install -U Pillow \
 
41
  && pip install unsloth\
42
  && pip install gradio \
43
  && pip uninstall unsloth -y \
44
+ && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git \
45
+ && pip install wandb
46
+
47
+ COPY --chmod=777 libbitsandbytes_cuda124.so /workspace/.miniconda3/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cuda124.so
48
+ COPY --chmod=777 libbitsandbytes_cuda126.so /workspace/.miniconda3/lib/python3.12/site-packages/bitsandbytes/libbitsandbytes_cuda126.so
49
+ RUN rm /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl
50
 
51
  RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
52
  RUN rm -f /workspace/miniconda.sh
53
  RUN mkdir -p /workspace/.config/github-copilot/
54
  COPY apps.json.enc /workspace/.config/github-copilot/apps.json.enc
55
+ COPY kaggle.json.enc /workspace/.config/kaggle.json.enc
56
  COPY CFTOKEN.enc /workspace/.config/CFTOKEN.enc
57
  # Mandatory to run the jobs in rootless mode
58
  # USER root
ROCO-idefics3.ipynb CHANGED
@@ -11,58 +11,6 @@
11
  "The fine-tuning process stores the model checkpoints on a regular basis. Re run the notebook from the last checkpoint to continue the fine-tuning process."
12
  ]
13
  },
14
- {
15
- "cell_type": "markdown",
16
- "metadata": {
17
- "id": "2uGjdGkTI78H"
18
- },
19
- "source": [
20
- "## Try to mount Google Drive"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 1,
26
- "metadata": {
27
- "executionInfo": {
28
- "elapsed": 2,
29
- "status": "aborted",
30
- "timestamp": 1730998196191,
31
- "user": {
32
- "displayName": "Ronan Le Meillat",
33
- "userId": "09161391957806824350"
34
- },
35
- "user_tz": -60
36
- },
37
- "id": "F-zJG-uPIy3d"
38
- },
39
- "outputs": [
40
- {
41
- "ename": "Exception",
42
- "evalue": "You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive",
43
- "output_type": "error",
44
- "traceback": [
45
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
46
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
47
- "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m drive\n",
48
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'google'",
49
- "\nDuring handling of the above exception, another exception occurred:\n",
50
- "\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
51
- "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m drive\u001b[38;5;241m.\u001b[39mmount(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/content/drive\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
52
- "\u001b[0;31mException\u001b[0m: You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive"
53
- ]
54
- }
55
- ],
56
- "source": [
57
- "try:\n",
58
- " import google.colab\n",
59
- " from google.colab import drive\n",
60
- " drive.mount('/content/drive')\n",
61
- " \n",
62
- "except ModuleNotFoundError:\n",
63
- " raise Exception(\"You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive\")"
64
- ]
65
- },
66
  {
67
  "cell_type": "markdown",
68
  "metadata": {},
@@ -72,7 +20,7 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 2,
76
  "metadata": {
77
  "executionInfo": {
78
  "elapsed": 1459,
@@ -91,140 +39,128 @@
91
  "dataset_id = \"eltorio/ROCOv2-radiology\"\n",
92
  "prompt= \"You are an expert radiologist certified with over 15 years of experience in diagnostic imaging, describe this image\"\n",
93
  "source_model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
94
- "destination_model_id = \"eltorio/IDEFICS3_ROCOv2\"\n",
95
- "# if Google Drive is mounted, the model will be saved in a folder called IDEFICS3_ROCO in the root of your Google Drive\n",
96
- "# else the model will be saved in the current working directory in a folder called IDEFICS3_ROCO\n",
97
- "if 'drive' in globals():\n",
98
- " output_dir = \"/content/drive/MyDrive/IDEFICS3_ROCOv2\"\n",
99
- "else:\n",
100
- " output_dir = \"IDEFICS3_ROCOv2\""
101
  ]
102
  },
103
  {
104
  "cell_type": "markdown",
105
  "metadata": {},
106
  "source": [
107
- "## Login on Hugging Face"
108
  ]
109
  },
110
  {
111
  "cell_type": "code",
112
- "execution_count": null,
113
  "metadata": {},
114
  "outputs": [],
115
  "source": [
116
- "!git config --global credential.helper store\n",
117
- "%pip install huggingface_hub"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ]
119
  },
120
  {
121
  "cell_type": "code",
122
- "execution_count": 3,
123
  "metadata": {},
124
  "outputs": [
125
  {
126
  "name": "stdout",
127
  "output_type": "stream",
128
  "text": [
129
- "Hugging Face token found in environment variable\n"
130
- ]
131
- },
132
- {
133
- "name": "stderr",
134
- "output_type": "stream",
135
- "text": [
136
- "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
137
  ]
138
  }
139
  ],
140
  "source": [
141
- "from huggingface_hub import login\n",
142
- "import os\n",
143
- "\n",
144
- "HF_TOKEN = \"hf_C…………\"\n",
145
- "\n",
146
- "if os.environ.get('HF_TOKEN') is not None:\n",
147
- " HF_TOKEN = os.environ.get('HF_TOKEN')\n",
148
- " print(f\"Hugging Face token found in environment variable\")\n",
149
  "try:\n",
150
- " import google.colab\n",
151
- " from google.colab import userdata\n",
152
- " if (userdata.get('HF_TOKEN') is not None) and (HF_TOKEN == \"\"):\n",
153
- " HF_TOKEN = userdata.get('HF_TOKEN')\n",
154
- " else:\n",
155
- " raise ValueError(\"Please set your Hugging Face token in the user data panel, or pass it as an environment variable\")\n",
156
- "except ModuleNotFoundError:\n",
157
- " if HF_TOKEN is None:\n",
158
- " raise ValueError(\"Please set your Hugging Face token in the user data panel, or pass it as an environment variable\")\n",
159
  "\n",
160
- "login(\n",
161
- " token=HF_TOKEN,\n",
162
- " add_to_git_credential=True\n",
163
- ")"
 
164
  ]
165
  },
166
  {
167
  "cell_type": "markdown",
168
  "metadata": {},
169
  "source": [
170
- "##### Optionally clone the model repository"
171
  ]
172
  },
173
  {
174
  "cell_type": "code",
175
- "execution_count": 3,
176
  "metadata": {},
177
  "outputs": [
 
 
 
 
 
 
 
178
  {
179
  "name": "stdout",
180
  "output_type": "stream",
181
  "text": [
182
- "Cloning into 'IDEFICS3_ROCOv2'...\n",
183
- "remote: Enumerating objects: 3, done.\u001b[K\n",
184
- "remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
185
- "Unpacking objects: 100% (3/3), 1.05 KiB | 1.05 MiB/s, done.\n"
186
  ]
187
  }
188
  ],
189
  "source": [
190
- "# clone Hugging Face model repository\n",
191
- "!git clone https://huggingface.co/$destination_model_id $output_dir"
192
- ]
193
- },
194
- {
195
- "cell_type": "markdown",
196
- "metadata": {
197
- "id": "s8QBFDGDAMtD"
198
- },
199
- "source": [
200
- "### Step 1: Install libraries and dependencies."
201
- ]
202
- },
203
- {
204
- "cell_type": "code",
205
- "execution_count": null,
206
- "metadata": {
207
- "colab": {
208
- "base_uri": "https://localhost:8080/"
209
- },
210
- "executionInfo": {
211
- "elapsed": 33422,
212
- "status": "ok",
213
- "timestamp": 1730997063833,
214
- "user": {
215
- "displayName": "Ronan Le Meillat",
216
- "userId": "09161391957806824350"
217
- },
218
- "user_tz": -60
219
- },
220
- "id": "eLNGOnQtAMtE",
221
- "outputId": "65d09e49-f24c-47f3-d7d7-c1a0a0237290"
222
- },
223
- "outputs": [],
224
- "source": [
225
- "%pip install -q git+https://github.com/huggingface/transformers.git\n",
226
- "%pip install -q accelerate datasets peft\n",
227
- "%pip install -q bitsandbytes"
228
  ]
229
  },
230
  {
@@ -233,12 +169,12 @@
233
  "id": "IjLCnQVEAMtE"
234
  },
235
  "source": [
236
- "### Step 2: Retrieve the dataset from Hugging Face."
237
  ]
238
  },
239
  {
240
  "cell_type": "code",
241
- "execution_count": 4,
242
  "metadata": {
243
  "colab": {
244
  "base_uri": "https://localhost:8080/",
@@ -659,7 +595,7 @@
659
  {
660
  "data": {
661
  "application/vnd.jupyter.widget-view+json": {
662
- "model_id": "c681920ed6e24c35981eda639b1c4458",
663
  "version_major": 2,
664
  "version_minor": 0
665
  },
@@ -673,7 +609,7 @@
673
  {
674
  "data": {
675
  "application/vnd.jupyter.widget-view+json": {
676
- "model_id": "c5c781c8755e4ee79f5c972bb786ddda",
677
  "version_major": 2,
678
  "version_minor": 0
679
  },
@@ -687,7 +623,7 @@
687
  {
688
  "data": {
689
  "application/vnd.jupyter.widget-view+json": {
690
- "model_id": "4ece62cf670f441181bce789dbabb38d",
691
  "version_major": 2,
692
  "version_minor": 0
693
  },
@@ -701,7 +637,7 @@
701
  {
702
  "data": {
703
  "application/vnd.jupyter.widget-view+json": {
704
- "model_id": "368b3734b6094270874bc9ed05220b43",
705
  "version_major": 2,
706
  "version_minor": 0
707
  },
@@ -715,7 +651,7 @@
715
  {
716
  "data": {
717
  "application/vnd.jupyter.widget-view+json": {
718
- "model_id": "e002292c8f73473ba28b353594d047a0",
719
  "version_major": 2,
720
  "version_minor": 0
721
  },
@@ -729,7 +665,7 @@
729
  {
730
  "data": {
731
  "application/vnd.jupyter.widget-view+json": {
732
- "model_id": "b6ec67734a7644c49c6d222db885d9fa",
733
  "version_major": 2,
734
  "version_minor": 0
735
  },
@@ -743,7 +679,7 @@
743
  {
744
  "data": {
745
  "application/vnd.jupyter.widget-view+json": {
746
- "model_id": "d04c94230afb42c4b5488d1517c63fd6",
747
  "version_major": 2,
748
  "version_minor": 0
749
  },
@@ -757,7 +693,7 @@
757
  {
758
  "data": {
759
  "application/vnd.jupyter.widget-view+json": {
760
- "model_id": "0fd069cace5146b8be5b44a89dae32dd",
761
  "version_major": 2,
762
  "version_minor": 0
763
  },
@@ -771,7 +707,7 @@
771
  {
772
  "data": {
773
  "application/vnd.jupyter.widget-view+json": {
774
- "model_id": "e23e8d0ecb394513907c6ecb4a30783d",
775
  "version_major": 2,
776
  "version_minor": 0
777
  },
@@ -785,7 +721,7 @@
785
  {
786
  "data": {
787
  "application/vnd.jupyter.widget-view+json": {
788
- "model_id": "6d0a484c03274c628585bfd3938dd5dd",
789
  "version_major": 2,
790
  "version_minor": 0
791
  },
@@ -799,7 +735,7 @@
799
  {
800
  "data": {
801
  "application/vnd.jupyter.widget-view+json": {
802
- "model_id": "8a9d84a4ba4c4a9d8762845b466159dc",
803
  "version_major": 2,
804
  "version_minor": 0
805
  },
@@ -813,7 +749,7 @@
813
  {
814
  "data": {
815
  "application/vnd.jupyter.widget-view+json": {
816
- "model_id": "ac978b853b8f4ce48b2240adad35813c",
817
  "version_major": 2,
818
  "version_minor": 0
819
  },
@@ -827,7 +763,7 @@
827
  {
828
  "data": {
829
  "application/vnd.jupyter.widget-view+json": {
830
- "model_id": "dc39d2085a8748269438f3f518a936fa",
831
  "version_major": 2,
832
  "version_minor": 0
833
  },
@@ -841,7 +777,7 @@
841
  {
842
  "data": {
843
  "application/vnd.jupyter.widget-view+json": {
844
- "model_id": "efd5dd9b5acf4a00b9fad256b239fe25",
845
  "version_major": 2,
846
  "version_minor": 0
847
  },
@@ -855,7 +791,7 @@
855
  {
856
  "data": {
857
  "application/vnd.jupyter.widget-view+json": {
858
- "model_id": "0ee7a5cdf9fc44fb95a02a215db064b1",
859
  "version_major": 2,
860
  "version_minor": 0
861
  },
@@ -869,7 +805,7 @@
869
  {
870
  "data": {
871
  "application/vnd.jupyter.widget-view+json": {
872
- "model_id": "d2bc061d040440d48bd1e068e7d0e754",
873
  "version_major": 2,
874
  "version_minor": 0
875
  },
@@ -883,7 +819,7 @@
883
  {
884
  "data": {
885
  "application/vnd.jupyter.widget-view+json": {
886
- "model_id": "290a81df45914de6865a1fd538746d8b",
887
  "version_major": 2,
888
  "version_minor": 0
889
  },
@@ -897,7 +833,7 @@
897
  {
898
  "data": {
899
  "application/vnd.jupyter.widget-view+json": {
900
- "model_id": "e15f56550122473eaf0b9d2c33defc7a",
901
  "version_major": 2,
902
  "version_minor": 0
903
  },
@@ -911,7 +847,7 @@
911
  {
912
  "data": {
913
  "application/vnd.jupyter.widget-view+json": {
914
- "model_id": "219eeafe2fde471d9c524a8f03884678",
915
  "version_major": 2,
916
  "version_minor": 0
917
  },
@@ -925,7 +861,7 @@
925
  {
926
  "data": {
927
  "application/vnd.jupyter.widget-view+json": {
928
- "model_id": "afb3a6d52a6e46268803195d3f7e0e8e",
929
  "version_major": 2,
930
  "version_minor": 0
931
  },
@@ -939,7 +875,7 @@
939
  {
940
  "data": {
941
  "application/vnd.jupyter.widget-view+json": {
942
- "model_id": "fe952f2809c24da48e85d56b8de828d9",
943
  "version_major": 2,
944
  "version_minor": 0
945
  },
@@ -953,7 +889,7 @@
953
  {
954
  "data": {
955
  "application/vnd.jupyter.widget-view+json": {
956
- "model_id": "09e7e2783b464c6197bebcbe45364e65",
957
  "version_major": 2,
958
  "version_minor": 0
959
  },
@@ -967,7 +903,7 @@
967
  {
968
  "data": {
969
  "application/vnd.jupyter.widget-view+json": {
970
- "model_id": "f2142baf4df04c2e8fac7fda5cc4a4b4",
971
  "version_major": 2,
972
  "version_minor": 0
973
  },
@@ -981,7 +917,7 @@
981
  {
982
  "data": {
983
  "application/vnd.jupyter.widget-view+json": {
984
- "model_id": "ce16d5ccf5a244279e29b26f6b9f159f",
985
  "version_major": 2,
986
  "version_minor": 0
987
  },
@@ -995,7 +931,7 @@
995
  {
996
  "data": {
997
  "application/vnd.jupyter.widget-view+json": {
998
- "model_id": "d53b88d0ad5049e493578571a0d33740",
999
  "version_major": 2,
1000
  "version_minor": 0
1001
  },
@@ -1009,7 +945,7 @@
1009
  {
1010
  "data": {
1011
  "application/vnd.jupyter.widget-view+json": {
1012
- "model_id": "785e664dba3a40edb3858acccf6a07b0",
1013
  "version_major": 2,
1014
  "version_minor": 0
1015
  },
@@ -1023,7 +959,7 @@
1023
  {
1024
  "data": {
1025
  "application/vnd.jupyter.widget-view+json": {
1026
- "model_id": "ab300b08c57247d5a972d0d77acfddf4",
1027
  "version_major": 2,
1028
  "version_minor": 0
1029
  },
@@ -1037,7 +973,7 @@
1037
  {
1038
  "data": {
1039
  "application/vnd.jupyter.widget-view+json": {
1040
- "model_id": "00ab18dd02f34aa3a658b456d8bfe390",
1041
  "version_major": 2,
1042
  "version_minor": 0
1043
  },
@@ -1051,7 +987,7 @@
1051
  {
1052
  "data": {
1053
  "application/vnd.jupyter.widget-view+json": {
1054
- "model_id": "421d90d57c8e44a48479eef2eb40a479",
1055
  "version_major": 2,
1056
  "version_minor": 0
1057
  },
@@ -1065,7 +1001,7 @@
1065
  {
1066
  "data": {
1067
  "application/vnd.jupyter.widget-view+json": {
1068
- "model_id": "60abcaa9df3d43a99ef1e07e9b7fbe11",
1069
  "version_major": 2,
1070
  "version_minor": 0
1071
  },
@@ -1079,7 +1015,7 @@
1079
  {
1080
  "data": {
1081
  "application/vnd.jupyter.widget-view+json": {
1082
- "model_id": "79af8edddbfe4543a00e47d5f697866d",
1083
  "version_major": 2,
1084
  "version_minor": 0
1085
  },
@@ -1093,7 +1029,7 @@
1093
  {
1094
  "data": {
1095
  "application/vnd.jupyter.widget-view+json": {
1096
- "model_id": "9d14e22b823d4c9e85b17e4dbd57fec6",
1097
  "version_major": 2,
1098
  "version_minor": 0
1099
  },
@@ -1107,7 +1043,7 @@
1107
  {
1108
  "data": {
1109
  "application/vnd.jupyter.widget-view+json": {
1110
- "model_id": "ee5ba070e76b4854aadfac59b0237fbf",
1111
  "version_major": 2,
1112
  "version_minor": 0
1113
  },
@@ -1121,7 +1057,7 @@
1121
  {
1122
  "data": {
1123
  "application/vnd.jupyter.widget-view+json": {
1124
- "model_id": "8ce96862fb234f6ea335071fc8114574",
1125
  "version_major": 2,
1126
  "version_minor": 0
1127
  },
@@ -1135,7 +1071,7 @@
1135
  {
1136
  "data": {
1137
  "application/vnd.jupyter.widget-view+json": {
1138
- "model_id": "dd5860de108f49e5bcc609bb11a646df",
1139
  "version_major": 2,
1140
  "version_minor": 0
1141
  },
@@ -1149,7 +1085,7 @@
1149
  {
1150
  "data": {
1151
  "application/vnd.jupyter.widget-view+json": {
1152
- "model_id": "bef0c258e4764f0ab9406467f01752a9",
1153
  "version_major": 2,
1154
  "version_minor": 0
1155
  },
@@ -1163,7 +1099,7 @@
1163
  {
1164
  "data": {
1165
  "application/vnd.jupyter.widget-view+json": {
1166
- "model_id": "6c4d53689dad43dd8db780522139f599",
1167
  "version_major": 2,
1168
  "version_minor": 0
1169
  },
@@ -1177,7 +1113,7 @@
1177
  {
1178
  "data": {
1179
  "application/vnd.jupyter.widget-view+json": {
1180
- "model_id": "d50075ea0fcd4a34ac88ed121b1e90bf",
1181
  "version_major": 2,
1182
  "version_minor": 0
1183
  },
@@ -1191,7 +1127,7 @@
1191
  {
1192
  "data": {
1193
  "application/vnd.jupyter.widget-view+json": {
1194
- "model_id": "a4e15de7a1c54400a1ebff02d4caa657",
1195
  "version_major": 2,
1196
  "version_minor": 0
1197
  },
@@ -1205,7 +1141,7 @@
1205
  {
1206
  "data": {
1207
  "application/vnd.jupyter.widget-view+json": {
1208
- "model_id": "41fc8cb29962483e81fa8a640f633d46",
1209
  "version_major": 2,
1210
  "version_minor": 0
1211
  },
@@ -1219,7 +1155,7 @@
1219
  {
1220
  "data": {
1221
  "application/vnd.jupyter.widget-view+json": {
1222
- "model_id": "388b29f8655b4fee87db053d591bd72d",
1223
  "version_major": 2,
1224
  "version_minor": 0
1225
  },
@@ -1233,7 +1169,7 @@
1233
  {
1234
  "data": {
1235
  "application/vnd.jupyter.widget-view+json": {
1236
- "model_id": "6c75919875544db98ee2c64215ecff97",
1237
  "version_major": 2,
1238
  "version_minor": 0
1239
  },
@@ -1247,7 +1183,7 @@
1247
  {
1248
  "data": {
1249
  "application/vnd.jupyter.widget-view+json": {
1250
- "model_id": "a4e8b6ecaf734f33a842f60681e23108",
1251
  "version_major": 2,
1252
  "version_minor": 0
1253
  },
@@ -1261,7 +1197,7 @@
1261
  {
1262
  "data": {
1263
  "application/vnd.jupyter.widget-view+json": {
1264
- "model_id": "7e497cfcd6f04a94aa0ac40d6df938b1",
1265
  "version_major": 2,
1266
  "version_minor": 0
1267
  },
@@ -1275,7 +1211,7 @@
1275
  {
1276
  "data": {
1277
  "application/vnd.jupyter.widget-view+json": {
1278
- "model_id": "56cde5f680f643be887d6f9f804676c9",
1279
  "version_major": 2,
1280
  "version_minor": 0
1281
  },
@@ -1289,7 +1225,7 @@
1289
  {
1290
  "data": {
1291
  "application/vnd.jupyter.widget-view+json": {
1292
- "model_id": "fbcf378be86c40cdbf606a2446d1a252",
1293
  "version_major": 2,
1294
  "version_minor": 0
1295
  },
@@ -1303,7 +1239,7 @@
1303
  {
1304
  "data": {
1305
  "application/vnd.jupyter.widget-view+json": {
1306
- "model_id": "963491aad1f945cfb5811d7239880b54",
1307
  "version_major": 2,
1308
  "version_minor": 0
1309
  },
@@ -1334,7 +1270,7 @@
1334
  },
1335
  {
1336
  "cell_type": "code",
1337
- "execution_count": 5,
1338
  "metadata": {
1339
  "colab": {
1340
  "base_uri": "https://localhost:8080/"
@@ -1362,7 +1298,7 @@
1362
  " 'cui': ['C0037005']}"
1363
  ]
1364
  },
1365
- "execution_count": 5,
1366
  "metadata": {},
1367
  "output_type": "execute_result"
1368
  }
@@ -1373,7 +1309,7 @@
1373
  },
1374
  {
1375
  "cell_type": "code",
1376
- "execution_count": 6,
1377
  "metadata": {
1378
  "colab": {
1379
  "base_uri": "https://localhost:8080/",
@@ -1402,7 +1338,7 @@
1402
  "<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1403
  ]
1404
  },
1405
- "execution_count": 6,
1406
  "metadata": {},
1407
  "output_type": "execute_result"
1408
  }
@@ -1422,7 +1358,7 @@
1422
  },
1423
  {
1424
  "cell_type": "code",
1425
- "execution_count": 7,
1426
  "metadata": {
1427
  "colab": {
1428
  "base_uri": "https://localhost:8080/",
@@ -1465,7 +1401,7 @@
1465
  {
1466
  "data": {
1467
  "application/vnd.jupyter.widget-view+json": {
1468
- "model_id": "f8115bd39ceb47678208bc6dc80a179a",
1469
  "version_major": 2,
1470
  "version_minor": 0
1471
  },
@@ -1476,10 +1412,22 @@
1476
  "metadata": {},
1477
  "output_type": "display_data"
1478
  },
 
 
 
 
 
 
 
 
 
 
 
 
1479
  {
1480
  "data": {
1481
  "application/vnd.jupyter.widget-view+json": {
1482
- "model_id": "195ef910bea946cbac27ae80d12ab37d",
1483
  "version_major": 2,
1484
  "version_minor": 0
1485
  },
@@ -1493,7 +1441,7 @@
1493
  {
1494
  "data": {
1495
  "application/vnd.jupyter.widget-view+json": {
1496
- "model_id": "ee41668130414ec29bd670a4c00ea9dd",
1497
  "version_major": 2,
1498
  "version_minor": 0
1499
  },
@@ -1507,7 +1455,7 @@
1507
  {
1508
  "data": {
1509
  "application/vnd.jupyter.widget-view+json": {
1510
- "model_id": "cae512d46a20437cb2b9054e6d796129",
1511
  "version_major": 2,
1512
  "version_minor": 0
1513
  },
@@ -1521,7 +1469,7 @@
1521
  {
1522
  "data": {
1523
  "application/vnd.jupyter.widget-view+json": {
1524
- "model_id": "15b4f6ea701b40f984b45da569b8fd50",
1525
  "version_major": 2,
1526
  "version_minor": 0
1527
  },
@@ -1535,7 +1483,7 @@
1535
  {
1536
  "data": {
1537
  "application/vnd.jupyter.widget-view+json": {
1538
- "model_id": "711ead0dc9ba4017a5755a1cc111d8c3",
1539
  "version_major": 2,
1540
  "version_minor": 0
1541
  },
@@ -1549,7 +1497,7 @@
1549
  {
1550
  "data": {
1551
  "application/vnd.jupyter.widget-view+json": {
1552
- "model_id": "009a9836643d4a1285ba017cb6dee9fb",
1553
  "version_major": 2,
1554
  "version_minor": 0
1555
  },
@@ -1560,17 +1508,10 @@
1560
  "metadata": {},
1561
  "output_type": "display_data"
1562
  },
1563
- {
1564
- "name": "stderr",
1565
- "output_type": "stream",
1566
- "text": [
1567
- "`low_cpu_mem_usage` was None, now default to True since model is quantized.\n"
1568
- ]
1569
- },
1570
  {
1571
  "data": {
1572
  "application/vnd.jupyter.widget-view+json": {
1573
- "model_id": "1078bb163ca14f4cb242b3afb81b6a70",
1574
  "version_major": 2,
1575
  "version_minor": 0
1576
  },
@@ -1584,7 +1525,7 @@
1584
  {
1585
  "data": {
1586
  "application/vnd.jupyter.widget-view+json": {
1587
- "model_id": "813c7b8d26224b07893f652c1ab25acf",
1588
  "version_major": 2,
1589
  "version_minor": 0
1590
  },
@@ -1598,7 +1539,7 @@
1598
  {
1599
  "data": {
1600
  "application/vnd.jupyter.widget-view+json": {
1601
- "model_id": "8f312307a8c34e9ebac5a4006cb75b15",
1602
  "version_major": 2,
1603
  "version_minor": 0
1604
  },
@@ -1612,7 +1553,7 @@
1612
  {
1613
  "data": {
1614
  "application/vnd.jupyter.widget-view+json": {
1615
- "model_id": "6805d6a27b4b404594f644d8289e3e0c",
1616
  "version_major": 2,
1617
  "version_minor": 0
1618
  },
@@ -1626,7 +1567,7 @@
1626
  {
1627
  "data": {
1628
  "application/vnd.jupyter.widget-view+json": {
1629
- "model_id": "dfa588033e244ba0aef4875d4dca0087",
1630
  "version_major": 2,
1631
  "version_minor": 0
1632
  },
@@ -1640,7 +1581,7 @@
1640
  {
1641
  "data": {
1642
  "application/vnd.jupyter.widget-view+json": {
1643
- "model_id": "de1b8cc27c5744b4b34b48e7a1fb7a00",
1644
  "version_major": 2,
1645
  "version_minor": 0
1646
  },
@@ -1654,7 +1595,7 @@
1654
  {
1655
  "data": {
1656
  "application/vnd.jupyter.widget-view+json": {
1657
- "model_id": "79b23b4c9373457fb28a1cdcc1b23277",
1658
  "version_major": 2,
1659
  "version_minor": 0
1660
  },
@@ -1668,7 +1609,7 @@
1668
  {
1669
  "data": {
1670
  "application/vnd.jupyter.widget-view+json": {
1671
- "model_id": "02c68a9944a44dcc882dcaa7722dfa9b",
1672
  "version_major": 2,
1673
  "version_minor": 0
1674
  },
@@ -1678,16 +1619,27 @@
1678
  },
1679
  "metadata": {},
1680
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
1681
  }
1682
  ],
1683
  "source": [
1684
  "import torch\n",
1685
- "from peft import LoraConfig\n",
1686
  "from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
1687
  "\n",
1688
- "DEVICE = \"cuda:0\"\n",
1689
- "USE_LORA = False\n",
1690
- "USE_QLORA = True\n",
1691
  "\n",
1692
  "processor = AutoProcessor.from_pretrained(\n",
1693
  " source_model_id,\n",
@@ -1715,8 +1667,8 @@
1715
  " torch_dtype=torch.float16,\n",
1716
  " quantization_config=bnb_config if USE_QLORA else None,\n",
1717
  " )\n",
1718
- " model.add_adapter(lora_config)\n",
1719
- " model.enable_adapters()\n",
1720
  "else:\n",
1721
  " model = Idefics3ForConditionalGeneration.from_pretrained(\n",
1722
  " source_model_id,\n",
@@ -1736,7 +1688,7 @@
1736
  },
1737
  {
1738
  "cell_type": "code",
1739
- "execution_count": 8,
1740
  "metadata": {
1741
  "executionInfo": {
1742
  "elapsed": 426,
@@ -1812,7 +1764,7 @@
1812
  },
1813
  {
1814
  "cell_type": "code",
1815
- "execution_count": 9,
1816
  "metadata": {
1817
  "executionInfo": {
1818
  "elapsed": 1008,
@@ -1841,7 +1793,7 @@
1841
  " gradient_accumulation_steps = 8,\n",
1842
  " dataloader_pin_memory = False,\n",
1843
  " save_total_limit = 3,\n",
1844
- " eval_strategy = \"steps\",\n",
1845
  " save_strategy = \"steps\",\n",
1846
  " eval_steps = 100,\n",
1847
  " save_steps = 10, # checkpoint each 10 steps\n",
@@ -1851,14 +1803,15 @@
1851
  " push_to_hub = True,\n",
1852
  " label_names = [\"labels\"],\n",
1853
  " load_best_model_at_end = False,\n",
1854
- " report_to = \"none\",\n",
1855
  " optim = \"paged_adamw_8bit\",\n",
 
1856
  ")"
1857
  ]
1858
  },
1859
  {
1860
  "cell_type": "code",
1861
- "execution_count": 10,
1862
  "metadata": {
1863
  "colab": {
1864
  "base_uri": "https://localhost:8080/"
@@ -1898,7 +1851,7 @@
1898
  },
1899
  {
1900
  "cell_type": "code",
1901
- "execution_count": null,
1902
  "metadata": {
1903
  "colab": {
1904
  "base_uri": "https://localhost:8080/",
@@ -1908,26 +1861,17 @@
1908
  "outputId": "ebb15160-f56e-4899-e608-b0d5fd0ba117"
1909
  },
1910
  "outputs": [
 
 
 
 
 
 
 
1911
  {
1912
  "data": {
1913
  "text/html": [
1914
- "\n",
1915
- " <div>\n",
1916
- " \n",
1917
- " <progress value='92' max='11241' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1918
- " [ 92/11241 9:38:32 < 1194:28:54, 0.00 it/s, Epoch 0.02/3]\n",
1919
- " </div>\n",
1920
- " <table border=\"1\" class=\"dataframe\">\n",
1921
- " <thead>\n",
1922
- " <tr style=\"text-align: left;\">\n",
1923
- " <th>Step</th>\n",
1924
- " <th>Training Loss</th>\n",
1925
- " <th>Validation Loss</th>\n",
1926
- " </tr>\n",
1927
- " </thead>\n",
1928
- " <tbody>\n",
1929
- " </tbody>\n",
1930
- "</table><p>"
1931
  ],
1932
  "text/plain": [
1933
  "<IPython.core.display.HTML object>"
@@ -1937,27 +1881,102 @@
1937
  "output_type": "display_data"
1938
  },
1939
  {
1940
- "name": "stderr",
1941
- "output_type": "stream",
1942
- "text": [
1943
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1944
- " warnings.warn(\n",
1945
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1946
- " warnings.warn(\n",
1947
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1948
- " warnings.warn(\n",
1949
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1950
- " warnings.warn(\n",
1951
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1952
- " warnings.warn(\n",
1953
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1954
- " warnings.warn(\n",
1955
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1956
- " warnings.warn(\n",
1957
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1958
- " warnings.warn(\n",
1959
- "/workspace/.miniconda3/lib/python3.12/site-packages/transformers/integrations/peft.py:434: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.\n",
1960
- " warnings.warn(\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1961
  ]
1962
  }
1963
  ],
@@ -1967,7 +1986,7 @@
1967
  },
1968
  {
1969
  "cell_type": "code",
1970
- "execution_count": 11,
1971
  "metadata": {},
1972
  "outputs": [
1973
  {
@@ -2005,7 +2024,7 @@
2005
  }
2006
  ],
2007
  "source": [
2008
- "model = Idefics3ForConditionalGeneration.from_pretrained(source_model_id , torch_dtype=torch.bfloat16).to(DEVICE)\n",
2009
  "model.load_adapter(destination_model_id, device_map=\"auto\")"
2010
  ]
2011
  },
 
11
  "The fine-tuning process stores the model checkpoints on a regular basis. Re run the notebook from the last checkpoint to continue the fine-tuning process."
12
  ]
13
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  {
15
  "cell_type": "markdown",
16
  "metadata": {},
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 9,
24
  "metadata": {
25
  "executionInfo": {
26
  "elapsed": 1459,
 
39
  "dataset_id = \"eltorio/ROCOv2-radiology\"\n",
40
  "prompt= \"You are an expert radiologist certified with over 15 years of experience in diagnostic imaging, describe this image\"\n",
41
  "source_model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
42
+ "hugging_face_user = \"eltorio\"\n",
43
+ "destination_model = \"IDEFICS3_ROCOv2\"\n",
44
+ "destination_model_id = f\"{hugging_face_user}/{destination_model}\"\n",
45
+ "output_dir = \"IDEFICS3_ROCOv2\""
 
 
 
46
  ]
47
  },
48
  {
49
  "cell_type": "markdown",
50
  "metadata": {},
51
  "source": [
52
+ "### Log in Kaggle"
53
  ]
54
  },
55
  {
56
  "cell_type": "code",
57
+ "execution_count": 2,
58
  "metadata": {},
59
  "outputs": [],
60
  "source": [
61
+ "import os\n",
62
+ "import json\n",
63
+ "if not os.path.exists('/kaggle/.kaggle/kaggle.json'):\n",
64
+ " try:\n",
65
+ " from kaggle_secrets import UserSecretsClient\n",
66
+ " user_secrets = UserSecretsClient()\n",
67
+ " KAGGLE_JSON = user_secrets.get_secret(\"KAGGLE_JSON\")\n",
68
+ " except:\n",
69
+ " KAGGLE_JSON = os.getenv(\"KAGGLE_JSON\")\n",
70
+ "\n",
71
+ " kaggle_dir = os.path.expanduser(\"~/.kaggle\")\n",
72
+ " kaggle_file = os.path.join(kaggle_dir, \"kaggle.json\")\n",
73
+ "\n",
74
+ " os.makedirs(kaggle_dir, exist_ok=True)\n",
75
+ "\n",
76
+ " with open(kaggle_file, 'w') as file:\n",
77
+ " json.dump(KAGGLE_JSON, file)"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "metadata": {},
83
+ "source": [
84
+ "### Login WandB"
85
  ]
86
  },
87
  {
88
  "cell_type": "code",
89
+ "execution_count": 10,
90
  "metadata": {},
91
  "outputs": [
92
  {
93
  "name": "stdout",
94
  "output_type": "stream",
95
  "text": [
96
+ "Logged in to W&B\n"
 
 
 
 
 
 
 
97
  ]
98
  }
99
  ],
100
  "source": [
101
+ "import wandb\n",
 
 
 
 
 
 
 
102
  "try:\n",
103
+ " from kaggle_secrets import UserSecretsClient\n",
104
+ " user_secrets = UserSecretsClient()\n",
105
+ " WANDB_API_KEY = user_secrets.get_secret(\"WANDB_API_KEY\")\n",
106
+ " os.environ[\"WANDB_API_KEY\"] = WANDB_API_KEY\n",
107
+ "except:\n",
108
+ " if os.getenv(\"WANDB_API_KEY\") is None:\n",
109
+ " os.environ[\"WANDB_API_KEY\"] = input(\"Enter your W&B API key: \")\n",
 
 
110
  "\n",
111
+ "if not wandb.login():\n",
112
+ " raise Exception(\"Can't login to W&B\")\n",
113
+ "else:\n",
114
+ " print(\"Logged in to W&B\")\n",
115
+ " os.environ[\"WANDB_PROJECT\"]=destination_model"
116
  ]
117
  },
118
  {
119
  "cell_type": "markdown",
120
  "metadata": {},
121
  "source": [
122
+ "## Login on Hugging Face"
123
  ]
124
  },
125
  {
126
  "cell_type": "code",
127
+ "execution_count": 11,
128
  "metadata": {},
129
  "outputs": [
130
+ {
131
+ "name": "stderr",
132
+ "output_type": "stream",
133
+ "text": [
134
+ "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
135
+ ]
136
+ },
137
  {
138
  "name": "stdout",
139
  "output_type": "stream",
140
  "text": [
141
+ "Login with hf_C************usZNcSKWwmtCiuUAgIxFZ\n"
 
 
 
142
  ]
143
  }
144
  ],
145
  "source": [
146
+ "from huggingface_hub import login\n",
147
+ "import os\n",
148
+ "\n",
149
+ "try:\n",
150
+ " from kaggle_secrets import UserSecretsClient\n",
151
+ " user_secrets = UserSecretsClient()\n",
152
+ " HF_TOKEN = user_secrets.get_secret(\"HF_TOKEN\")\n",
153
+ " os.environ[\"HF_TOKEN\"] = HF_TOKEN\n",
154
+ "except:\n",
155
+ " if not os.getenv(\"HF_TOKEN\"):\n",
156
+ " raise ValueError(\"You need to set the HF_TOKEN environment variable.\")\n",
157
+ " HF_TOKEN = os.getenv(\"HF_TOKEN\")\n",
158
+ "\n",
159
+ "print(f\"Login with {HF_TOKEN[:4]}{'*'*12}{HF_TOKEN[16:]}\")\n",
160
+ "login(\n",
161
+ " token=HF_TOKEN,\n",
162
+ " add_to_git_credential=False\n",
163
+ ")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  ]
165
  },
166
  {
 
169
  "id": "IjLCnQVEAMtE"
170
  },
171
  "source": [
172
+ "### Step 1: Retrieve the dataset from Hugging Face."
173
  ]
174
  },
175
  {
176
  "cell_type": "code",
177
+ "execution_count": 12,
178
  "metadata": {
179
  "colab": {
180
  "base_uri": "https://localhost:8080/",
 
595
  {
596
  "data": {
597
  "application/vnd.jupyter.widget-view+json": {
598
+ "model_id": "046c24f13d4f45d7badb65f20c79e973",
599
  "version_major": 2,
600
  "version_minor": 0
601
  },
 
609
  {
610
  "data": {
611
  "application/vnd.jupyter.widget-view+json": {
612
+ "model_id": "2e3a624b105e406e98e7fdf1fbfcc6f9",
613
  "version_major": 2,
614
  "version_minor": 0
615
  },
 
623
  {
624
  "data": {
625
  "application/vnd.jupyter.widget-view+json": {
626
+ "model_id": "fea5a84e8b884b6d84e4ab42e9b86130",
627
  "version_major": 2,
628
  "version_minor": 0
629
  },
 
637
  {
638
  "data": {
639
  "application/vnd.jupyter.widget-view+json": {
640
+ "model_id": "bbe912b7f3bf448d8edcb96df599f42a",
641
  "version_major": 2,
642
  "version_minor": 0
643
  },
 
651
  {
652
  "data": {
653
  "application/vnd.jupyter.widget-view+json": {
654
+ "model_id": "5f70bd3f04b0473f940f1c0a66ed412b",
655
  "version_major": 2,
656
  "version_minor": 0
657
  },
 
665
  {
666
  "data": {
667
  "application/vnd.jupyter.widget-view+json": {
668
+ "model_id": "db4ef432b48540afb5c3750e5ac50408",
669
  "version_major": 2,
670
  "version_minor": 0
671
  },
 
679
  {
680
  "data": {
681
  "application/vnd.jupyter.widget-view+json": {
682
+ "model_id": "9deda09dddc847e38093acfe95719461",
683
  "version_major": 2,
684
  "version_minor": 0
685
  },
 
693
  {
694
  "data": {
695
  "application/vnd.jupyter.widget-view+json": {
696
+ "model_id": "2557fa8a477d4bdbbc6c63b9b4b19f77",
697
  "version_major": 2,
698
  "version_minor": 0
699
  },
 
707
  {
708
  "data": {
709
  "application/vnd.jupyter.widget-view+json": {
710
+ "model_id": "12ddf4a453a04a60846b2520b9bdc5e4",
711
  "version_major": 2,
712
  "version_minor": 0
713
  },
 
721
  {
722
  "data": {
723
  "application/vnd.jupyter.widget-view+json": {
724
+ "model_id": "835145ef5d21469f97d1475258b398dc",
725
  "version_major": 2,
726
  "version_minor": 0
727
  },
 
735
  {
736
  "data": {
737
  "application/vnd.jupyter.widget-view+json": {
738
+ "model_id": "ba78f1edad1749459454ae149982f0a4",
739
  "version_major": 2,
740
  "version_minor": 0
741
  },
 
749
  {
750
  "data": {
751
  "application/vnd.jupyter.widget-view+json": {
752
+ "model_id": "d2098ffc181c437dba020d594d0a6fed",
753
  "version_major": 2,
754
  "version_minor": 0
755
  },
 
763
  {
764
  "data": {
765
  "application/vnd.jupyter.widget-view+json": {
766
+ "model_id": "bb4710de5e754e119d55bbea6be576ab",
767
  "version_major": 2,
768
  "version_minor": 0
769
  },
 
777
  {
778
  "data": {
779
  "application/vnd.jupyter.widget-view+json": {
780
+ "model_id": "1b8ada1887d648139c2c089f575cbbc5",
781
  "version_major": 2,
782
  "version_minor": 0
783
  },
 
791
  {
792
  "data": {
793
  "application/vnd.jupyter.widget-view+json": {
794
+ "model_id": "b290432fd1074435a3283fcefc46e6f0",
795
  "version_major": 2,
796
  "version_minor": 0
797
  },
 
805
  {
806
  "data": {
807
  "application/vnd.jupyter.widget-view+json": {
808
+ "model_id": "9f04b5bd31ad4e3ca12555b70959bdd8",
809
  "version_major": 2,
810
  "version_minor": 0
811
  },
 
819
  {
820
  "data": {
821
  "application/vnd.jupyter.widget-view+json": {
822
+ "model_id": "3d02778907134930b869fc0d602bf04a",
823
  "version_major": 2,
824
  "version_minor": 0
825
  },
 
833
  {
834
  "data": {
835
  "application/vnd.jupyter.widget-view+json": {
836
+ "model_id": "e242a3f79e4a471ca7221764bb3bc8c8",
837
  "version_major": 2,
838
  "version_minor": 0
839
  },
 
847
  {
848
  "data": {
849
  "application/vnd.jupyter.widget-view+json": {
850
+ "model_id": "4af61d3296d344e09f202eb471ed5cf2",
851
  "version_major": 2,
852
  "version_minor": 0
853
  },
 
861
  {
862
  "data": {
863
  "application/vnd.jupyter.widget-view+json": {
864
+ "model_id": "60c368d7fbc54ac28d1b387beea698bf",
865
  "version_major": 2,
866
  "version_minor": 0
867
  },
 
875
  {
876
  "data": {
877
  "application/vnd.jupyter.widget-view+json": {
878
+ "model_id": "9c29d19326724a50a3e782ff98d2177f",
879
  "version_major": 2,
880
  "version_minor": 0
881
  },
 
889
  {
890
  "data": {
891
  "application/vnd.jupyter.widget-view+json": {
892
+ "model_id": "16cfe87e688045d18bcd788c07f63292",
893
  "version_major": 2,
894
  "version_minor": 0
895
  },
 
903
  {
904
  "data": {
905
  "application/vnd.jupyter.widget-view+json": {
906
+ "model_id": "f339fd44a8694e769f6a95d96b0b11c9",
907
  "version_major": 2,
908
  "version_minor": 0
909
  },
 
917
  {
918
  "data": {
919
  "application/vnd.jupyter.widget-view+json": {
920
+ "model_id": "dea023c08e31443ab8f06f99aa36a3cf",
921
  "version_major": 2,
922
  "version_minor": 0
923
  },
 
931
  {
932
  "data": {
933
  "application/vnd.jupyter.widget-view+json": {
934
+ "model_id": "da6fabb9e9ed468b95b140bdc250e667",
935
  "version_major": 2,
936
  "version_minor": 0
937
  },
 
945
  {
946
  "data": {
947
  "application/vnd.jupyter.widget-view+json": {
948
+ "model_id": "9ca865fa0cff4ab6bf896bd9e359f4ca",
949
  "version_major": 2,
950
  "version_minor": 0
951
  },
 
959
  {
960
  "data": {
961
  "application/vnd.jupyter.widget-view+json": {
962
+ "model_id": "5b5271ad34af47c3b5d1ca0c7da31632",
963
  "version_major": 2,
964
  "version_minor": 0
965
  },
 
973
  {
974
  "data": {
975
  "application/vnd.jupyter.widget-view+json": {
976
+ "model_id": "21577d910fa14b8ab32c00c0d4e31f9a",
977
  "version_major": 2,
978
  "version_minor": 0
979
  },
 
987
  {
988
  "data": {
989
  "application/vnd.jupyter.widget-view+json": {
990
+ "model_id": "97e0316cd6c0432797fcce4c07c3391b",
991
  "version_major": 2,
992
  "version_minor": 0
993
  },
 
1001
  {
1002
  "data": {
1003
  "application/vnd.jupyter.widget-view+json": {
1004
+ "model_id": "4b1deeac66544256b4df8f95baf21b4a",
1005
  "version_major": 2,
1006
  "version_minor": 0
1007
  },
 
1015
  {
1016
  "data": {
1017
  "application/vnd.jupyter.widget-view+json": {
1018
+ "model_id": "257056bdde0445669fe615aaf91f5b13",
1019
  "version_major": 2,
1020
  "version_minor": 0
1021
  },
 
1029
  {
1030
  "data": {
1031
  "application/vnd.jupyter.widget-view+json": {
1032
+ "model_id": "12dabb10d7dd4cc080783ff2bbd7fe6f",
1033
  "version_major": 2,
1034
  "version_minor": 0
1035
  },
 
1043
  {
1044
  "data": {
1045
  "application/vnd.jupyter.widget-view+json": {
1046
+ "model_id": "f631c0696721488691b50bfb35e26157",
1047
  "version_major": 2,
1048
  "version_minor": 0
1049
  },
 
1057
  {
1058
  "data": {
1059
  "application/vnd.jupyter.widget-view+json": {
1060
+ "model_id": "8a3cc5c12bfa40bb94427b7f84b2607b",
1061
  "version_major": 2,
1062
  "version_minor": 0
1063
  },
 
1071
  {
1072
  "data": {
1073
  "application/vnd.jupyter.widget-view+json": {
1074
+ "model_id": "753b01971b4c4b818addcbee07fd5ebd",
1075
  "version_major": 2,
1076
  "version_minor": 0
1077
  },
 
1085
  {
1086
  "data": {
1087
  "application/vnd.jupyter.widget-view+json": {
1088
+ "model_id": "fe9a979c6ce7437dbd58c3b2ed84ce56",
1089
  "version_major": 2,
1090
  "version_minor": 0
1091
  },
 
1099
  {
1100
  "data": {
1101
  "application/vnd.jupyter.widget-view+json": {
1102
+ "model_id": "bfa00301efbc45d6a776f62b5ee4112b",
1103
  "version_major": 2,
1104
  "version_minor": 0
1105
  },
 
1113
  {
1114
  "data": {
1115
  "application/vnd.jupyter.widget-view+json": {
1116
+ "model_id": "218ceb240e45427392547e041f045680",
1117
  "version_major": 2,
1118
  "version_minor": 0
1119
  },
 
1127
  {
1128
  "data": {
1129
  "application/vnd.jupyter.widget-view+json": {
1130
+ "model_id": "004aa618d2a24aa5966ddaa9afd96001",
1131
  "version_major": 2,
1132
  "version_minor": 0
1133
  },
 
1141
  {
1142
  "data": {
1143
  "application/vnd.jupyter.widget-view+json": {
1144
+ "model_id": "6a9acf4704f543b593753609da89240c",
1145
  "version_major": 2,
1146
  "version_minor": 0
1147
  },
 
1155
  {
1156
  "data": {
1157
  "application/vnd.jupyter.widget-view+json": {
1158
+ "model_id": "39a57445dbcc4335842dadcfcbf5a4ef",
1159
  "version_major": 2,
1160
  "version_minor": 0
1161
  },
 
1169
  {
1170
  "data": {
1171
  "application/vnd.jupyter.widget-view+json": {
1172
+ "model_id": "4ea49e27202a416ab608cea8cb46cd5c",
1173
  "version_major": 2,
1174
  "version_minor": 0
1175
  },
 
1183
  {
1184
  "data": {
1185
  "application/vnd.jupyter.widget-view+json": {
1186
+ "model_id": "14f2007ef3264c7faa55cfcc9effd0e6",
1187
  "version_major": 2,
1188
  "version_minor": 0
1189
  },
 
1197
  {
1198
  "data": {
1199
  "application/vnd.jupyter.widget-view+json": {
1200
+ "model_id": "3a3b653ca45e4757a4e3b81c3ab43b87",
1201
  "version_major": 2,
1202
  "version_minor": 0
1203
  },
 
1211
  {
1212
  "data": {
1213
  "application/vnd.jupyter.widget-view+json": {
1214
+ "model_id": "e753298cf52a40b5b3a7052146fd2cfb",
1215
  "version_major": 2,
1216
  "version_minor": 0
1217
  },
 
1225
  {
1226
  "data": {
1227
  "application/vnd.jupyter.widget-view+json": {
1228
+ "model_id": "c05e1aa2995c489c84f5bdb09cef3ccd",
1229
  "version_major": 2,
1230
  "version_minor": 0
1231
  },
 
1239
  {
1240
  "data": {
1241
  "application/vnd.jupyter.widget-view+json": {
1242
+ "model_id": "9a81639720f34fedbca3c19b42c7a9ae",
1243
  "version_major": 2,
1244
  "version_minor": 0
1245
  },
 
1270
  },
1271
  {
1272
  "cell_type": "code",
1273
+ "execution_count": 13,
1274
  "metadata": {
1275
  "colab": {
1276
  "base_uri": "https://localhost:8080/"
 
1298
  " 'cui': ['C0037005']}"
1299
  ]
1300
  },
1301
+ "execution_count": 13,
1302
  "metadata": {},
1303
  "output_type": "execute_result"
1304
  }
 
1309
  },
1310
  {
1311
  "cell_type": "code",
1312
+ "execution_count": 14,
1313
  "metadata": {
1314
  "colab": {
1315
  "base_uri": "https://localhost:8080/",
 
1338
  "<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1339
  ]
1340
  },
1341
+ "execution_count": 14,
1342
  "metadata": {},
1343
  "output_type": "execute_result"
1344
  }
 
1358
  },
1359
  {
1360
  "cell_type": "code",
1361
+ "execution_count": 34,
1362
  "metadata": {
1363
  "colab": {
1364
  "base_uri": "https://localhost:8080/",
 
1401
  {
1402
  "data": {
1403
  "application/vnd.jupyter.widget-view+json": {
1404
+ "model_id": "4a7333b9c40e4c9abface9d175b637f7",
1405
  "version_major": 2,
1406
  "version_minor": 0
1407
  },
 
1412
  "metadata": {},
1413
  "output_type": "display_data"
1414
  },
1415
+ {
1416
+ "name": "stderr",
1417
+ "output_type": "stream",
1418
+ "text": [
1419
+ "2024-12-05 18:36:11.299502: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
1420
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1421
+ "E0000 00:00:1733423771.360739 225 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
1422
+ "E0000 00:00:1733423771.378961 225 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
1423
+ "2024-12-05 18:36:11.524136: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
1424
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
1425
+ ]
1426
+ },
1427
  {
1428
  "data": {
1429
  "application/vnd.jupyter.widget-view+json": {
1430
+ "model_id": "599872df212a40dba475d0647320f467",
1431
  "version_major": 2,
1432
  "version_minor": 0
1433
  },
 
1441
  {
1442
  "data": {
1443
  "application/vnd.jupyter.widget-view+json": {
1444
+ "model_id": "45acf55c884a491bac43dd85ce80df36",
1445
  "version_major": 2,
1446
  "version_minor": 0
1447
  },
 
1455
  {
1456
  "data": {
1457
  "application/vnd.jupyter.widget-view+json": {
1458
+ "model_id": "91a3839713f146889271126a3f2e6b23",
1459
  "version_major": 2,
1460
  "version_minor": 0
1461
  },
 
1469
  {
1470
  "data": {
1471
  "application/vnd.jupyter.widget-view+json": {
1472
+ "model_id": "303deff5af0940a18e0f19c68bbd9b6b",
1473
  "version_major": 2,
1474
  "version_minor": 0
1475
  },
 
1483
  {
1484
  "data": {
1485
  "application/vnd.jupyter.widget-view+json": {
1486
+ "model_id": "45ed4ff534e444fab1780873ca453c1a",
1487
  "version_major": 2,
1488
  "version_minor": 0
1489
  },
 
1497
  {
1498
  "data": {
1499
  "application/vnd.jupyter.widget-view+json": {
1500
+ "model_id": "94c98750b760459386f52c153724d603",
1501
  "version_major": 2,
1502
  "version_minor": 0
1503
  },
 
1508
  "metadata": {},
1509
  "output_type": "display_data"
1510
  },
 
 
 
 
 
 
 
1511
  {
1512
  "data": {
1513
  "application/vnd.jupyter.widget-view+json": {
1514
+ "model_id": "8337ac6e5e6a4db4982669553b39b8ba",
1515
  "version_major": 2,
1516
  "version_minor": 0
1517
  },
 
1525
  {
1526
  "data": {
1527
  "application/vnd.jupyter.widget-view+json": {
1528
+ "model_id": "c0e7ab2db08a499bbea8bec1c17857dd",
1529
  "version_major": 2,
1530
  "version_minor": 0
1531
  },
 
1539
  {
1540
  "data": {
1541
  "application/vnd.jupyter.widget-view+json": {
1542
+ "model_id": "71bc274e67de44b5bf14c7113b893db7",
1543
  "version_major": 2,
1544
  "version_minor": 0
1545
  },
 
1553
  {
1554
  "data": {
1555
  "application/vnd.jupyter.widget-view+json": {
1556
+ "model_id": "e344829a0ae94e209fc9bc43d794510e",
1557
  "version_major": 2,
1558
  "version_minor": 0
1559
  },
 
1567
  {
1568
  "data": {
1569
  "application/vnd.jupyter.widget-view+json": {
1570
+ "model_id": "da3f4d75fff14d739605a8e9b4aed5cf",
1571
  "version_major": 2,
1572
  "version_minor": 0
1573
  },
 
1581
  {
1582
  "data": {
1583
  "application/vnd.jupyter.widget-view+json": {
1584
+ "model_id": "8e594662af3946f38388ae9d6a22337b",
1585
  "version_major": 2,
1586
  "version_minor": 0
1587
  },
 
1595
  {
1596
  "data": {
1597
  "application/vnd.jupyter.widget-view+json": {
1598
+ "model_id": "7396830f193d4958ae9c2fba9e59bce5",
1599
  "version_major": 2,
1600
  "version_minor": 0
1601
  },
 
1609
  {
1610
  "data": {
1611
  "application/vnd.jupyter.widget-view+json": {
1612
+ "model_id": "64bc001093534c2fa1d0e2207105419e",
1613
  "version_major": 2,
1614
  "version_minor": 0
1615
  },
 
1619
  },
1620
  "metadata": {},
1621
  "output_type": "display_data"
1622
+ },
1623
+ {
1624
+ "name": "stdout",
1625
+ "output_type": "stream",
1626
+ "text": [
1627
+ "g++ (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\n",
1628
+ "Copyright (C) 2021 Free Software Foundation, Inc.\n",
1629
+ "This is free software; see the source for copying conditions. There is NO\n",
1630
+ "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
1631
+ "\n"
1632
+ ]
1633
  }
1634
  ],
1635
  "source": [
1636
  "import torch\n",
1637
+ "from peft import LoraConfig,get_peft_model\n",
1638
  "from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
1639
  "\n",
1640
+ "DEVICE = \"cuda\"\n",
1641
+ "USE_LORA = True\n",
1642
+ "USE_QLORA = False\n",
1643
  "\n",
1644
  "processor = AutoProcessor.from_pretrained(\n",
1645
  " source_model_id,\n",
 
1667
  " torch_dtype=torch.float16,\n",
1668
  " quantization_config=bnb_config if USE_QLORA else None,\n",
1669
  " )\n",
1670
+ " model = get_peft_model(model, lora_config)\n",
1671
+ " model = model.to(DEVICE)\n",
1672
  "else:\n",
1673
  " model = Idefics3ForConditionalGeneration.from_pretrained(\n",
1674
  " source_model_id,\n",
 
1688
  },
1689
  {
1690
  "cell_type": "code",
1691
+ "execution_count": 35,
1692
  "metadata": {
1693
  "executionInfo": {
1694
  "elapsed": 426,
 
1764
  },
1765
  {
1766
  "cell_type": "code",
1767
+ "execution_count": null,
1768
  "metadata": {
1769
  "executionInfo": {
1770
  "elapsed": 1008,
 
1793
  " gradient_accumulation_steps = 8,\n",
1794
  " dataloader_pin_memory = False,\n",
1795
  " save_total_limit = 3,\n",
1796
+ " eval_strategy = \"epoch\",\n",
1797
  " save_strategy = \"steps\",\n",
1798
  " eval_steps = 100,\n",
1799
  " save_steps = 10, # checkpoint each 10 steps\n",
 
1803
  " push_to_hub = True,\n",
1804
  " label_names = [\"labels\"],\n",
1805
  " load_best_model_at_end = False,\n",
1806
+ " report_to = \"wandb\",\n",
1807
  " optim = \"paged_adamw_8bit\",\n",
1808
+ " run_name = destination_model,\n",
1809
  ")"
1810
  ]
1811
  },
1812
  {
1813
  "cell_type": "code",
1814
+ "execution_count": 37,
1815
  "metadata": {
1816
  "colab": {
1817
  "base_uri": "https://localhost:8080/"
 
1851
  },
1852
  {
1853
  "cell_type": "code",
1854
+ "execution_count": 38,
1855
  "metadata": {
1856
  "colab": {
1857
  "base_uri": "https://localhost:8080/",
 
1861
  "outputId": "ebb15160-f56e-4899-e608-b0d5fd0ba117"
1862
  },
1863
  "outputs": [
1864
+ {
1865
+ "name": "stderr",
1866
+ "output_type": "stream",
1867
+ "text": [
1868
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n"
1869
+ ]
1870
+ },
1871
  {
1872
  "data": {
1873
  "text/html": [
1874
+ "Changes to your `wandb` environment variables will be ignored because your `wandb` session has already started. For more information on how to modify your settings with `wandb.init()` arguments, please refer to <a href='https://wandb.me/wandb-init' target=\"_blank\">the W&B docs</a>."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1875
  ],
1876
  "text/plain": [
1877
  "<IPython.core.display.HTML object>"
 
1881
  "output_type": "display_data"
1882
  },
1883
  {
1884
+ "data": {
1885
+ "text/html": [
1886
+ "Tracking run with wandb version 0.19.0"
1887
+ ],
1888
+ "text/plain": [
1889
+ "<IPython.core.display.HTML object>"
1890
+ ]
1891
+ },
1892
+ "metadata": {},
1893
+ "output_type": "display_data"
1894
+ },
1895
+ {
1896
+ "data": {
1897
+ "text/html": [
1898
+ "Run data is saved locally in <code>/workspace/wandb/run-20241205_190533-9ckd0brc</code>"
1899
+ ],
1900
+ "text/plain": [
1901
+ "<IPython.core.display.HTML object>"
1902
+ ]
1903
+ },
1904
+ "metadata": {},
1905
+ "output_type": "display_data"
1906
+ },
1907
+ {
1908
+ "data": {
1909
+ "text/html": [
1910
+ "Syncing run <strong><a href='https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2/runs/9ckd0brc' target=\"_blank\">IDEFICS3_ROCOv2</a></strong> to <a href='https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br/>"
1911
+ ],
1912
+ "text/plain": [
1913
+ "<IPython.core.display.HTML object>"
1914
+ ]
1915
+ },
1916
+ "metadata": {},
1917
+ "output_type": "display_data"
1918
+ },
1919
+ {
1920
+ "data": {
1921
+ "text/html": [
1922
+ " View project at <a href='https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2' target=\"_blank\">https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2</a>"
1923
+ ],
1924
+ "text/plain": [
1925
+ "<IPython.core.display.HTML object>"
1926
+ ]
1927
+ },
1928
+ "metadata": {},
1929
+ "output_type": "display_data"
1930
+ },
1931
+ {
1932
+ "data": {
1933
+ "text/html": [
1934
+ " View run at <a href='https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2/runs/9ckd0brc' target=\"_blank\">https://wandb.ai/aeltorio-ac-lille/IDEFICS3_ROCOv2/runs/9ckd0brc</a>"
1935
+ ],
1936
+ "text/plain": [
1937
+ "<IPython.core.display.HTML object>"
1938
+ ]
1939
+ },
1940
+ "metadata": {},
1941
+ "output_type": "display_data"
1942
+ },
1943
+ {
1944
+ "ename": "KeyboardInterrupt",
1945
+ "evalue": "",
1946
+ "output_type": "error",
1947
+ "traceback": [
1948
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1949
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1950
+ "Cell \u001b[0;32mIn[38], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m trainer\u001b[38;5;241m.\u001b[39mtrain()\n",
1951
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/trainer.py:2157\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 2154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 2155\u001b[0m \u001b[38;5;66;03m# Disable progress bars when uploading models during checkpoints to avoid polluting stdout\u001b[39;00m\n\u001b[1;32m 2156\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39mdisable_progress_bars()\n\u001b[0;32m-> 2157\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[1;32m 2158\u001b[0m args\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m 2159\u001b[0m resume_from_checkpoint\u001b[38;5;241m=\u001b[39mresume_from_checkpoint,\n\u001b[1;32m 2160\u001b[0m trial\u001b[38;5;241m=\u001b[39mtrial,\n\u001b[1;32m 2161\u001b[0m ignore_keys_for_eval\u001b[38;5;241m=\u001b[39mignore_keys_for_eval,\n\u001b[1;32m 2162\u001b[0m )\n\u001b[1;32m 2163\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2164\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n",
1952
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/memory.py:158\u001b[0m, in \u001b[0;36mfind_executable_batch_size.<locals>.decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo executable batch size found, reached zero.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m function(batch_size, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_reduce_batch_size(e):\n",
1953
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/trainer.py:2524\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2518\u001b[0m context \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2519\u001b[0m functools\u001b[38;5;241m.\u001b[39mpartial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mno_sync, model\u001b[38;5;241m=\u001b[39mmodel)\n\u001b[1;32m 2520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(batch_samples) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 2521\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m contextlib\u001b[38;5;241m.\u001b[39mnullcontext\n\u001b[1;32m 2522\u001b[0m )\n\u001b[1;32m 2523\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m context():\n\u001b[0;32m-> 2524\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs, num_items_in_batch)\n\u001b[1;32m 2526\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 2527\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 2528\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[1;32m 2529\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 2530\u001b[0m ):\n\u001b[1;32m 2531\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 2532\u001b[0m tr_loss \u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m+\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
1954
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/trainer.py:3654\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs, num_items_in_batch)\u001b[0m\n\u001b[1;32m 3651\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 3653\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3654\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss(model, inputs, num_items_in_batch\u001b[38;5;241m=\u001b[39mnum_items_in_batch)\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m inputs\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 3658\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mtorch_empty_cache_steps \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mtorch_empty_cache_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 3660\u001b[0m ):\n",
1955
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/trainer.py:3708\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs, num_items_in_batch)\u001b[0m\n\u001b[1;32m 3706\u001b[0m loss_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_items_in_batch\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m num_items_in_batch\n\u001b[1;32m 3707\u001b[0m inputs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mloss_kwargs}\n\u001b[0;32m-> 3708\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n\u001b[1;32m 3709\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 3710\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 3711\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
1956
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1957
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1958
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/operations.py:823\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 823\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1959
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/operations.py:811\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n",
1960
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/amp/autocast_mode.py:44\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1961
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/operations.py:823\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 823\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1962
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/operations.py:811\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n",
1963
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/amp/autocast_mode.py:44\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1964
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/idefics3/modeling_idefics3.py:1196\u001b[0m, in \u001b[0;36mIdefics3ForConditionalGeneration.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1193\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[0;32m-> 1196\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel(\n\u001b[1;32m 1197\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m 1198\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 1199\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 1200\u001b[0m past_key_values\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 1201\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1202\u001b[0m pixel_values\u001b[38;5;241m=\u001b[39mpixel_values,\n\u001b[1;32m 1203\u001b[0m pixel_attention_mask\u001b[38;5;241m=\u001b[39mpixel_attention_mask,\n\u001b[1;32m 1204\u001b[0m image_hidden_states\u001b[38;5;241m=\u001b[39mimage_hidden_states,\n\u001b[1;32m 1205\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 1206\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 1207\u001b[0m 
output_hidden_states\u001b[38;5;241m=\u001b[39moutput_hidden_states,\n\u001b[1;32m 1208\u001b[0m return_dict\u001b[38;5;241m=\u001b[39mreturn_dict,\n\u001b[1;32m 1209\u001b[0m )\n\u001b[1;32m 1211\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1212\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_head(hidden_states)\n",
1965
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1966
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1967
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/idefics3/modeling_idefics3.py:1020\u001b[0m, in \u001b[0;36mIdefics3Model.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1011\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m past_seen_tokens \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m inputs_embeds \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m image_hidden_states \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;66;03m# When we generate, we don't want to replace the potential image_token_id that we generated by images\u001b[39;00m\n\u001b[1;32m 1013\u001b[0m \u001b[38;5;66;03m# that simply don't exist\u001b[39;00m\n\u001b[1;32m 1014\u001b[0m inputs_embeds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minputs_merger(\n\u001b[1;32m 1015\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m 1016\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1017\u001b[0m image_hidden_states\u001b[38;5;241m=\u001b[39mimage_hidden_states,\n\u001b[1;32m 1018\u001b[0m )\n\u001b[0;32m-> 1020\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext_model(\n\u001b[1;32m 1021\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1022\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 1023\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 1024\u001b[0m past_key_values\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 
1025\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 1026\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 1027\u001b[0m output_hidden_states\u001b[38;5;241m=\u001b[39moutput_hidden_states,\n\u001b[1;32m 1028\u001b[0m return_dict\u001b[38;5;241m=\u001b[39mreturn_dict,\n\u001b[1;32m 1029\u001b[0m )\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_dict:\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(v \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m*\u001b[39moutputs, image_hidden_states] \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n",
1968
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1969
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1970
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/modeling_llama.py:913\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, **flash_attn_kwargs)\u001b[0m\n\u001b[1;32m 901\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m 902\u001b[0m decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m 903\u001b[0m hidden_states,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m position_embeddings,\n\u001b[1;32m 911\u001b[0m )\n\u001b[1;32m 912\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 913\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m decoder_layer(\n\u001b[1;32m 914\u001b[0m hidden_states,\n\u001b[1;32m 915\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mcausal_mask,\n\u001b[1;32m 916\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 917\u001b[0m past_key_value\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 918\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 919\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 920\u001b[0m cache_position\u001b[38;5;241m=\u001b[39mcache_position,\n\u001b[1;32m 921\u001b[0m position_embeddings\u001b[38;5;241m=\u001b[39mposition_embeddings,\n\u001b[1;32m 922\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mflash_attn_kwargs,\n\u001b[1;32m 923\u001b[0m )\n\u001b[1;32m 925\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 927\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache:\n",
1971
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1972
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1973
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/modeling_llama.py:640\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[0m\n\u001b[1;32m 637\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_layernorm(hidden_states)\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# Self Attention\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m hidden_states, self_attn_weights, present_key_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mself_attn(\n\u001b[1;32m 641\u001b[0m hidden_states\u001b[38;5;241m=\u001b[39mhidden_states,\n\u001b[1;32m 642\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 643\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 644\u001b[0m past_key_value\u001b[38;5;241m=\u001b[39mpast_key_value,\n\u001b[1;32m 645\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 646\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 647\u001b[0m cache_position\u001b[38;5;241m=\u001b[39mcache_position,\n\u001b[1;32m 648\u001b[0m position_embeddings\u001b[38;5;241m=\u001b[39mposition_embeddings,\n\u001b[1;32m 649\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 650\u001b[0m )\n\u001b[1;32m 651\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m residual \u001b[38;5;241m+\u001b[39m hidden_states\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n",
1974
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1975
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1976
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/modeling_llama.py:541\u001b[0m, in \u001b[0;36mLlamaSdpaAttention.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[0m\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 540\u001b[0m cos, sin \u001b[38;5;241m=\u001b[39m position_embeddings\n\u001b[0;32m--> 541\u001b[0m query_states, key_states \u001b[38;5;241m=\u001b[39m apply_rotary_pos_emb(query_states, key_states, cos, sin)\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m past_key_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 544\u001b[0m \u001b[38;5;66;03m# sin and cos are specific to RoPE models; cache_position needed for the static cache\u001b[39;00m\n\u001b[1;32m 545\u001b[0m cache_kwargs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msin\u001b[39m\u001b[38;5;124m\"\u001b[39m: sin, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcos\u001b[39m\u001b[38;5;124m\"\u001b[39m: cos, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache_position\u001b[39m\u001b[38;5;124m\"\u001b[39m: cache_position}\n",
1977
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/modeling_llama.py:226\u001b[0m, in \u001b[0;36mapply_rotary_pos_emb\u001b[0;34m(q, k, cos, sin, position_ids, unsqueeze_dim)\u001b[0m\n\u001b[1;32m 224\u001b[0m sin \u001b[38;5;241m=\u001b[39m sin\u001b[38;5;241m.\u001b[39munsqueeze(unsqueeze_dim)\n\u001b[1;32m 225\u001b[0m q_embed \u001b[38;5;241m=\u001b[39m (q \u001b[38;5;241m*\u001b[39m cos) \u001b[38;5;241m+\u001b[39m (rotate_half(q) \u001b[38;5;241m*\u001b[39m sin)\n\u001b[0;32m--> 226\u001b[0m k_embed \u001b[38;5;241m=\u001b[39m (k \u001b[38;5;241m*\u001b[39m cos) \u001b[38;5;241m+\u001b[39m (rotate_half(k) \u001b[38;5;241m*\u001b[39m sin)\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m q_embed, k_embed\n",
1978
+ "File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/modeling_llama.py:196\u001b[0m, in \u001b[0;36mrotate_half\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 192\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrope_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdynamic\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 196\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrotate_half\u001b[39m(x):\n\u001b[1;32m 197\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Rotates half the hidden dims of the input.\"\"\"\u001b[39;00m\n\u001b[1;32m 198\u001b[0m x1 \u001b[38;5;241m=\u001b[39m x[\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m, : x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m]\n",
1979
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
1980
  ]
1981
  }
1982
  ],
 
1986
  },
1987
  {
1988
  "cell_type": "code",
1989
+ "execution_count": null,
1990
  "metadata": {},
1991
  "outputs": [
1992
  {
 
2024
  }
2025
  ],
2026
  "source": [
2027
+ "model = Idefics3ForConditionalGeneration.from_pretrained(source_model_id , torch_dtype=torch.float16).to(DEVICE)\n",
2028
  "model.load_adapter(destination_model_id, device_map=\"auto\")"
2029
  ]
2030
  },
bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8904b74767c5a060b0a8fbcaa8f47017a49e005a3e9e17dad2ca170a85f3f99a
3
+ size 10850559
kaggle.json.enc ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ U2FsdGVkX1+BifIqzv3coYZFjOTt55X+lVyUHpD9N9V77Vrnsh4R8wt3OkUoA6Vf
2
+ IY7T8RsNfr7Salx0RFUk/m3QTfHmLKXtVwsBzilKZJY=
libbitsandbytes_cuda124.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c9698af520efd7212d164e394aadb42d45b5fdee0b36ed31ae25422ac29c30
3
+ size 34242400
libbitsandbytes_cuda126.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2525070006f24fdfc5221e2e5bf63f85417298403ea89d80dbbf255884122990
3
+ size 34536168
start.sh CHANGED
@@ -37,6 +37,14 @@ if [ -f /workspace/.config/CFTOKEN.enc ]; then
37
  /usr/local/bin/cloudflared --pidfile /tmp/cf.pid --autoupdate-freq 24h0m0s tunnel run --token $CFTOKEN &
38
  fi
39
 
 
 
 
 
 
 
 
 
40
  git clone https://huggingface.co/eltorio/IDEFICS3_ROCOv2
41
  git config --global user.email "[email protected]"
42
  git config --global user.name "[email protected]"
 
37
  /usr/local/bin/cloudflared --pidfile /tmp/cf.pid --autoupdate-freq 24h0m0s tunnel run --token $CFTOKEN &
38
  fi
39
 
40
+ # Decrypt kaggle.json.enc (passphrase: $HF_TOKEN) into $HOME/.kaggle/kaggle.json; skipped when the .enc file is absent
41
+ # encoded with: openssl aes-256-cbc -base64 -md sha256 -pass pass:"$HF_TOKEN" -in kaggle.json -out kaggle.json.enc
42
+ # decode with: openssl aes-256-cbc -a -d -md sha256 -pass pass:"$HF_TOKEN" -in kaggle.json.enc -out kaggle.json
43
+ if [ -f /workspace/.config/kaggle.json.enc ]; then
44
+ mkdir -p "$HOME/.kaggle"
45
+ openssl aes-256-cbc -a -d -md sha256 -pass pass:"$HF_TOKEN" -in /workspace/.config/kaggle.json.enc -out "$HOME/.kaggle/kaggle.json" && chmod 600 "$HOME/.kaggle/kaggle.json"
46
+ fi
47
+
48
  git clone https://huggingface.co/eltorio/IDEFICS3_ROCOv2
49
  git config --global user.email "[email protected]"
50
  git config --global user.name "[email protected]"