commited on
Browse files- .gitattributes +2 -0
- .gitignore +1 -0
- Dockerfile +15 -7
- ROCO-idefics3.ipynb +285 -266
- bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl +3 -0
- kaggle.json.enc +2 -0
- +3 -0
- +3 -0
- +8 -0
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
36 |
*.so filter=lfs diff=lfs merge=lfs -text
37 |
*.whl filter=lfs diff=lfs merge=lfs -text
@@ -1 +1,2 @@
1 |
1 |
2 |
@@ -1,13 +1,13 @@
1 |
# build with: docker build . --tag sctg/roco-idefics3:0.0.
2 |
# run with
3 |
# docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.
4 |
# docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -it sctg/roco-idefics3:0.0.
5 |
FROM cloudflare/cloudflared:latest as cloudflared
6 |
FROM nvidia/cuda:12.6.2-devel-ubuntu22.04
7 |
8 |
RUN /usr/sbin/addgroup --gid 42420 ovh
9 |
RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
10 |
RUN apt update -y && apt-get install -y curl git git-lfs screen sudo \
11 |
&& mkdir -p /etc/apt/keyrings \
12 |
&& curl -fsSL | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
13 |
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg]$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
@@ -22,12 +22,14 @@ RUN echo "ovh ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
22 |
COPY --from=cloudflared /usr/local/bin/cloudflared /usr/local/bin/cloudflared
23 |
USER 42420
24 |
RUN curl -L > /workspace/
25 |
RUN /bin/bash /workspace/ -b -p /workspace/.miniconda3
26 |
RUN . /workspace/.miniconda3/bin/activate && conda init --all
27 |
RUN . /workspace/.miniconda3/bin/activate \
28 |
&& pip install -U "safetensors>=0.4.5" \
29 |
&& pip install -U tensorflow \
30 |
&& pip install -U
31 |
&& pip install -U git+\
32 |
&& pip install huggingface_hub[cli] accelerate datasets peft\
33 |
&& pip install -U Pillow \
@@ -39,12 +41,18 @@ RUN . /workspace/.miniconda3/bin/activate \
39 |
&& pip install unsloth\
40 |
&& pip install gradio \
41 |
&& pip uninstall unsloth -y \
42 |
&& pip install --upgrade --no-cache-dir --no-deps git+
43 |
44 |
RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
45 |
RUN rm -f /workspace/
46 |
RUN mkdir -p /workspace/.config/github-copilot/
47 |
COPY apps.json.enc /workspace/.config/github-copilot/apps.json.enc
48 |
COPY CFTOKEN.enc /workspace/.config/CFTOKEN.enc
49 |
# Mandatory to run the jobs in rootless mode
50 |
# USER root
1 |
# build with: docker build . --tag sctg/roco-idefics3:0.0.18 --tag sctg/roco-idefics3:latest --push
2 |
# run with
3 |
# docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -e WANDB_API_KEY=wdkfjzfjz -e HF_TOKEN=hf_TOKEN -it sctg/roco-idefics3:0.0.18 bash -i / sleep infinity
4 |
# docker run --gpus all --user=42420:42420 -p 7000-8000:7000-8000 -p 8080:8080 -it sctg/roco-idefics3:0.0.18 bash -i / python / hf_...
5 |
FROM cloudflare/cloudflared:latest as cloudflared
6 |
FROM nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04
7 |
8 |
RUN /usr/sbin/addgroup --gid 42420 ovh
9 |
RUN /usr/sbin/useradd -u 42420 --gid 42420 -m -d /workspace -s /bin/bash ovh
10 |
RUN apt update -y && apt-get install -y cmake curl git git-lfs screen sudo \
11 |
&& mkdir -p /etc/apt/keyrings \
12 |
&& curl -fsSL | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
13 |
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg]$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
22 |
COPY --from=cloudflared /usr/local/bin/cloudflared /usr/local/bin/cloudflared
23 |
USER 42420
24 |
RUN curl -L > /workspace/
25 |
COPY bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl
26 |
RUN /bin/bash /workspace/ -b -p /workspace/.miniconda3
27 |
RUN . /workspace/.miniconda3/bin/activate && conda init --all
28 |
RUN . /workspace/.miniconda3/bin/activate \
29 |
&& pip install -U "safetensors>=0.4.5" \
30 |
&& pip install -U tensorflow \
31 |
&& pip install -U tf-keras \
32 |
&& pip install -U /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl \
33 |
&& pip install -U git+\
34 |
&& pip install huggingface_hub[cli] accelerate datasets peft\
35 |
&& pip install -U Pillow \
41 |
&& pip install unsloth\
42 |
&& pip install gradio \
43 |
&& pip uninstall unsloth -y \
44 |
&& pip install --upgrade --no-cache-dir --no-deps git+ \
45 |
&& pip install wandb
46 |
47 |
COPY --chmod=777 /workspace/.miniconda3/lib/python3.12/site-packages/bitsandbytes/
48 |
COPY --chmod=777 /workspace/.miniconda3/lib/python3.12/site-packages/bitsandbytes/
49 |
RUN rm /tmp/bitsandbytes-0.45.0-cp312-cp312-linux_x86_64.whl
50 |
51 |
RUN . /workspace/.miniconda3/bin/activate && conda install -y jupyter
52 |
RUN rm -f /workspace/
53 |
RUN mkdir -p /workspace/.config/github-copilot/
54 |
COPY apps.json.enc /workspace/.config/github-copilot/apps.json.enc
55 |
COPY kaggle.json.enc /workspace/.config/kaggle.json.enc
56 |
COPY CFTOKEN.enc /workspace/.config/CFTOKEN.enc
57 |
# Mandatory to run the jobs in rootless mode
58 |
# USER root
@@ -11,58 +11,6 @@
11 |
"The fine-tuning process stores the model checkpoints on a regular basis. Re run the notebook from the last checkpoint to continue the fine-tuning process."
12 |
13 |
14 |
15 |
"cell_type": "markdown",
16 |
"metadata": {
17 |
"id": "2uGjdGkTI78H"
18 |
19 |
"source": [
20 |
"## Try to mount Google Drive"
21 |
22 |
23 |
24 |
"cell_type": "code",
25 |
"execution_count": 1,
26 |
"metadata": {
27 |
"executionInfo": {
28 |
"elapsed": 2,
29 |
"status": "aborted",
30 |
"timestamp": 1730998196191,
31 |
"user": {
32 |
"displayName": "Ronan Le Meillat",
33 |
"userId": "09161391957806824350"
34 |
35 |
"user_tz": -60
36 |
37 |
"id": "F-zJG-uPIy3d"
38 |
39 |
"outputs": [
40 |
41 |
"ename": "Exception",
42 |
"evalue": "You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive",
43 |
"output_type": "error",
44 |
"traceback": [
45 |
46 |
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
47 |
"Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgoogle\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcolab\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m drive\n",
48 |
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'google'",
49 |
"\nDuring handling of the above exception, another exception occurred:\n",
50 |
"\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
51 |
"Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m drive\u001b[38;5;241m.\u001b[39mmount(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/content/drive\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m:\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
52 |
"\u001b[0;31mException\u001b[0m: You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive"
53 |
54 |
55 |
56 |
"source": [
57 |
58 |
" import google.colab\n",
59 |
" from google.colab import drive\n",
60 |
" drive.mount('/content/drive')\n",
61 |
" \n",
62 |
"except ModuleNotFoundError:\n",
63 |
" raise Exception(\"You are not running this code in Google Colab. Please use Google Colab if you would like to save the model to Google Drive\")"
64 |
65 |
66 |
67 |
"cell_type": "markdown",
68 |
"metadata": {},
@@ -72,7 +20,7 @@
72 |
73 |
74 |
"cell_type": "code",
75 |
76 |
"metadata": {
77 |
"executionInfo": {
78 |
"elapsed": 1459,
@@ -91,140 +39,128 @@
91 |
"dataset_id = \"eltorio/ROCOv2-radiology\"\n",
92 |
"prompt= \"You are an expert radiologist certified with over 15 years of experience in diagnostic imaging, describe this image\"\n",
93 |
"source_model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
94 |
95 |
96 |
97 |
98 |
" output_dir = \"/content/drive/MyDrive/IDEFICS3_ROCOv2\"\n",
99 |
100 |
" output_dir = \"IDEFICS3_ROCOv2\""
101 |
102 |
103 |
104 |
"cell_type": "markdown",
105 |
"metadata": {},
106 |
"source": [
107 |
108 |
109 |
110 |
111 |
"cell_type": "code",
112 |
113 |
"metadata": {},
114 |
"outputs": [],
115 |
"source": [
116 |
117 |
118 |
119 |
120 |
121 |
"cell_type": "code",
122 |
123 |
"metadata": {},
124 |
"outputs": [
125 |
126 |
"name": "stdout",
127 |
"output_type": "stream",
128 |
"text": [
129 |
130 |
131 |
132 |
133 |
"name": "stderr",
134 |
"output_type": "stream",
135 |
"text": [
136 |
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
137 |
138 |
139 |
140 |
"source": [
141 |
142 |
"import os\n",
143 |
144 |
"HF_TOKEN = \"hf_C…………\"\n",
145 |
146 |
"if os.environ.get('HF_TOKEN') is not None:\n",
147 |
" HF_TOKEN = os.environ.get('HF_TOKEN')\n",
148 |
" print(f\"Hugging Face token found in environment variable\")\n",
149 |
150 |
" import
151 |
152 |
153 |
154 |
155 |
156 |
157 |
" if HF_TOKEN is None:\n",
158 |
" raise ValueError(\"Please set your Hugging Face token in the user data panel, or pass it as an environment variable\")\n",
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
"cell_type": "markdown",
168 |
"metadata": {},
169 |
"source": [
170 |
171 |
172 |
173 |
174 |
"cell_type": "code",
175 |
176 |
"metadata": {},
177 |
"outputs": [
178 |
179 |
"name": "stdout",
180 |
"output_type": "stream",
181 |
"text": [
182 |
183 |
"remote: Enumerating objects: 3, done.\u001b[K\n",
184 |
"remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
185 |
"Unpacking objects: 100% (3/3), 1.05 KiB | 1.05 MiB/s, done.\n"
186 |
187 |
188 |
189 |
"source": [
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
"base_uri": "https://localhost:8080/"
209 |
210 |
"executionInfo": {
211 |
"elapsed": 33422,
212 |
"status": "ok",
213 |
"timestamp": 1730997063833,
214 |
"user": {
215 |
"displayName": "Ronan Le Meillat",
216 |
"userId": "09161391957806824350"
217 |
218 |
"user_tz": -60
219 |
220 |
"id": "eLNGOnQtAMtE",
221 |
"outputId": "65d09e49-f24c-47f3-d7d7-c1a0a0237290"
222 |
223 |
"outputs": [],
224 |
"source": [
225 |
"%pip install -q git+\n",
226 |
"%pip install -q accelerate datasets peft\n",
227 |
"%pip install -q bitsandbytes"
228 |
229 |
230 |
@@ -233,12 +169,12 @@
233 |
"id": "IjLCnQVEAMtE"
234 |
235 |
"source": [
236 |
"### Step
237 |
238 |
239 |
240 |
"cell_type": "code",
241 |
242 |
"metadata": {
243 |
"colab": {
244 |
"base_uri": "https://localhost:8080/",
@@ -659,7 +595,7 @@
659 |
660 |
"data": {
661 |
"application/vnd.jupyter.widget-view+json": {
662 |
"model_id": "
663 |
"version_major": 2,
664 |
"version_minor": 0
665 |
@@ -673,7 +609,7 @@
673 |
674 |
"data": {
675 |
"application/vnd.jupyter.widget-view+json": {
676 |
"model_id": "
677 |
"version_major": 2,
678 |
"version_minor": 0
679 |
@@ -687,7 +623,7 @@
687 |
688 |
"data": {
689 |
"application/vnd.jupyter.widget-view+json": {
690 |
"model_id": "
691 |
"version_major": 2,
692 |
"version_minor": 0
693 |
@@ -701,7 +637,7 @@
701 |
702 |
"data": {
703 |
"application/vnd.jupyter.widget-view+json": {
704 |
"model_id": "
705 |
"version_major": 2,
706 |
"version_minor": 0
707 |
@@ -715,7 +651,7 @@
715 |
716 |
"data": {
717 |
"application/vnd.jupyter.widget-view+json": {
718 |
"model_id": "
719 |
"version_major": 2,
720 |
"version_minor": 0
721 |
@@ -729,7 +665,7 @@
729 |
730 |
"data": {
731 |
"application/vnd.jupyter.widget-view+json": {
732 |
"model_id": "
733 |
"version_major": 2,
734 |
"version_minor": 0
735 |
@@ -743,7 +679,7 @@
743 |
744 |
"data": {
745 |
"application/vnd.jupyter.widget-view+json": {
746 |
"model_id": "
747 |
"version_major": 2,
748 |
"version_minor": 0
749 |
@@ -757,7 +693,7 @@
757 |
758 |
"data": {
759 |
"application/vnd.jupyter.widget-view+json": {
760 |
"model_id": "
761 |
"version_major": 2,
762 |
"version_minor": 0
763 |
@@ -771,7 +707,7 @@
771 |
772 |
"data": {
773 |
"application/vnd.jupyter.widget-view+json": {
774 |
"model_id": "
775 |
"version_major": 2,
776 |
"version_minor": 0
777 |
@@ -785,7 +721,7 @@
785 |
786 |
"data": {
787 |
"application/vnd.jupyter.widget-view+json": {
788 |
"model_id": "
789 |
"version_major": 2,
790 |
"version_minor": 0
791 |
@@ -799,7 +735,7 @@
799 |
800 |
"data": {
801 |
"application/vnd.jupyter.widget-view+json": {
802 |
"model_id": "
803 |
"version_major": 2,
804 |
"version_minor": 0
805 |
@@ -813,7 +749,7 @@
813 |
814 |
"data": {
815 |
"application/vnd.jupyter.widget-view+json": {
816 |
"model_id": "
817 |
"version_major": 2,
818 |
"version_minor": 0
819 |
@@ -827,7 +763,7 @@
827 |
828 |
"data": {
829 |
"application/vnd.jupyter.widget-view+json": {
830 |
"model_id": "
831 |
"version_major": 2,
832 |
"version_minor": 0
833 |
@@ -841,7 +777,7 @@
841 |
842 |
"data": {
843 |
"application/vnd.jupyter.widget-view+json": {
844 |
"model_id": "
845 |
"version_major": 2,
846 |
"version_minor": 0
847 |
@@ -855,7 +791,7 @@
855 |
856 |
"data": {
857 |
"application/vnd.jupyter.widget-view+json": {
858 |
"model_id": "
859 |
"version_major": 2,
860 |
"version_minor": 0
861 |
@@ -869,7 +805,7 @@
869 |
870 |
"data": {
871 |
"application/vnd.jupyter.widget-view+json": {
872 |
"model_id": "
873 |
"version_major": 2,
874 |
"version_minor": 0
875 |
@@ -883,7 +819,7 @@
883 |
884 |
"data": {
885 |
"application/vnd.jupyter.widget-view+json": {
886 |
"model_id": "
887 |
"version_major": 2,
888 |
"version_minor": 0
889 |
@@ -897,7 +833,7 @@
897 |
898 |
"data": {
899 |
"application/vnd.jupyter.widget-view+json": {
900 |
"model_id": "
901 |
"version_major": 2,
902 |
"version_minor": 0
903 |
@@ -911,7 +847,7 @@
911 |
912 |
"data": {
913 |
"application/vnd.jupyter.widget-view+json": {
914 |
"model_id": "
915 |
"version_major": 2,
916 |
"version_minor": 0
917 |
@@ -925,7 +861,7 @@
925 |
926 |
"data": {
927 |
"application/vnd.jupyter.widget-view+json": {
928 |
"model_id": "
929 |
"version_major": 2,
930 |
"version_minor": 0
931 |
@@ -939,7 +875,7 @@
939 |
940 |
"data": {
941 |
"application/vnd.jupyter.widget-view+json": {
942 |
"model_id": "
943 |
"version_major": 2,
944 |
"version_minor": 0
945 |
@@ -953,7 +889,7 @@
953 |
954 |
"data": {
955 |
"application/vnd.jupyter.widget-view+json": {
956 |
"model_id": "
957 |
"version_major": 2,
958 |
"version_minor": 0
959 |
@@ -967,7 +903,7 @@
967 |
968 |
"data": {
969 |
"application/vnd.jupyter.widget-view+json": {
970 |
"model_id": "
971 |
"version_major": 2,
972 |
"version_minor": 0
973 |
@@ -981,7 +917,7 @@
981 |
982 |
"data": {
983 |
"application/vnd.jupyter.widget-view+json": {
984 |
"model_id": "
985 |
"version_major": 2,
986 |
"version_minor": 0
987 |
@@ -995,7 +931,7 @@
995 |
996 |
"data": {
997 |
"application/vnd.jupyter.widget-view+json": {
998 |
"model_id": "
999 |
"version_major": 2,
1000 |
"version_minor": 0
1001 |
@@ -1009,7 +945,7 @@
1009 |
1010 |
"data": {
1011 |
"application/vnd.jupyter.widget-view+json": {
1012 |
"model_id": "
1013 |
"version_major": 2,
1014 |
"version_minor": 0
1015 |
@@ -1023,7 +959,7 @@
1023 |
1024 |
"data": {
1025 |
"application/vnd.jupyter.widget-view+json": {
1026 |
"model_id": "
1027 |
"version_major": 2,
1028 |
"version_minor": 0
1029 |
@@ -1037,7 +973,7 @@
1037 |
1038 |
"data": {
1039 |
"application/vnd.jupyter.widget-view+json": {
1040 |
"model_id": "
1041 |
"version_major": 2,
1042 |
"version_minor": 0
1043 |
@@ -1051,7 +987,7 @@
1051 |
1052 |
"data": {
1053 |
"application/vnd.jupyter.widget-view+json": {
1054 |
"model_id": "
1055 |
"version_major": 2,
1056 |
"version_minor": 0
1057 |
@@ -1065,7 +1001,7 @@
1065 |
1066 |
"data": {
1067 |
"application/vnd.jupyter.widget-view+json": {
1068 |
"model_id": "
1069 |
"version_major": 2,
1070 |
"version_minor": 0
1071 |
@@ -1079,7 +1015,7 @@
1079 |
1080 |
"data": {
1081 |
"application/vnd.jupyter.widget-view+json": {
1082 |
"model_id": "
1083 |
"version_major": 2,
1084 |
"version_minor": 0
1085 |
@@ -1093,7 +1029,7 @@
1093 |
1094 |
"data": {
1095 |
"application/vnd.jupyter.widget-view+json": {
1096 |
"model_id": "
1097 |
"version_major": 2,
1098 |
"version_minor": 0
1099 |
@@ -1107,7 +1043,7 @@
1107 |
1108 |
"data": {
1109 |
"application/vnd.jupyter.widget-view+json": {
1110 |
"model_id": "
1111 |
"version_major": 2,
1112 |
"version_minor": 0
1113 |
@@ -1121,7 +1057,7 @@
1121 |
1122 |
"data": {
1123 |
"application/vnd.jupyter.widget-view+json": {
1124 |
"model_id": "
1125 |
"version_major": 2,
1126 |
"version_minor": 0
1127 |
@@ -1135,7 +1071,7 @@
1135 |
1136 |
"data": {
1137 |
"application/vnd.jupyter.widget-view+json": {
1138 |
"model_id": "
1139 |
"version_major": 2,
1140 |
"version_minor": 0
1141 |
@@ -1149,7 +1085,7 @@
1149 |
1150 |
"data": {
1151 |
"application/vnd.jupyter.widget-view+json": {
1152 |
"model_id": "
1153 |
"version_major": 2,
1154 |
"version_minor": 0
1155 |
@@ -1163,7 +1099,7 @@
1163 |
1164 |
"data": {
1165 |
"application/vnd.jupyter.widget-view+json": {
1166 |
"model_id": "
1167 |
"version_major": 2,
1168 |
"version_minor": 0
1169 |
@@ -1177,7 +1113,7 @@
1177 |
1178 |
"data": {
1179 |
"application/vnd.jupyter.widget-view+json": {
1180 |
"model_id": "
1181 |
"version_major": 2,
1182 |
"version_minor": 0
1183 |
@@ -1191,7 +1127,7 @@
1191 |
1192 |
"data": {
1193 |
"application/vnd.jupyter.widget-view+json": {
1194 |
"model_id": "
1195 |
"version_major": 2,
1196 |
"version_minor": 0
1197 |
@@ -1205,7 +1141,7 @@
1205 |
1206 |
"data": {
1207 |
"application/vnd.jupyter.widget-view+json": {
1208 |
"model_id": "
1209 |
"version_major": 2,
1210 |
"version_minor": 0
1211 |
@@ -1219,7 +1155,7 @@
1219 |
1220 |
"data": {
1221 |
"application/vnd.jupyter.widget-view+json": {
1222 |
"model_id": "
1223 |
"version_major": 2,
1224 |
"version_minor": 0
1225 |
@@ -1233,7 +1169,7 @@
1233 |
1234 |
"data": {
1235 |
"application/vnd.jupyter.widget-view+json": {
1236 |
"model_id": "
1237 |
"version_major": 2,
1238 |
"version_minor": 0
1239 |
@@ -1247,7 +1183,7 @@
1247 |
1248 |
"data": {
1249 |
"application/vnd.jupyter.widget-view+json": {
1250 |
"model_id": "
1251 |
"version_major": 2,
1252 |
"version_minor": 0
1253 |
@@ -1261,7 +1197,7 @@
1261 |
1262 |
"data": {
1263 |
"application/vnd.jupyter.widget-view+json": {
1264 |
"model_id": "
1265 |
"version_major": 2,
1266 |
"version_minor": 0
1267 |
@@ -1275,7 +1211,7 @@
1275 |
1276 |
"data": {
1277 |
"application/vnd.jupyter.widget-view+json": {
1278 |
"model_id": "
1279 |
"version_major": 2,
1280 |
"version_minor": 0
1281 |
@@ -1289,7 +1225,7 @@
1289 |
1290 |
"data": {
1291 |
"application/vnd.jupyter.widget-view+json": {
1292 |
"model_id": "
1293 |
"version_major": 2,
1294 |
"version_minor": 0
1295 |
@@ -1303,7 +1239,7 @@
1303 |
1304 |
"data": {
1305 |
"application/vnd.jupyter.widget-view+json": {
1306 |
"model_id": "
1307 |
"version_major": 2,
1308 |
"version_minor": 0
1309 |
@@ -1334,7 +1270,7 @@
1334 |
1335 |
1336 |
"cell_type": "code",
1337 |
1338 |
"metadata": {
1339 |
"colab": {
1340 |
"base_uri": "https://localhost:8080/"
@@ -1362,7 +1298,7 @@
1362 |
" 'cui': ['C0037005']}"
1363 |
1364 |
1365 |
1366 |
"metadata": {},
1367 |
"output_type": "execute_result"
1368 |
@@ -1373,7 +1309,7 @@
1373 |
1374 |
1375 |
"cell_type": "code",
1376 |
1377 |
"metadata": {
1378 |
"colab": {
1379 |
"base_uri": "https://localhost:8080/",
@@ -1402,7 +1338,7 @@
1402 |
"<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1403 |
1404 |
1405 |
1406 |
"metadata": {},
1407 |
"output_type": "execute_result"
1408 |
@@ -1422,7 +1358,7 @@
1422 |
1423 |
1424 |
"cell_type": "code",
1425 |
1426 |
"metadata": {
1427 |
"colab": {
1428 |
"base_uri": "https://localhost:8080/",
@@ -1465,7 +1401,7 @@
1465 |
1466 |
"data": {
1467 |
"application/vnd.jupyter.widget-view+json": {
1468 |
"model_id": "
1469 |
"version_major": 2,
1470 |
"version_minor": 0
1471 |
@@ -1476,10 +1412,22 @@
1476 |
"metadata": {},
1477 |
"output_type": "display_data"
1478 |
1479 |
1480 |
"data": {
1481 |
"application/vnd.jupyter.widget-view+json": {
1482 |
"model_id": "
1483 |
"version_major": 2,
1484 |
"version_minor": 0
1485 |
@@ -1493,7 +1441,7 @@
1493 |
1494 |
"data": {
1495 |
"application/vnd.jupyter.widget-view+json": {
1496 |
"model_id": "
1497 |
"version_major": 2,
1498 |
"version_minor": 0
1499 |
@@ -1507,7 +1455,7 @@
1507 |
1508 |
"data": {
1509 |
"application/vnd.jupyter.widget-view+json": {
1510 |
"model_id": "
1511 |
"version_major": 2,
1512 |
"version_minor": 0
1513 |
@@ -1521,7 +1469,7 @@
1521 |
1522 |
"data": {
1523 |
"application/vnd.jupyter.widget-view+json": {
1524 |
"model_id": "
1525 |
"version_major": 2,
1526 |
"version_minor": 0
1527 |
@@ -1535,7 +1483,7 @@
1535 |
1536 |
"data": {
1537 |
"application/vnd.jupyter.widget-view+json": {
1538 |
"model_id": "
1539 |
"version_major": 2,
1540 |
"version_minor": 0
1541 |
@@ -1549,7 +1497,7 @@
1549 |
1550 |
"data": {
1551 |
"application/vnd.jupyter.widget-view+json": {
1552 |
"model_id": "
1553 |
"version_major": 2,
1554 |
"version_minor": 0
1555 |
@@ -1560,17 +1508,10 @@
1560 |
"metadata": {},
1561 |
"output_type": "display_data"
1562 |
1563 |
1564 |
"name": "stderr",
1565 |
"output_type": "stream",
1566 |
"text": [
1567 |
"`low_cpu_mem_usage` was None, now default to True since model is quantized.\n"
1568 |
1569 |
1570 |
1571 |
"data": {
1572 |
"application/vnd.jupyter.widget-view+json": {
1573 |
"model_id": "
1574 |
"version_major": 2,
1575 |
"version_minor": 0
1576 |
@@ -1584,7 +1525,7 @@
1584 |
1585 |
"data": {
1586 |
"application/vnd.jupyter.widget-view+json": {
1587 |
"model_id": "
1588 |
"version_major": 2,
1589 |
"version_minor": 0
1590 |
@@ -1598,7 +1539,7 @@
1598 |
1599 |
"data": {
1600 |
"application/vnd.jupyter.widget-view+json": {
1601 |
"model_id": "
1602 |
"version_major": 2,
1603 |
"version_minor": 0
1604 |
@@ -1612,7 +1553,7 @@
1612 |
1613 |
"data": {
1614 |
"application/vnd.jupyter.widget-view+json": {
1615 |
"model_id": "
1616 |
"version_major": 2,
1617 |
"version_minor": 0
1618 |
@@ -1626,7 +1567,7 @@
1626 |
1627 |
"data": {
1628 |
"application/vnd.jupyter.widget-view+json": {
1629 |
"model_id": "
1630 |
"version_major": 2,
1631 |
"version_minor": 0
1632 |
@@ -1640,7 +1581,7 @@
1640 |
1641 |
"data": {
1642 |
"application/vnd.jupyter.widget-view+json": {
1643 |
"model_id": "
1644 |
"version_major": 2,
1645 |
"version_minor": 0
1646 |
@@ -1654,7 +1595,7 @@
1654 |
1655 |
"data": {
1656 |
"application/vnd.jupyter.widget-view+json": {
1657 |
"model_id": "
1658 |
"version_major": 2,
1659 |
"version_minor": 0
1660 |
@@ -1668,7 +1609,7 @@
1668 |
1669 |
"data": {
1670 |
"application/vnd.jupyter.widget-view+json": {
1671 |
"model_id": "
1672 |
"version_major": 2,
1673 |
"version_minor": 0
1674 |
@@ -1678,16 +1619,27 @@
1678 |
1679 |
"metadata": {},
1680 |
"output_type": "display_data"
1681 |
1682 |
1683 |
"source": [
1684 |
"import torch\n",
1685 |
"from peft import LoraConfig\n",
1686 |
"from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
1687 |
1688 |
"DEVICE = \"cuda
1689 |
1690 |
1691 |
1692 |
"processor = AutoProcessor.from_pretrained(\n",
1693 |
" source_model_id,\n",
@@ -1715,8 +1667,8 @@
1715 |
" torch_dtype=torch.float16,\n",
1716 |
" quantization_config=bnb_config if USE_QLORA else None,\n",
1717 |
" )\n",
1718 |
" model
1719 |
" model.
1720 |
1721 |
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
1722 |
" source_model_id,\n",
@@ -1736,7 +1688,7 @@
1736 |
1737 |
1738 |
"cell_type": "code",
1739 |
1740 |
"metadata": {
1741 |
"executionInfo": {
1742 |
"elapsed": 426,
@@ -1812,7 +1764,7 @@
1812 |
1813 |
1814 |
"cell_type": "code",
1815 |
1816 |
"metadata": {
1817 |
"executionInfo": {
1818 |
"elapsed": 1008,
@@ -1841,7 +1793,7 @@
1841 |
" gradient_accumulation_steps = 8,\n",
1842 |
" dataloader_pin_memory = False,\n",
1843 |
" save_total_limit = 3,\n",
1844 |
" eval_strategy = \"
1845 |
" save_strategy = \"steps\",\n",
1846 |
" eval_steps = 100,\n",
1847 |
" save_steps = 10, # checkpoint each 10 steps\n",
@@ -1851,14 +1803,15 @@
1851 |
" push_to_hub = True,\n",
1852 |
" label_names = [\"labels\"],\n",
1853 |
" load_best_model_at_end = False,\n",
1854 |
" report_to = \"
1855 |
" optim = \"paged_adamw_8bit\",\n",
1856 |
1857 |
1858 |
1859 |
1860 |
"cell_type": "code",
1861 |
1862 |
"metadata": {
1863 |
"colab": {
1864 |
"base_uri": "https://localhost:8080/"
@@ -1898,7 +1851,7 @@
1898 |
1899 |
1900 |
"cell_type": "code",
1901 |
1902 |
"metadata": {
1903 |
"colab": {
1904 |
"base_uri": "https://localhost:8080/",
@@ -1908,26 +1861,17 @@
1908 |
"outputId": "ebb15160-f56e-4899-e608-b0d5fd0ba117"
1909 |
1910 |
"outputs": [
1911 |
1912 |
"data": {
1913 |
"text/html": [
1914 |
1915 |
" <div>\n",
1916 |
" \n",
1917 |
" <progress value='92' max='11241' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1918 |
" [ 92/11241 9:38:32 < 1194:28:54, 0.00 it/s, Epoch 0.02/3]\n",
1919 |
" </div>\n",
1920 |
" <table border=\"1\" class=\"dataframe\">\n",
1921 |
" <thead>\n",
1922 |
" <tr style=\"text-align: left;\">\n",
1923 |
" <th>Step</th>\n",
1924 |
" <th>Training Loss</th>\n",
1925 |
" <th>Validation Loss</th>\n",
1926 |
" </tr>\n",
1927 |
" </thead>\n",
1928 |
" <tbody>\n",
1929 |
" </tbody>\n",
1930 |
1931 |
1932 |
"text/plain": [
1933 |
"<IPython.core.display.HTML object>"
@@ -1937,27 +1881,102 @@
1937 |
"output_type": "display_data"
1938 |
1939 |
1940 |
1941 |
1942 |
1943 |
1944 |
1945 |
1946 |
1947 |
1948 |
1949 |
1950 |
1951 |
1952 |
1953 |
1954 |
1955 |
1956 |
1957 |
1958 |
1959 |
1960 |
1961 |
1962 |
1963 |
@@ -1967,7 +1986,7 @@
1967 |
1968 |
1969 |
"cell_type": "code",
1970 |
1971 |
"metadata": {},
1972 |
"outputs": [
1973 |
@@ -2005,7 +2024,7 @@
2005 |
2006 |
2007 |
"source": [
2008 |
"model = Idefics3ForConditionalGeneration.from_pretrained(source_model_id , torch_dtype=torch.
2009 |
"model.load_adapter(destination_model_id, device_map=\"auto\")"
2010 |
2011 |
11 |
"The fine-tuning process stores the model checkpoints on a regular basis. Re run the notebook from the last checkpoint to continue the fine-tuning process."
12 |
13 |
14 |
15 |
"cell_type": "markdown",
16 |
"metadata": {},
20 |
21 |
22 |
"cell_type": "code",
23 |
"execution_count": 9,
24 |
"metadata": {
25 |
"executionInfo": {
26 |
"elapsed": 1459,
39 |
"dataset_id = \"eltorio/ROCOv2-radiology\"\n",
40 |
"prompt= \"You are an expert radiologist certified with over 15 years of experience in diagnostic imaging, describe this image\"\n",
41 |
"source_model_id = \"HuggingFaceM4/Idefics3-8B-Llama3\"\n",
42 |
"hugging_face_user = \"eltorio\"\n",
43 |
"destination_model = \"IDEFICS3_ROCOv2\"\n",
44 |
"destination_model_id = f\"{hugging_face_user}/{destination_model}\"\n",
45 |
"output_dir = \"IDEFICS3_ROCOv2\""
46 |
47 |
48 |
49 |
"cell_type": "markdown",
50 |
"metadata": {},
51 |
"source": [
52 |
"### Log in Kaggle"
53 |
54 |
55 |
56 |
"cell_type": "code",
57 |
"execution_count": 2,
58 |
"metadata": {},
59 |
"outputs": [],
60 |
"source": [
61 |
"import os\n",
62 |
"import json\n",
63 |
"if not os.path.exists('/kaggle/.kaggle/kaggle.json'):\n",
64 |
" try:\n",
65 |
" from kaggle_secrets import UserSecretsClient\n",
66 |
" user_secrets = UserSecretsClient()\n",
67 |
" KAGGLE_JSON = user_secrets.get_secret(\"KAGGLE_JSON\")\n",
68 |
" except:\n",
69 |
" KAGGLE_JSON = os.getenv(\"KAGGLE_JSON\")\n",
70 |
71 |
" kaggle_dir = os.path.expanduser(\"~/.kaggle\")\n",
72 |
" kaggle_file = os.path.join(kaggle_dir, \"kaggle.json\")\n",
73 |
74 |
" os.makedirs(kaggle_dir, exist_ok=True)\n",
75 |
76 |
" with open(kaggle_file, 'w') as file:\n",
77 |
" json.dump(KAGGLE_JSON, file)"
78 |
79 |
80 |
81 |
"cell_type": "markdown",
82 |
"metadata": {},
83 |
"source": [
84 |
"### Login WandB"
85 |
86 |
87 |
88 |
"cell_type": "code",
89 |
"execution_count": 10,
90 |
"metadata": {},
91 |
"outputs": [
92 |
93 |
"name": "stdout",
94 |
"output_type": "stream",
95 |
"text": [
96 |
"Logged in to W&B\n"
97 |
98 |
99 |
100 |
"source": [
101 |
"import wandb\n",
102 |
103 |
" from kaggle_secrets import UserSecretsClient\n",
104 |
" user_secrets = UserSecretsClient()\n",
105 |
" WANDB_API_KEY = user_secrets.get_secret(\"WANDB_API_KEY\")\n",
106 |
" os.environ[\"WANDB_API_KEY\"] = WANDB_API_KEY\n",
107 |
108 |
" if os.getenv(\"WANDB_API_KEY\") is None:\n",
109 |
" os.environ[\"WANDB_API_KEY\"] = input(\"Enter your W&B API key: \")\n",
110 |
111 |
"if not wandb.login():\n",
112 |
" raise Exception(\"Can't login to W&B\")\n",
113 |
114 |
" print(\"Logged in to W&B\")\n",
115 |
" os.environ[\"WANDB_PROJECT\"]=destination_model"
116 |
117 |
118 |
119 |
"cell_type": "markdown",
120 |
"metadata": {},
121 |
"source": [
122 |
"## Login on Hugging Face"
123 |
124 |
125 |
126 |
"cell_type": "code",
127 |
"execution_count": 11,
128 |
"metadata": {},
129 |
"outputs": [
130 |
131 |
"name": "stderr",
132 |
"output_type": "stream",
133 |
"text": [
134 |
"Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.\n"
135 |
136 |
137 |
138 |
"name": "stdout",
139 |
"output_type": "stream",
140 |
"text": [
141 |
"Login with hf_C************usZNcSKWwmtCiuUAgIxFZ\n"
142 |
143 |
144 |
145 |
"source": [
146 |
"from huggingface_hub import login\n",
147 |
"import os\n",
148 |
149 |
150 |
" from kaggle_secrets import UserSecretsClient\n",
151 |
" user_secrets = UserSecretsClient()\n",
152 |
" HF_TOKEN = user_secrets.get_secret(\"HF_TOKEN\")\n",
153 |
" os.environ[\"HF_TOKEN\"] = HF_TOKEN\n",
154 |
155 |
" if not os.getenv(\"HF_TOKEN\"):\n",
156 |
" raise ValueError(\"You need to set the HF_TOKEN environment variable.\")\n",
157 |
" HF_TOKEN = os.getenv(\"HF_TOKEN\")\n",
158 |
159 |
"print(f\"Login with {HF_TOKEN[:4]}{'*'*12}{HF_TOKEN[16:]}\")\n",
160 |
161 |
" token=HF_TOKEN,\n",
162 |
" add_to_git_credential=False\n",
163 |
164 |
165 |
166 |
169 |
"id": "IjLCnQVEAMtE"
170 |
171 |
"source": [
172 |
"### Step 1: Retrieve the dataset from Hugging Face."
173 |
174 |
175 |
176 |
"cell_type": "code",
177 |
"execution_count": 12,
178 |
"metadata": {
179 |
"colab": {
180 |
"base_uri": "https://localhost:8080/",
595 |
596 |
"data": {
597 |
"application/vnd.jupyter.widget-view+json": {
598 |
"model_id": "046c24f13d4f45d7badb65f20c79e973",
599 |
"version_major": 2,
600 |
"version_minor": 0
601 |
609 |
610 |
"data": {
611 |
"application/vnd.jupyter.widget-view+json": {
612 |
"model_id": "2e3a624b105e406e98e7fdf1fbfcc6f9",
613 |
"version_major": 2,
614 |
"version_minor": 0
615 |
623 |
624 |
"data": {
625 |
"application/vnd.jupyter.widget-view+json": {
626 |
"model_id": "fea5a84e8b884b6d84e4ab42e9b86130",
627 |
"version_major": 2,
628 |
"version_minor": 0
629 |
637 |
638 |
"data": {
639 |
"application/vnd.jupyter.widget-view+json": {
640 |
"model_id": "bbe912b7f3bf448d8edcb96df599f42a",
641 |
"version_major": 2,
642 |
"version_minor": 0
643 |
651 |
652 |
"data": {
653 |
"application/vnd.jupyter.widget-view+json": {
654 |
"model_id": "5f70bd3f04b0473f940f1c0a66ed412b",
655 |
"version_major": 2,
656 |
"version_minor": 0
657 |
665 |
666 |
"data": {
667 |
"application/vnd.jupyter.widget-view+json": {
668 |
"model_id": "db4ef432b48540afb5c3750e5ac50408",
669 |
"version_major": 2,
670 |
"version_minor": 0
671 |
679 |
680 |
"data": {
681 |
"application/vnd.jupyter.widget-view+json": {
682 |
"model_id": "9deda09dddc847e38093acfe95719461",
683 |
"version_major": 2,
684 |
"version_minor": 0
685 |
693 |
694 |
"data": {
695 |
"application/vnd.jupyter.widget-view+json": {
696 |
"model_id": "2557fa8a477d4bdbbc6c63b9b4b19f77",
697 |
"version_major": 2,
698 |
"version_minor": 0
699 |
707 |
708 |
"data": {
709 |
"application/vnd.jupyter.widget-view+json": {
710 |
"model_id": "12ddf4a453a04a60846b2520b9bdc5e4",
711 |
"version_major": 2,
712 |
"version_minor": 0
713 |
721 |
722 |
"data": {
723 |
"application/vnd.jupyter.widget-view+json": {
724 |
"model_id": "835145ef5d21469f97d1475258b398dc",
725 |
"version_major": 2,
726 |
"version_minor": 0
727 |
735 |
736 |
"data": {
737 |
"application/vnd.jupyter.widget-view+json": {
738 |
"model_id": "ba78f1edad1749459454ae149982f0a4",
739 |
"version_major": 2,
740 |
"version_minor": 0
741 |
749 |
750 |
"data": {
751 |
"application/vnd.jupyter.widget-view+json": {
752 |
"model_id": "d2098ffc181c437dba020d594d0a6fed",
753 |
"version_major": 2,
754 |
"version_minor": 0
755 |
763 |
764 |
"data": {
765 |
"application/vnd.jupyter.widget-view+json": {
766 |
"model_id": "bb4710de5e754e119d55bbea6be576ab",
767 |
"version_major": 2,
768 |
"version_minor": 0
769 |
777 |
778 |
"data": {
779 |
"application/vnd.jupyter.widget-view+json": {
780 |
"model_id": "1b8ada1887d648139c2c089f575cbbc5",
781 |
"version_major": 2,
782 |
"version_minor": 0
783 |
791 |
792 |
"data": {
793 |
"application/vnd.jupyter.widget-view+json": {
794 |
"model_id": "b290432fd1074435a3283fcefc46e6f0",
795 |
"version_major": 2,
796 |
"version_minor": 0
797 |
805 |
806 |
"data": {
807 |
"application/vnd.jupyter.widget-view+json": {
808 |
"model_id": "9f04b5bd31ad4e3ca12555b70959bdd8",
809 |
"version_major": 2,
810 |
"version_minor": 0
811 |
819 |
820 |
"data": {
821 |
"application/vnd.jupyter.widget-view+json": {
822 |
"model_id": "3d02778907134930b869fc0d602bf04a",
823 |
"version_major": 2,
824 |
"version_minor": 0
825 |
833 |
834 |
"data": {
835 |
"application/vnd.jupyter.widget-view+json": {
836 |
"model_id": "e242a3f79e4a471ca7221764bb3bc8c8",
837 |
"version_major": 2,
838 |
"version_minor": 0
839 |
847 |
848 |
"data": {
849 |
"application/vnd.jupyter.widget-view+json": {
850 |
"model_id": "4af61d3296d344e09f202eb471ed5cf2",
851 |
"version_major": 2,
852 |
"version_minor": 0
853 |
861 |
862 |
"data": {
863 |
"application/vnd.jupyter.widget-view+json": {
864 |
"model_id": "60c368d7fbc54ac28d1b387beea698bf",
865 |
"version_major": 2,
866 |
"version_minor": 0
867 |
875 |
876 |
"data": {
877 |
"application/vnd.jupyter.widget-view+json": {
878 |
"model_id": "9c29d19326724a50a3e782ff98d2177f",
879 |
"version_major": 2,
880 |
"version_minor": 0
881 |
889 |
890 |
"data": {
891 |
"application/vnd.jupyter.widget-view+json": {
892 |
"model_id": "16cfe87e688045d18bcd788c07f63292",
893 |
"version_major": 2,
894 |
"version_minor": 0
895 |
903 |
904 |
"data": {
905 |
"application/vnd.jupyter.widget-view+json": {
906 |
"model_id": "f339fd44a8694e769f6a95d96b0b11c9",
907 |
"version_major": 2,
908 |
"version_minor": 0
909 |
917 |
918 |
"data": {
919 |
"application/vnd.jupyter.widget-view+json": {
920 |
"model_id": "dea023c08e31443ab8f06f99aa36a3cf",
921 |
"version_major": 2,
922 |
"version_minor": 0
923 |
931 |
932 |
"data": {
933 |
"application/vnd.jupyter.widget-view+json": {
934 |
"model_id": "da6fabb9e9ed468b95b140bdc250e667",
935 |
"version_major": 2,
936 |
"version_minor": 0
937 |
945 |
946 |
"data": {
947 |
"application/vnd.jupyter.widget-view+json": {
948 |
"model_id": "9ca865fa0cff4ab6bf896bd9e359f4ca",
949 |
"version_major": 2,
950 |
"version_minor": 0
951 |
959 |
960 |
"data": {
961 |
"application/vnd.jupyter.widget-view+json": {
962 |
"model_id": "5b5271ad34af47c3b5d1ca0c7da31632",
963 |
"version_major": 2,
964 |
"version_minor": 0
965 |
973 |
974 |
"data": {
975 |
"application/vnd.jupyter.widget-view+json": {
976 |
"model_id": "21577d910fa14b8ab32c00c0d4e31f9a",
977 |
"version_major": 2,
978 |
"version_minor": 0
979 |
987 |
988 |
"data": {
989 |
"application/vnd.jupyter.widget-view+json": {
990 |
"model_id": "97e0316cd6c0432797fcce4c07c3391b",
991 |
"version_major": 2,
992 |
"version_minor": 0
993 |
1001 |
1002 |
"data": {
1003 |
"application/vnd.jupyter.widget-view+json": {
1004 |
"model_id": "4b1deeac66544256b4df8f95baf21b4a",
1005 |
"version_major": 2,
1006 |
"version_minor": 0
1007 |
1015 |
1016 |
"data": {
1017 |
"application/vnd.jupyter.widget-view+json": {
1018 |
"model_id": "257056bdde0445669fe615aaf91f5b13",
1019 |
"version_major": 2,
1020 |
"version_minor": 0
1021 |
1029 |
1030 |
"data": {
1031 |
"application/vnd.jupyter.widget-view+json": {
1032 |
"model_id": "12dabb10d7dd4cc080783ff2bbd7fe6f",
1033 |
"version_major": 2,
1034 |
"version_minor": 0
1035 |
1043 |
1044 |
"data": {
1045 |
"application/vnd.jupyter.widget-view+json": {
1046 |
"model_id": "f631c0696721488691b50bfb35e26157",
1047 |
"version_major": 2,
1048 |
"version_minor": 0
1049 |
1057 |
1058 |
"data": {
1059 |
"application/vnd.jupyter.widget-view+json": {
1060 |
"model_id": "8a3cc5c12bfa40bb94427b7f84b2607b",
1061 |
"version_major": 2,
1062 |
"version_minor": 0
1063 |
1071 |
1072 |
"data": {
1073 |
"application/vnd.jupyter.widget-view+json": {
1074 |
"model_id": "753b01971b4c4b818addcbee07fd5ebd",
1075 |
"version_major": 2,
1076 |
"version_minor": 0
1077 |
1085 |
1086 |
"data": {
1087 |
"application/vnd.jupyter.widget-view+json": {
1088 |
"model_id": "fe9a979c6ce7437dbd58c3b2ed84ce56",
1089 |
"version_major": 2,
1090 |
"version_minor": 0
1091 |
1099 |
1100 |
"data": {
1101 |
"application/vnd.jupyter.widget-view+json": {
1102 |
"model_id": "bfa00301efbc45d6a776f62b5ee4112b",
1103 |
"version_major": 2,
1104 |
"version_minor": 0
1105 |
1113 |
1114 |
"data": {
1115 |
"application/vnd.jupyter.widget-view+json": {
1116 |
"model_id": "218ceb240e45427392547e041f045680",
1117 |
"version_major": 2,
1118 |
"version_minor": 0
1119 |
1127 |
1128 |
"data": {
1129 |
"application/vnd.jupyter.widget-view+json": {
1130 |
"model_id": "004aa618d2a24aa5966ddaa9afd96001",
1131 |
"version_major": 2,
1132 |
"version_minor": 0
1133 |
1141 |
1142 |
"data": {
1143 |
"application/vnd.jupyter.widget-view+json": {
1144 |
"model_id": "6a9acf4704f543b593753609da89240c",
1145 |
"version_major": 2,
1146 |
"version_minor": 0
1147 |
1155 |
1156 |
"data": {
1157 |
"application/vnd.jupyter.widget-view+json": {
1158 |
"model_id": "39a57445dbcc4335842dadcfcbf5a4ef",
1159 |
"version_major": 2,
1160 |
"version_minor": 0
1161 |
1169 |
1170 |
"data": {
1171 |
"application/vnd.jupyter.widget-view+json": {
1172 |
"model_id": "4ea49e27202a416ab608cea8cb46cd5c",
1173 |
"version_major": 2,
1174 |
"version_minor": 0
1175 |
1183 |
1184 |
"data": {
1185 |
"application/vnd.jupyter.widget-view+json": {
1186 |
"model_id": "14f2007ef3264c7faa55cfcc9effd0e6",
1187 |
"version_major": 2,
1188 |
"version_minor": 0
1189 |
1197 |
1198 |
"data": {
1199 |
"application/vnd.jupyter.widget-view+json": {
1200 |
"model_id": "3a3b653ca45e4757a4e3b81c3ab43b87",
1201 |
"version_major": 2,
1202 |
"version_minor": 0
1203 |
1211 |
1212 |
"data": {
1213 |
"application/vnd.jupyter.widget-view+json": {
1214 |
"model_id": "e753298cf52a40b5b3a7052146fd2cfb",
1215 |
"version_major": 2,
1216 |
"version_minor": 0
1217 |
1225 |
1226 |
"data": {
1227 |
"application/vnd.jupyter.widget-view+json": {
1228 |
"model_id": "c05e1aa2995c489c84f5bdb09cef3ccd",
1229 |
"version_major": 2,
1230 |
"version_minor": 0
1231 |
1239 |
1240 |
"data": {
1241 |
"application/vnd.jupyter.widget-view+json": {
1242 |
"model_id": "9a81639720f34fedbca3c19b42c7a9ae",
1243 |
"version_major": 2,
1244 |
"version_minor": 0
1245 |
1270 |
1271 |
1272 |
"cell_type": "code",
1273 |
"execution_count": 13,
1274 |
"metadata": {
1275 |
"colab": {
1276 |
"base_uri": "https://localhost:8080/"
1298 |
" 'cui': ['C0037005']}"
1299 |
1300 |
1301 |
"execution_count": 13,
1302 |
"metadata": {},
1303 |
"output_type": "execute_result"
1304 |
1309 |
1310 |
1311 |
"cell_type": "code",
1312 |
"execution_count": 14,
1313 |
"metadata": {
1314 |
"colab": {
1315 |
"base_uri": "https://localhost:8080/",
1338 |
"<PIL.PngImagePlugin.PngImageFile image mode=RGB size=1684x2294>"
1339 |
1340 |
1341 |
"execution_count": 14,
1342 |
"metadata": {},
1343 |
"output_type": "execute_result"
1344 |
1358 |
1359 |
1360 |
"cell_type": "code",
1361 |
"execution_count": 34,
1362 |
"metadata": {
1363 |
"colab": {
1364 |
"base_uri": "https://localhost:8080/",
1401 |
1402 |
"data": {
1403 |
"application/vnd.jupyter.widget-view+json": {
1404 |
"model_id": "4a7333b9c40e4c9abface9d175b637f7",
1405 |
"version_major": 2,
1406 |
"version_minor": 0
1407 |
1412 |
"metadata": {},
1413 |
"output_type": "display_data"
1414 |
1415 |
1416 |
"name": "stderr",
1417 |
"output_type": "stream",
1418 |
"text": [
1419 |
"2024-12-05 18:36:11.299502: E external/local_xla/xla/stream_executor/cuda/] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
1420 |
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1421 |
"E0000 00:00:1733423771.360739 225] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
1422 |
"E0000 00:00:1733423771.378961 225] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
1423 |
"2024-12-05 18:36:11.524136: I tensorflow/core/platform/] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
1424 |
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
1425 |
1426 |
1427 |
1428 |
"data": {
1429 |
"application/vnd.jupyter.widget-view+json": {
1430 |
"model_id": "599872df212a40dba475d0647320f467",
1431 |
"version_major": 2,
1432 |
"version_minor": 0
1433 |
1441 |
1442 |
"data": {
1443 |
"application/vnd.jupyter.widget-view+json": {
1444 |
"model_id": "45acf55c884a491bac43dd85ce80df36",
1445 |
"version_major": 2,
1446 |
"version_minor": 0
1447 |
1455 |
1456 |
"data": {
1457 |
"application/vnd.jupyter.widget-view+json": {
1458 |
"model_id": "91a3839713f146889271126a3f2e6b23",
1459 |
"version_major": 2,
1460 |
"version_minor": 0
1461 |
1469 |
1470 |
"data": {
1471 |
"application/vnd.jupyter.widget-view+json": {
1472 |
"model_id": "303deff5af0940a18e0f19c68bbd9b6b",
1473 |
"version_major": 2,
1474 |
"version_minor": 0
1475 |
1483 |
1484 |
"data": {
1485 |
"application/vnd.jupyter.widget-view+json": {
1486 |
"model_id": "45ed4ff534e444fab1780873ca453c1a",
1487 |
"version_major": 2,
1488 |
"version_minor": 0
1489 |
1497 |
1498 |
"data": {
1499 |
"application/vnd.jupyter.widget-view+json": {
1500 |
"model_id": "94c98750b760459386f52c153724d603",
1501 |
"version_major": 2,
1502 |
"version_minor": 0
1503 |
1508 |
"metadata": {},
1509 |
"output_type": "display_data"
1510 |
1511 |
1512 |
"data": {
1513 |
"application/vnd.jupyter.widget-view+json": {
1514 |
"model_id": "8337ac6e5e6a4db4982669553b39b8ba",
1515 |
"version_major": 2,
1516 |
"version_minor": 0
1517 |
1525 |
1526 |
"data": {
1527 |
"application/vnd.jupyter.widget-view+json": {
1528 |
"model_id": "c0e7ab2db08a499bbea8bec1c17857dd",
1529 |
"version_major": 2,
1530 |
"version_minor": 0
1531 |
1539 |
1540 |
"data": {
1541 |
"application/vnd.jupyter.widget-view+json": {
1542 |
"model_id": "71bc274e67de44b5bf14c7113b893db7",
1543 |
"version_major": 2,
1544 |
"version_minor": 0
1545 |
1553 |
1554 |
"data": {
1555 |
"application/vnd.jupyter.widget-view+json": {
1556 |
"model_id": "e344829a0ae94e209fc9bc43d794510e",
1557 |
"version_major": 2,
1558 |
"version_minor": 0
1559 |
1567 |
1568 |
"data": {
1569 |
"application/vnd.jupyter.widget-view+json": {
1570 |
"model_id": "da3f4d75fff14d739605a8e9b4aed5cf",
1571 |
"version_major": 2,
1572 |
"version_minor": 0
1573 |
1581 |
1582 |
"data": {
1583 |
"application/vnd.jupyter.widget-view+json": {
1584 |
"model_id": "8e594662af3946f38388ae9d6a22337b",
1585 |
"version_major": 2,
1586 |
"version_minor": 0
1587 |
1595 |
1596 |
"data": {
1597 |
"application/vnd.jupyter.widget-view+json": {
1598 |
"model_id": "7396830f193d4958ae9c2fba9e59bce5",
1599 |
"version_major": 2,
1600 |
"version_minor": 0
1601 |
1609 |
1610 |
"data": {
1611 |
"application/vnd.jupyter.widget-view+json": {
1612 |
"model_id": "64bc001093534c2fa1d0e2207105419e",
1613 |
"version_major": 2,
1614 |
"version_minor": 0
1615 |
1619 |
1620 |
"metadata": {},
1621 |
"output_type": "display_data"
1622 |
1623 |
1624 |
"name": "stdout",
1625 |
"output_type": "stream",
1626 |
"text": [
1627 |
"g++ (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\n",
1628 |
"Copyright (C) 2021 Free Software Foundation, Inc.\n",
1629 |
"This is free software; see the source for copying conditions. There is NO\n",
1630 |
1631 |
1632 |
1633 |
1634 |
1635 |
"source": [
1636 |
"import torch\n",
1637 |
"from peft import LoraConfig,get_peft_model\n",
1638 |
"from transformers import AutoProcessor, BitsAndBytesConfig, Idefics3ForConditionalGeneration\n",
1639 |
1640 |
"DEVICE = \"cuda\"\n",
1641 |
"USE_LORA = True\n",
1642 |
"USE_QLORA = False\n",
1643 |
1644 |
"processor = AutoProcessor.from_pretrained(\n",
1645 |
" source_model_id,\n",
1667 |
" torch_dtype=torch.float16,\n",
1668 |
" quantization_config=bnb_config if USE_QLORA else None,\n",
1669 |
" )\n",
1670 |
" model = get_peft_model(model, lora_config)\n",
1671 |
" model =\n",
1672 |
1673 |
" model = Idefics3ForConditionalGeneration.from_pretrained(\n",
1674 |
" source_model_id,\n",
1688 |
1689 |
1690 |
"cell_type": "code",
1691 |
"execution_count": 35,
1692 |
"metadata": {
1693 |
"executionInfo": {
1694 |
"elapsed": 426,
1764 |
1765 |
1766 |
"cell_type": "code",
1767 |
"execution_count": null,
1768 |
"metadata": {
1769 |
"executionInfo": {
1770 |
"elapsed": 1008,
1793 |
" gradient_accumulation_steps = 8,\n",
1794 |
" dataloader_pin_memory = False,\n",
1795 |
" save_total_limit = 3,\n",
1796 |
" eval_strategy = \"epoch\",\n",
1797 |
" save_strategy = \"steps\",\n",
1798 |
" eval_steps = 100,\n",
1799 |
" save_steps = 10, # checkpoint each 10 steps\n",
1803 |
" push_to_hub = True,\n",
1804 |
" label_names = [\"labels\"],\n",
1805 |
" load_best_model_at_end = False,\n",
1806 |
" report_to = \"wandb\",\n",
1807 |
" optim = \"paged_adamw_8bit\",\n",
1808 |
" run_name = destination_model,\n",
1809 |
1810 |
1811 |
1812 |
1813 |
"cell_type": "code",
1814 |
"execution_count": 37,
1815 |
"metadata": {
1816 |
"colab": {
1817 |
"base_uri": "https://localhost:8080/"
1851 |
1852 |
1853 |
"cell_type": "code",
1854 |
"execution_count": 38,
1855 |
"metadata": {
1856 |
"colab": {
1857 |
"base_uri": "https://localhost:8080/",
1861 |
"outputId": "ebb15160-f56e-4899-e608-b0d5fd0ba117"
1862 |
1863 |
"outputs": [
1864 |
1865 |
"name": "stderr",
1866 |
"output_type": "stream",
1867 |
"text": [
1868 |
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n"
1869 |
1870 |
1871 |
1872 |
"data": {
1873 |
"text/html": [
1874 |
"Changes to your `wandb` environment variables will be ignored because your `wandb` session has already started. For more information on how to modify your settings with `wandb.init()` arguments, please refer to <a href='' target=\"_blank\">the W&B docs</a>."
1875 |
1876 |
"text/plain": [
1877 |
"<IPython.core.display.HTML object>"
1881 |
"output_type": "display_data"
1882 |
1883 |
1884 |
"data": {
1885 |
"text/html": [
1886 |
"Tracking run with wandb version 0.19.0"
1887 |
1888 |
"text/plain": [
1889 |
"<IPython.core.display.HTML object>"
1890 |
1891 |
1892 |
"metadata": {},
1893 |
"output_type": "display_data"
1894 |
1895 |
1896 |
"data": {
1897 |
"text/html": [
1898 |
"Run data is saved locally in <code>/workspace/wandb/run-20241205_190533-9ckd0brc</code>"
1899 |
1900 |
"text/plain": [
1901 |
"<IPython.core.display.HTML object>"
1902 |
1903 |
1904 |
"metadata": {},
1905 |
"output_type": "display_data"
1906 |
1907 |
1908 |
"data": {
1909 |
"text/html": [
1910 |
"Syncing run <strong><a href='' target=\"_blank\">IDEFICS3_ROCOv2</a></strong> to <a href='' target=\"_blank\">Weights & Biases</a> (<a href='' target=\"_blank\">docs</a>)<br/>"
1911 |
1912 |
"text/plain": [
1913 |
"<IPython.core.display.HTML object>"
1914 |
1915 |
1916 |
"metadata": {},
1917 |
"output_type": "display_data"
1918 |
1919 |
1920 |
"data": {
1921 |
"text/html": [
1922 |
" View project at <a href='' target=\"_blank\"></a>"
1923 |
1924 |
"text/plain": [
1925 |
"<IPython.core.display.HTML object>"
1926 |
1927 |
1928 |
"metadata": {},
1929 |
"output_type": "display_data"
1930 |
1931 |
1932 |
"data": {
1933 |
"text/html": [
1934 |
" View run at <a href='' target=\"_blank\"></a>"
1935 |
1936 |
"text/plain": [
1937 |
"<IPython.core.display.HTML object>"
1938 |
1939 |
1940 |
"metadata": {},
1941 |
"output_type": "display_data"
1942 |
1943 |
1944 |
"ename": "KeyboardInterrupt",
1945 |
"evalue": "",
1946 |
"output_type": "error",
1947 |
"traceback": [
1948 |
1949 |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1950 |
"Cell \u001b[0;32mIn[38], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m trainer\u001b[38;5;241m.\u001b[39mtrain()\n",
1951 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 2154\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 2155\u001b[0m \u001b[38;5;66;03m# Disable progress bars when uploading models during checkpoints to avoid polluting stdout\u001b[39;00m\n\u001b[1;32m 2156\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39mdisable_progress_bars()\n\u001b[0;32m-> 2157\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[1;32m 2158\u001b[0m args\u001b[38;5;241m=\u001b[39margs,\n\u001b[1;32m 2159\u001b[0m resume_from_checkpoint\u001b[38;5;241m=\u001b[39mresume_from_checkpoint,\n\u001b[1;32m 2160\u001b[0m trial\u001b[38;5;241m=\u001b[39mtrial,\n\u001b[1;32m 2161\u001b[0m ignore_keys_for_eval\u001b[38;5;241m=\u001b[39mignore_keys_for_eval,\n\u001b[1;32m 2162\u001b[0m )\n\u001b[1;32m 2163\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 2164\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n",
1952 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/\u001b[0m, in \u001b[0;36mfind_executable_batch_size.<locals>.decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo executable batch size found, reached zero.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m function(batch_size, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_reduce_batch_size(e):\n",
1953 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2518\u001b[0m context \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 2519\u001b[0m functools\u001b[38;5;241m.\u001b[39mpartial(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mno_sync, model\u001b[38;5;241m=\u001b[39mmodel)\n\u001b[1;32m 2520\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(batch_samples) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 2521\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m contextlib\u001b[38;5;241m.\u001b[39mnullcontext\n\u001b[1;32m 2522\u001b[0m )\n\u001b[1;32m 2523\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m context():\n\u001b[0;32m-> 2524\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs, num_items_in_batch)\n\u001b[1;32m 2526\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 2527\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 2528\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[1;32m 2529\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 2530\u001b[0m ):\n\u001b[1;32m 2531\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 2532\u001b[0m tr_loss \u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m+\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
1954 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs, num_items_in_batch)\u001b[0m\n\u001b[1;32m 3651\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 3653\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3654\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss(model, inputs, num_items_in_batch\u001b[38;5;241m=\u001b[39mnum_items_in_batch)\n\u001b[1;32m 3656\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m inputs\n\u001b[1;32m 3657\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 3658\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mtorch_empty_cache_steps \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mtorch_empty_cache_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 3660\u001b[0m ):\n",
1955 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs, num_items_in_batch)\u001b[0m\n\u001b[1;32m 3706\u001b[0m loss_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnum_items_in_batch\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m num_items_in_batch\n\u001b[1;32m 3707\u001b[0m inputs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mloss_kwargs}\n\u001b[0;32m-> 3708\u001b[0m outputs \u001b[38;5;241m=\u001b[39m model(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39minputs)\n\u001b[1;32m 3709\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 3710\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 3711\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
1956 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1957 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1958 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 823\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1959 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n",
1960 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/amp/\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1961 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 823\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1962 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/accelerate/utils/\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 810\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 811\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs))\n",
1963 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/amp/\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1964 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/idefics3/\u001b[0m, in \u001b[0;36mIdefics3ForConditionalGeneration.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1193\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;66;03m# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)\u001b[39;00m\n\u001b[0;32m-> 1196\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel(\n\u001b[1;32m 1197\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m 1198\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 1199\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 1200\u001b[0m past_key_values\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 1201\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1202\u001b[0m pixel_values\u001b[38;5;241m=\u001b[39mpixel_values,\n\u001b[1;32m 1203\u001b[0m pixel_attention_mask\u001b[38;5;241m=\u001b[39mpixel_attention_mask,\n\u001b[1;32m 1204\u001b[0m image_hidden_states\u001b[38;5;241m=\u001b[39mimage_hidden_states,\n\u001b[1;32m 1205\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 1206\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 1207\u001b[0m output_hidden_states\u001b[38;5;241m=\u001b[39moutput_hidden_states,\n\u001b[1;32m 
1208\u001b[0m return_dict\u001b[38;5;241m=\u001b[39mreturn_dict,\n\u001b[1;32m 1209\u001b[0m )\n\u001b[1;32m 1211\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 1212\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_head(hidden_states)\n",
1965 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1966 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1967 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/idefics3/\u001b[0m, in \u001b[0;36mIdefics3Model.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, pixel_values, pixel_attention_mask, image_hidden_states, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1011\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m past_seen_tokens \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m inputs_embeds \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m image_hidden_states \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;66;03m# When we generate, we don't want to replace the potential image_token_id that we generated by images\u001b[39;00m\n\u001b[1;32m 1013\u001b[0m \u001b[38;5;66;03m# that simply don't exist\u001b[39;00m\n\u001b[1;32m 1014\u001b[0m inputs_embeds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minputs_merger(\n\u001b[1;32m 1015\u001b[0m input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m 1016\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1017\u001b[0m image_hidden_states\u001b[38;5;241m=\u001b[39mimage_hidden_states,\n\u001b[1;32m 1018\u001b[0m )\n\u001b[0;32m-> 1020\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext_model(\n\u001b[1;32m 1021\u001b[0m inputs_embeds\u001b[38;5;241m=\u001b[39minputs_embeds,\n\u001b[1;32m 1022\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 1023\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 1024\u001b[0m past_key_values\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 1025\u001b[0m 
use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 1026\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 1027\u001b[0m output_hidden_states\u001b[38;5;241m=\u001b[39moutput_hidden_states,\n\u001b[1;32m 1028\u001b[0m return_dict\u001b[38;5;241m=\u001b[39mreturn_dict,\n\u001b[1;32m 1029\u001b[0m )\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m return_dict:\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(v \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m*\u001b[39moutputs, image_hidden_states] \u001b[38;5;28;01mif\u001b[39;00m v \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n",
1968 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1969 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1970 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position, **flash_attn_kwargs)\u001b[0m\n\u001b[1;32m 901\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m 902\u001b[0m decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m 903\u001b[0m hidden_states,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m position_embeddings,\n\u001b[1;32m 911\u001b[0m )\n\u001b[1;32m 912\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 913\u001b[0m layer_outputs \u001b[38;5;241m=\u001b[39m decoder_layer(\n\u001b[1;32m 914\u001b[0m hidden_states,\n\u001b[1;32m 915\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mcausal_mask,\n\u001b[1;32m 916\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 917\u001b[0m past_key_value\u001b[38;5;241m=\u001b[39mpast_key_values,\n\u001b[1;32m 918\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 919\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 920\u001b[0m cache_position\u001b[38;5;241m=\u001b[39mcache_position,\n\u001b[1;32m 921\u001b[0m position_embeddings\u001b[38;5;241m=\u001b[39mposition_embeddings,\n\u001b[1;32m 922\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mflash_attn_kwargs,\n\u001b[1;32m 923\u001b[0m )\n\u001b[1;32m 925\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 927\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache:\n",
1971 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1972 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1973 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[0m\n\u001b[1;32m 637\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_layernorm(hidden_states)\n\u001b[1;32m 639\u001b[0m \u001b[38;5;66;03m# Self Attention\u001b[39;00m\n\u001b[0;32m--> 640\u001b[0m hidden_states, self_attn_weights, present_key_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mself_attn(\n\u001b[1;32m 641\u001b[0m hidden_states\u001b[38;5;241m=\u001b[39mhidden_states,\n\u001b[1;32m 642\u001b[0m attention_mask\u001b[38;5;241m=\u001b[39mattention_mask,\n\u001b[1;32m 643\u001b[0m position_ids\u001b[38;5;241m=\u001b[39mposition_ids,\n\u001b[1;32m 644\u001b[0m past_key_value\u001b[38;5;241m=\u001b[39mpast_key_value,\n\u001b[1;32m 645\u001b[0m output_attentions\u001b[38;5;241m=\u001b[39moutput_attentions,\n\u001b[1;32m 646\u001b[0m use_cache\u001b[38;5;241m=\u001b[39muse_cache,\n\u001b[1;32m 647\u001b[0m cache_position\u001b[38;5;241m=\u001b[39mcache_position,\n\u001b[1;32m 648\u001b[0m position_embeddings\u001b[38;5;241m=\u001b[39mposition_embeddings,\n\u001b[1;32m 649\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 650\u001b[0m )\n\u001b[1;32m 651\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m residual \u001b[38;5;241m+\u001b[39m hidden_states\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n",
1974 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
1975 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/torch/nn/modules/\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
1976 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/\u001b[0m, in \u001b[0;36mLlamaSdpaAttention.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position, position_embeddings, **kwargs)\u001b[0m\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 540\u001b[0m cos, sin \u001b[38;5;241m=\u001b[39m position_embeddings\n\u001b[0;32m--> 541\u001b[0m query_states, key_states \u001b[38;5;241m=\u001b[39m apply_rotary_pos_emb(query_states, key_states, cos, sin)\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m past_key_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 544\u001b[0m \u001b[38;5;66;03m# sin and cos are specific to RoPE models; cache_position needed for the static cache\u001b[39;00m\n\u001b[1;32m 545\u001b[0m cache_kwargs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msin\u001b[39m\u001b[38;5;124m\"\u001b[39m: sin, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcos\u001b[39m\u001b[38;5;124m\"\u001b[39m: cos, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache_position\u001b[39m\u001b[38;5;124m\"\u001b[39m: cache_position}\n",
1977 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/\u001b[0m, in \u001b[0;36mapply_rotary_pos_emb\u001b[0;34m(q, k, cos, sin, position_ids, unsqueeze_dim)\u001b[0m\n\u001b[1;32m 224\u001b[0m sin \u001b[38;5;241m=\u001b[39m sin\u001b[38;5;241m.\u001b[39munsqueeze(unsqueeze_dim)\n\u001b[1;32m 225\u001b[0m q_embed \u001b[38;5;241m=\u001b[39m (q \u001b[38;5;241m*\u001b[39m cos) \u001b[38;5;241m+\u001b[39m (rotate_half(q) \u001b[38;5;241m*\u001b[39m sin)\n\u001b[0;32m--> 226\u001b[0m k_embed \u001b[38;5;241m=\u001b[39m (k \u001b[38;5;241m*\u001b[39m cos) \u001b[38;5;241m+\u001b[39m (rotate_half(k) \u001b[38;5;241m*\u001b[39m sin)\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m q_embed, k_embed\n",
1978 |
"File \u001b[0;32m~/.miniconda3/lib/python3.12/site-packages/transformers/models/llama/\u001b[0m, in \u001b[0;36mrotate_half\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 192\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrope_type\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdynamic\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 196\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrotate_half\u001b[39m(x):\n\u001b[1;32m 197\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Rotates half the hidden dims of the input.\"\"\"\u001b[39;00m\n\u001b[1;32m 198\u001b[0m x1 \u001b[38;5;241m=\u001b[39m x[\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;241m.\u001b[39m, : x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m]\n",
1979 |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
1980 |
1981 |
1982 |
1986 |
1987 |
1988 |
"cell_type": "code",
1989 |
"execution_count": null,
1990 |
"metadata": {},
1991 |
"outputs": [
1992 |
2024 |
2025 |
2026 |
"source": [
2027 |
"model = Idefics3ForConditionalGeneration.from_pretrained(source_model_id , torch_dtype=torch.float16).to(DEVICE)\n",
2028 |
"model.load_adapter(destination_model_id, device_map=\"auto\")"
2029 |
2030 |
@@ -0,0 +1,3 @@
1 |
2 |
oid sha256:8904b74767c5a060b0a8fbcaa8f47017a49e005a3e9e17dad2ca170a85f3f99a
3 |
size 10850559
@@ -0,0 +1,2 @@
1 |
2 |
@@ -0,0 +1,3 @@
1 |
2 |
oid sha256:41c9698af520efd7212d164e394aadb42d45b5fdee0b36ed31ae25422ac29c30
3 |
size 34242400
@@ -0,0 +1,3 @@
1 |
2 |
oid sha256:2525070006f24fdfc5221e2e5bf63f85417298403ea89d80dbbf255884122990
3 |
size 34536168
@@ -37,6 +37,14 @@ if [ -f /workspace/.config/CFTOKEN.enc ]; then
37 |
/usr/local/bin/cloudflared --pidfile /tmp/ --autoupdate-freq 24h0m0s tunnel run --token $CFTOKEN &
38 |
39 |
40 |
git clone
41 |
git config --global "[email protected]"
42 |
git config --global "[email protected]"
37 |
/usr/local/bin/cloudflared --pidfile /tmp/ --autoupdate-freq 24h0m0s tunnel run --token $CFTOKEN &
38 |
39 |
40 |
# Decode kaggle.json.enc file
41 |
# encoded with: openssl aes-256-cbc -base64 -md sha256 -pass pass:"$HF_TOKEN" -in kaggle.json -out kaggle.json.enc
42 |
# decode with: openssl aes-256-cbc -a -d -md sha256 -pass pass:"$HF_TOKEN" -in kaggle.json.enc -out kaggle.json
43 |
if [ -f /workspace/.config/kaggle.json.enc ]; then
44 |
mkdir -p $HOME/.kaggle
45 |
# Decrypt into the directory created just above ($HOME/.kaggle) rather than a
# hard-coded /workspace path, so the step still works when HOME differs, then
# make the credential file owner-only.
openssl aes-256-cbc -a -d -md sha256 -pass pass:"$HF_TOKEN" -in /workspace/.config/kaggle.json.enc -out "$HOME/.kaggle/kaggle.json" \
    && chmod 600 "$HOME/.kaggle/kaggle.json"
46 |
47 |
48 |
git clone
49 |
git config --global "[email protected]"
50 |
git config --global "[email protected]"