ErrorAI committed on
Commit 39eda9e · verified · 1 Parent(s): f6211fe

Training in progress, step 237, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:17aa1e3e9188180d60a7eaf95829178309ea212216606f52e22ed6afddb7a1d5
+ oid sha256:dee6f3340e2b17aaef786029350699bafa87cf73c2c369480afb1088c1138b22
  size 25192496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:627e42e9b9c5216e65e7511025bd4a0ea157b4b85e4f4366338b8d61fd3ef7c8
+ oid sha256:c3b901a9465e9173865298833017841a4e6e6c33766538820afbb13471ecc27c
  size 13005178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d615209b54a363ab34582a2efce2dbee9de0ad7107a279a5f7bd274c5d8b5799
+ oid sha256:a1629143cbe7f7197eba1a6fb9f86a38482f23b196e46a2898e7aa1e8400e6a9
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0950e2dffdf70f6969e672506b8287212d20b088ff729b93c9b723972fc5a09f
+ oid sha256:3e6ec607bcb824b32dbf9532bd2490e6acd122f00ac7ed381d94679fcb4d357f
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7619047619047619,
+ "epoch": 1.0031746031746032,
  "eval_steps": 60,
- "global_step": 180,
+ "global_step": 237,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1299,6 +1299,405 @@
  "eval_samples_per_second": 41.18,
  "eval_steps_per_second": 20.59,
  "step": 180
+ },
+ {
+ "epoch": 0.7661375661375661,
+ "grad_norm": 7.884878635406494,
+ "learning_rate": 2.855918772175522e-05,
+ "loss": 3.4112,
+ "step": 181
+ },
+ {
+ "epoch": 0.7703703703703704,
+ "grad_norm": 6.880666255950928,
+ "learning_rate": 2.7597661864045233e-05,
+ "loss": 3.4237,
+ "step": 182
+ },
+ {
+ "epoch": 0.7746031746031746,
+ "grad_norm": 5.849800109863281,
+ "learning_rate": 2.6650003363154963e-05,
+ "loss": 2.3083,
+ "step": 183
+ },
+ {
+ "epoch": 0.7788359788359789,
+ "grad_norm": 14.543538093566895,
+ "learning_rate": 2.5716393725910215e-05,
+ "loss": 3.0707,
+ "step": 184
+ },
+ {
+ "epoch": 0.783068783068783,
+ "grad_norm": 6.161655902862549,
+ "learning_rate": 2.47970117683313e-05,
+ "loss": 2.9202,
+ "step": 185
+ },
+ {
+ "epoch": 0.7873015873015873,
+ "grad_norm": 8.911192893981934,
+ "learning_rate": 2.389203358138419e-05,
+ "loss": 3.8606,
+ "step": 186
+ },
+ {
+ "epoch": 0.7915343915343915,
+ "grad_norm": 6.141244411468506,
+ "learning_rate": 2.3001632497253424e-05,
+ "loss": 2.9532,
+ "step": 187
+ },
+ {
+ "epoch": 0.7957671957671958,
+ "grad_norm": 4.823076248168945,
+ "learning_rate": 2.2125979056143364e-05,
+ "loss": 1.6172,
+ "step": 188
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 8.821215629577637,
+ "learning_rate": 2.1265240973614486e-05,
+ "loss": 3.1327,
+ "step": 189
+ },
+ {
+ "epoch": 0.8042328042328042,
+ "grad_norm": 5.922287464141846,
+ "learning_rate": 2.0419583108460418e-05,
+ "loss": 2.5859,
+ "step": 190
+ },
+ {
+ "epoch": 0.8084656084656084,
+ "grad_norm": 7.51337194442749,
+ "learning_rate": 1.958916743113214e-05,
+ "loss": 2.8097,
+ "step": 191
+ },
+ {
+ "epoch": 0.8126984126984127,
+ "grad_norm": 7.687736988067627,
+ "learning_rate": 1.877415299271561e-05,
+ "loss": 2.5735,
+ "step": 192
+ },
+ {
+ "epoch": 0.816931216931217,
+ "grad_norm": 5.669526100158691,
+ "learning_rate": 1.7974695894468384e-05,
+ "loss": 2.2725,
+ "step": 193
+ },
+ {
+ "epoch": 0.8211640211640212,
+ "grad_norm": 5.734451770782471,
+ "learning_rate": 1.7190949257921196e-05,
+ "loss": 2.132,
+ "step": 194
+ },
+ {
+ "epoch": 0.8253968253968254,
+ "grad_norm": 7.305327892303467,
+ "learning_rate": 1.642306319555027e-05,
+ "loss": 2.4507,
+ "step": 195
+ },
+ {
+ "epoch": 0.8296296296296296,
+ "grad_norm": 6.615843296051025,
+ "learning_rate": 1.5671184782026106e-05,
+ "loss": 2.7883,
+ "step": 196
+ },
+ {
+ "epoch": 0.8338624338624339,
+ "grad_norm": 6.883895397186279,
+ "learning_rate": 1.4935458026043959e-05,
+ "loss": 2.8472,
+ "step": 197
+ },
+ {
+ "epoch": 0.8380952380952381,
+ "grad_norm": 6.329193592071533,
+ "learning_rate": 1.4216023842741455e-05,
+ "loss": 2.9735,
+ "step": 198
+ },
+ {
+ "epoch": 0.8423280423280424,
+ "grad_norm": 8.090577125549316,
+ "learning_rate": 1.3513020026709023e-05,
+ "loss": 2.7412,
+ "step": 199
+ },
+ {
+ "epoch": 0.8465608465608465,
+ "grad_norm": 5.7532172203063965,
+ "learning_rate": 1.2826581225597767e-05,
+ "loss": 2.6063,
+ "step": 200
+ },
+ {
+ "epoch": 0.8507936507936508,
+ "grad_norm": 7.076923370361328,
+ "learning_rate": 1.2156838914330072e-05,
+ "loss": 3.1742,
+ "step": 201
+ },
+ {
+ "epoch": 0.855026455026455,
+ "grad_norm": 6.451399803161621,
+ "learning_rate": 1.1503921369918091e-05,
+ "loss": 2.5756,
+ "step": 202
+ },
+ {
+ "epoch": 0.8592592592592593,
+ "grad_norm": 5.6807427406311035,
+ "learning_rate": 1.0867953646894525e-05,
+ "loss": 2.8328,
+ "step": 203
+ },
+ {
+ "epoch": 0.8634920634920635,
+ "grad_norm": 7.157023906707764,
+ "learning_rate": 1.0249057553360742e-05,
+ "loss": 3.4757,
+ "step": 204
+ },
+ {
+ "epoch": 0.8677248677248677,
+ "grad_norm": 6.03039026260376,
+ "learning_rate": 9.647351627656543e-06,
+ "loss": 1.7409,
+ "step": 205
+ },
+ {
+ "epoch": 0.8719576719576719,
+ "grad_norm": 6.913475036621094,
+ "learning_rate": 9.062951115656403e-06,
+ "loss": 3.3463,
+ "step": 206
+ },
+ {
+ "epoch": 0.8761904761904762,
+ "grad_norm": 5.359566688537598,
+ "learning_rate": 8.495967948696192e-06,
+ "loss": 2.696,
+ "step": 207
+ },
+ {
+ "epoch": 0.8804232804232804,
+ "grad_norm": 5.83837366104126,
+ "learning_rate": 7.946510722134692e-06,
+ "loss": 2.4312,
+ "step": 208
+ },
+ {
+ "epoch": 0.8846560846560847,
+ "grad_norm": 6.859252452850342,
+ "learning_rate": 7.4146846745541506e-06,
+ "loss": 3.2935,
+ "step": 209
+ },
+ {
+ "epoch": 0.8888888888888888,
+ "grad_norm": 8.060405731201172,
+ "learning_rate": 6.900591667603751e-06,
+ "loss": 3.5592,
+ "step": 210
+ },
+ {
+ "epoch": 0.8931216931216931,
+ "grad_norm": 5.689277172088623,
+ "learning_rate": 6.40433016648988e-06,
+ "loss": 2.2932,
+ "step": 211
+ },
+ {
+ "epoch": 0.8973544973544973,
+ "grad_norm": 6.73931360244751,
+ "learning_rate": 5.925995221116853e-06,
+ "loss": 2.5687,
+ "step": 212
+ },
+ {
+ "epoch": 0.9015873015873016,
+ "grad_norm": 6.137598991394043,
+ "learning_rate": 5.465678447881828e-06,
+ "loss": 3.1611,
+ "step": 213
+ },
+ {
+ "epoch": 0.9058201058201059,
+ "grad_norm": 5.905510902404785,
+ "learning_rate": 5.023468012127364e-06,
+ "loss": 2.3701,
+ "step": 214
+ },
+ {
+ "epoch": 0.91005291005291,
+ "grad_norm": 5.902157306671143,
+ "learning_rate": 4.599448611254964e-06,
+ "loss": 2.4836,
+ "step": 215
+ },
+ {
+ "epoch": 0.9142857142857143,
+ "grad_norm": 7.214301109313965,
+ "learning_rate": 4.193701458502807e-06,
+ "loss": 3.4721,
+ "step": 216
+ },
+ {
+ "epoch": 0.9185185185185185,
+ "grad_norm": 5.22310209274292,
+ "learning_rate": 3.80630426739077e-06,
+ "loss": 2.0146,
+ "step": 217
+ },
+ {
+ "epoch": 0.9227513227513228,
+ "grad_norm": 6.780308723449707,
+ "learning_rate": 3.4373312368358944e-06,
+ "loss": 2.3891,
+ "step": 218
+ },
+ {
+ "epoch": 0.926984126984127,
+ "grad_norm": 6.700711250305176,
+ "learning_rate": 3.086853036940862e-06,
+ "loss": 2.9335,
+ "step": 219
+ },
+ {
+ "epoch": 0.9312169312169312,
+ "grad_norm": 7.158792972564697,
+ "learning_rate": 2.754936795458485e-06,
+ "loss": 2.4858,
+ "step": 220
+ },
+ {
+ "epoch": 0.9354497354497354,
+ "grad_norm": 7.4189019203186035,
+ "learning_rate": 2.4416460849345123e-06,
+ "loss": 2.9778,
+ "step": 221
+ },
+ {
+ "epoch": 0.9396825396825397,
+ "grad_norm": 6.4551897048950195,
+ "learning_rate": 2.1470409105315283e-06,
+ "loss": 2.7154,
+ "step": 222
+ },
+ {
+ "epoch": 0.9439153439153439,
+ "grad_norm": 5.956814765930176,
+ "learning_rate": 1.8711776985360308e-06,
+ "loss": 2.3949,
+ "step": 223
+ },
+ {
+ "epoch": 0.9481481481481482,
+ "grad_norm": 5.735781669616699,
+ "learning_rate": 1.61410928555098e-06,
+ "loss": 2.5578,
+ "step": 224
+ },
+ {
+ "epoch": 0.9523809523809523,
+ "grad_norm": 6.24806022644043,
+ "learning_rate": 1.3758849083759352e-06,
+ "loss": 2.5578,
+ "step": 225
+ },
+ {
+ "epoch": 0.9566137566137566,
+ "grad_norm": 6.540648937225342,
+ "learning_rate": 1.1565501945766222e-06,
+ "loss": 2.7276,
+ "step": 226
+ },
+ {
+ "epoch": 0.9608465608465608,
+ "grad_norm": 6.847859859466553,
+ "learning_rate": 9.56147153745779e-07,
+ "loss": 2.3213,
+ "step": 227
+ },
+ {
+ "epoch": 0.9650793650793651,
+ "grad_norm": 8.415708541870117,
+ "learning_rate": 7.747141694570026e-07,
+ "loss": 3.2955,
+ "step": 228
+ },
+ {
+ "epoch": 0.9693121693121693,
+ "grad_norm": 5.680570125579834,
+ "learning_rate": 6.122859919130974e-07,
+ "loss": 3.0723,
+ "step": 229
+ },
+ {
+ "epoch": 0.9735449735449735,
+ "grad_norm": 5.512330055236816,
+ "learning_rate": 4.6889373129022085e-07,
+ "loss": 2.348,
+ "step": 230
+ },
+ {
+ "epoch": 0.9777777777777777,
+ "grad_norm": 6.558260440826416,
+ "learning_rate": 3.445648517793942e-07,
+ "loss": 2.3865,
+ "step": 231
+ },
+ {
+ "epoch": 0.982010582010582,
+ "grad_norm": 6.311702728271484,
+ "learning_rate": 2.3932316632614416e-07,
+ "loss": 2.8369,
+ "step": 232
+ },
+ {
+ "epoch": 0.9862433862433863,
+ "grad_norm": 5.8906354904174805,
+ "learning_rate": 1.5318883206962842e-07,
+ "loss": 2.6832,
+ "step": 233
+ },
+ {
+ "epoch": 0.9904761904761905,
+ "grad_norm": 5.275954723358154,
+ "learning_rate": 8.617834648185774e-08,
+ "loss": 2.6467,
+ "step": 234
+ },
+ {
+ "epoch": 0.9947089947089947,
+ "grad_norm": 6.683202743530273,
+ "learning_rate": 3.8304544207945495e-08,
+ "loss": 2.575,
+ "step": 235
+ },
+ {
+ "epoch": 0.9989417989417989,
+ "grad_norm": 5.911813259124756,
+ "learning_rate": 9.576594607807465e-09,
+ "loss": 2.1931,
+ "step": 236
+ },
+ {
+ "epoch": 1.0031746031746032,
+ "grad_norm": 6.805810451507568,
+ "learning_rate": 0.0,
+ "loss": 2.808,
+ "step": 237
  }
  ],
  "logging_steps": 1,
@@ -1313,12 +1712,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 5374302643814400.0,
+ "total_flos": 7072432993075200.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null