Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10a562e3b54256e648e8e763db4534fca8ff50159916cb9f3f734b2e9b3a87dc
 size 332316480

 version https://git-lfs.github.com/spec/v1
+oid sha256:972aa9ffe87080f27071031d0e4d226dffe8f3ed6fbff6743ae8a659ec8ab968
 size 332316480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:799e1de59c7fb54c830829de04ad12f2e2e53bc837c474c81657015f76ec40b8
 size 169157892

 version https://git-lfs.github.com/spec/v1
+oid sha256:17f24c0296a81f84cf38b6c10994df2d8d2d5bea05eb21e5ea6ae4aeae9b1d22
 size 169157892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd5908c818a08129a979c62e339b8331e05a2b0fe5c4ab8a7ba6f3a3f27e6e99
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0e394433f831db42905c4a4dcb8562be06e854e5b31c169d95b91d32afffc57
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d359eb5d29e75fb2bbe5b7026981da69b95b8ad1fea469302d13cde104f7e8a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6396291255950928,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.5076142131979695,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 21.654,
       "eval_steps_per_second": 5.479,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1132769329414144e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5486623048782349,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.015228426395939,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.654,
       "eval_steps_per_second": 5.479,
       "step": 50
+    },
+    {
+      "epoch": 0.5177664974619289,
+      "grad_norm": 0.5438342690467834,
+      "learning_rate": 8.894386393810563e-05,
+      "loss": 1.6741,
+      "step": 51
+    },
+    {
+      "epoch": 0.5279187817258884,
+      "grad_norm": 0.480343222618103,
+      "learning_rate": 8.842005554284296e-05,
+      "loss": 1.6306,
+      "step": 52
+    },
+    {
+      "epoch": 0.5380710659898477,
+      "grad_norm": 0.4987044036388397,
+      "learning_rate": 8.788574348801675e-05,
+      "loss": 1.6827,
+      "step": 53
+    },
+    {
+      "epoch": 0.5482233502538071,
+      "grad_norm": 0.5006952881813049,
+      "learning_rate": 8.73410738492077e-05,
+      "loss": 1.7046,
+      "step": 54
+    },
+    {
+      "epoch": 0.5583756345177665,
+      "grad_norm": 0.5399014949798584,
+      "learning_rate": 8.678619553365659e-05,
+      "loss": 1.6806,
+      "step": 55
+    },
+    {
+      "epoch": 0.5685279187817259,
+      "grad_norm": 0.5678799152374268,
+      "learning_rate": 8.622126023955446e-05,
+      "loss": 1.7275,
+      "step": 56
+    },
+    {
+      "epoch": 0.5786802030456852,
+      "grad_norm": 0.5736947059631348,
+      "learning_rate": 8.564642241456986e-05,
+      "loss": 1.8071,
+      "step": 57
+    },
+    {
+      "epoch": 0.5888324873096447,
+      "grad_norm": 0.5423876047134399,
+      "learning_rate": 8.506183921362443e-05,
+      "loss": 1.6724,
+      "step": 58
+    },
+    {
+      "epoch": 0.5989847715736041,
+      "grad_norm": 0.6047144532203674,
+      "learning_rate": 8.44676704559283e-05,
+      "loss": 1.7872,
+      "step": 59
+    },
+    {
+      "epoch": 0.6091370558375635,
+      "grad_norm": 0.5819637179374695,
+      "learning_rate": 8.386407858128706e-05,
+      "loss": 1.7783,
+      "step": 60
+    },
+    {
+      "epoch": 0.6192893401015228,
+      "grad_norm": 0.6512382626533508,
+      "learning_rate": 8.32512286056924e-05,
+      "loss": 1.6646,
+      "step": 61
+    },
+    {
+      "epoch": 0.6294416243654822,
+      "grad_norm": 0.6138646602630615,
+      "learning_rate": 8.262928807620843e-05,
+      "loss": 1.7131,
+      "step": 62
+    },
+    {
+      "epoch": 0.6395939086294417,
+      "grad_norm": 0.6422168016433716,
+      "learning_rate": 8.199842702516583e-05,
+      "loss": 1.6838,
+      "step": 63
+    },
+    {
+      "epoch": 0.649746192893401,
+      "grad_norm": 0.6840230226516724,
+      "learning_rate": 8.135881792367686e-05,
+      "loss": 1.7402,
+      "step": 64
+    },
+    {
+      "epoch": 0.6598984771573604,
+      "grad_norm": 0.6549532413482666,
+      "learning_rate": 8.07106356344834e-05,
+      "loss": 1.6616,
+      "step": 65
+    },
+    {
+      "epoch": 0.6700507614213198,
+      "grad_norm": 0.742430567741394,
+      "learning_rate": 8.005405736415126e-05,
+      "loss": 1.6679,
+      "step": 66
+    },
+    {
+      "epoch": 0.6802030456852792,
+      "grad_norm": 0.7295668721199036,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 1.6294,
+      "step": 67
+    },
+    {
+      "epoch": 0.6903553299492385,
+      "grad_norm": 0.8265715837478638,
+      "learning_rate": 7.871643313414718e-05,
+      "loss": 1.8901,
+      "step": 68
+    },
+    {
+      "epoch": 0.700507614213198,
+      "grad_norm": 0.7593281269073486,
+      "learning_rate": 7.803575286758364e-05,
+      "loss": 1.5688,
+      "step": 69
+    },
+    {
+      "epoch": 0.7106598984771574,
+      "grad_norm": 0.8952975869178772,
+      "learning_rate": 7.734740790612136e-05,
+      "loss": 1.7755,
+      "step": 70
+    },
+    {
+      "epoch": 0.7208121827411168,
+      "grad_norm": 1.0053350925445557,
+      "learning_rate": 7.66515864363997e-05,
+      "loss": 1.5932,
+      "step": 71
+    },
+    {
+      "epoch": 0.7309644670050761,
+      "grad_norm": 0.9705185294151306,
+      "learning_rate": 7.594847868906076e-05,
+      "loss": 1.5989,
+      "step": 72
+    },
+    {
+      "epoch": 0.7411167512690355,
+      "grad_norm": 0.4911041557788849,
+      "learning_rate": 7.52382768867422e-05,
+      "loss": 1.708,
+      "step": 73
+    },
+    {
+      "epoch": 0.751269035532995,
+      "grad_norm": 0.5251993536949158,
+      "learning_rate": 7.452117519152542e-05,
+      "loss": 1.8025,
+      "step": 74
+    },
+    {
+      "epoch": 0.7614213197969543,
+      "grad_norm": 0.5147970914840698,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 1.5758,
+      "step": 75
+    },
+    {
+      "epoch": 0.7715736040609137,
+      "grad_norm": 0.5723939538002014,
+      "learning_rate": 7.30670581489344e-05,
+      "loss": 1.7717,
+      "step": 76
+    },
+    {
+      "epoch": 0.7817258883248731,
+      "grad_norm": 0.5487803816795349,
+      "learning_rate": 7.233044034264034e-05,
+      "loss": 1.6682,
+      "step": 77
+    },
+    {
+      "epoch": 0.7918781725888325,
+      "grad_norm": 0.5720694065093994,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 1.7009,
+      "step": 78
+    },
+    {
+      "epoch": 0.8020304568527918,
+      "grad_norm": 0.5712814927101135,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 1.6973,
+      "step": 79
+    },
+    {
+      "epoch": 0.8121827411167513,
+      "grad_norm": 0.5587448477745056,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 1.5849,
+      "step": 80
+    },
+    {
+      "epoch": 0.8223350253807107,
+      "grad_norm": 0.6082424521446228,
+      "learning_rate": 6.932495846462261e-05,
+      "loss": 1.7704,
+      "step": 81
+    },
+    {
+      "epoch": 0.8324873096446701,
+      "grad_norm": 0.6438080072402954,
+      "learning_rate": 6.855986244591104e-05,
+      "loss": 1.7454,
+      "step": 82
+    },
+    {
+      "epoch": 0.8426395939086294,
+      "grad_norm": 0.6118745803833008,
+      "learning_rate": 6.778969234612584e-05,
+      "loss": 1.5767,
+      "step": 83
+    },
+    {
+      "epoch": 0.8527918781725888,
+      "grad_norm": 0.6005804538726807,
+      "learning_rate": 6.701465872208216e-05,
+      "loss": 1.5711,
+      "step": 84
+    },
+    {
+      "epoch": 0.8629441624365483,
+      "grad_norm": 0.6292843222618103,
+      "learning_rate": 6.623497346023418e-05,
+      "loss": 1.5878,
+      "step": 85
+    },
+    {
+      "epoch": 0.8730964467005076,
+      "grad_norm": 0.6168345808982849,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 1.4827,
+      "step": 86
+    },
+    {
+      "epoch": 0.883248730964467,
+      "grad_norm": 0.6699876189231873,
+      "learning_rate": 6.466250186922325e-05,
+      "loss": 1.6753,
+      "step": 87
+    },
+    {
+      "epoch": 0.8934010152284264,
+      "grad_norm": 0.7486642003059387,
+      "learning_rate": 6.387014543809223e-05,
+      "loss": 1.5591,
+      "step": 88
+    },
+    {
+      "epoch": 0.9035532994923858,
+      "grad_norm": 0.716096818447113,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 1.5256,
+      "step": 89
+    },
+    {
+      "epoch": 0.9137055837563451,
+      "grad_norm": 0.6980071663856506,
+      "learning_rate": 6.227427435703997e-05,
+      "loss": 1.6128,
+      "step": 90
+    },
+    {
+      "epoch": 0.9238578680203046,
+      "grad_norm": 0.776879072189331,
+      "learning_rate": 6.147119600233758e-05,
+      "loss": 1.5798,
+      "step": 91
+    },
+    {
+      "epoch": 0.934010152284264,
+      "grad_norm": 0.8649686574935913,
+      "learning_rate": 6.066498153718735e-05,
+      "loss": 1.6465,
+      "step": 92
+    },
+    {
+      "epoch": 0.9441624365482234,
+      "grad_norm": 0.8168870210647583,
+      "learning_rate": 5.985585137257401e-05,
+      "loss": 1.7388,
+      "step": 93
+    },
+    {
+      "epoch": 0.9543147208121827,
+      "grad_norm": 0.836371123790741,
+      "learning_rate": 5.90440267166055e-05,
+      "loss": 1.5316,
+      "step": 94
+    },
+    {
+      "epoch": 0.9644670050761421,
+      "grad_norm": 0.975714385509491,
+      "learning_rate": 5.8229729514036705e-05,
+      "loss": 1.6221,
+      "step": 95
+    },
+    {
+      "epoch": 0.9746192893401016,
+      "grad_norm": 1.0654964447021484,
+      "learning_rate": 5.74131823855921e-05,
+      "loss": 1.6136,
+      "step": 96
+    },
+    {
+      "epoch": 0.9847715736040609,
+      "grad_norm": 0.6085041165351868,
+      "learning_rate": 5.6594608567103456e-05,
+      "loss": 1.6456,
+      "step": 97
+    },
+    {
+      "epoch": 0.9949238578680203,
+      "grad_norm": 0.7240885496139526,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 1.5469,
+      "step": 98
+    },
+    {
+      "epoch": 1.0050761421319796,
+      "grad_norm": 1.5985159873962402,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 2.3111,
+      "step": 99
+    },
+    {
+      "epoch": 1.015228426395939,
+      "grad_norm": 0.5017366409301758,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 1.5656,
+      "step": 100
+    },
+    {
+      "epoch": 1.015228426395939,
+      "eval_loss": 1.5486623048782349,
+      "eval_runtime": 7.6734,
+      "eval_samples_per_second": 21.633,
+      "eval_steps_per_second": 5.473,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.2084252647424e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null