Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4d716f3ed59f2f8365c5a83288c44f2c6a3497247e0d210db211da8e3457ea6
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:f265d20d146f4e7fa4c6f76f80c9c73b1f36dc7254474f156ce6ff3d91b9d486
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43f5f3e41db37e163d460049e7842b891a73ddcbcd37501643f24a7fba868bea
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:24182bdb9dd2b3d82b2ed9df6d90296d1464ebb2f1bda4c8022639f4e152b661
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69c32ff4b1846dda8adb32bac70ee2d25a8c5b5a521a5ab02ac03902d636cd53
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:23497d4d502105d9b958588fefb577aec8f2e43ebd8ab4f9c721b0a4560d33f0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d359eb5d29e75fb2bbe5b7026981da69b95b8ad1fea469302d13cde104f7e8a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.012402563355863094,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.005599260897561522,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 13.93,
       "eval_steps_per_second": 3.482,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.685806012294758e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.008957763202488422,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.011198521795123043,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.93,
       "eval_steps_per_second": 3.482,
       "step": 50
+    },
+    {
+      "epoch": 0.005711246115512753,
+      "grad_norm": 0.08151597529649734,
+      "learning_rate": 8.894386393810563e-05,
+      "loss": 0.0191,
+      "step": 51
+    },
+    {
+      "epoch": 0.005823231333463983,
+      "grad_norm": 0.043629955500364304,
+      "learning_rate": 8.842005554284296e-05,
+      "loss": 0.0075,
+      "step": 52
+    },
+    {
+      "epoch": 0.005935216551415213,
+      "grad_norm": 0.07190629094839096,
+      "learning_rate": 8.788574348801675e-05,
+      "loss": 0.008,
+      "step": 53
+    },
+    {
+      "epoch": 0.006047201769366443,
+      "grad_norm": 0.04685960337519646,
+      "learning_rate": 8.73410738492077e-05,
+      "loss": 0.0116,
+      "step": 54
+    },
+    {
+      "epoch": 0.006159186987317674,
+      "grad_norm": 0.03496837243437767,
+      "learning_rate": 8.678619553365659e-05,
+      "loss": 0.0115,
+      "step": 55
+    },
+    {
+      "epoch": 0.0062711722052689045,
+      "grad_norm": 0.07295988500118256,
+      "learning_rate": 8.622126023955446e-05,
+      "loss": 0.0077,
+      "step": 56
+    },
+    {
+      "epoch": 0.006383157423220135,
+      "grad_norm": 0.028815751895308495,
+      "learning_rate": 8.564642241456986e-05,
+      "loss": 0.0068,
+      "step": 57
+    },
+    {
+      "epoch": 0.006495142641171366,
+      "grad_norm": 0.07860519737005234,
+      "learning_rate": 8.506183921362443e-05,
+      "loss": 0.0067,
+      "step": 58
+    },
+    {
+      "epoch": 0.006607127859122596,
+      "grad_norm": 0.03098691627383232,
+      "learning_rate": 8.44676704559283e-05,
+      "loss": 0.0046,
+      "step": 59
+    },
+    {
+      "epoch": 0.006719113077073826,
+      "grad_norm": 0.04155720770359039,
+      "learning_rate": 8.386407858128706e-05,
+      "loss": 0.0056,
+      "step": 60
+    },
+    {
+      "epoch": 0.006831098295025056,
+      "grad_norm": 0.04726318642497063,
+      "learning_rate": 8.32512286056924e-05,
+      "loss": 0.0063,
+      "step": 61
+    },
+    {
+      "epoch": 0.006943083512976287,
+      "grad_norm": 0.14553825557231903,
+      "learning_rate": 8.262928807620843e-05,
+      "loss": 0.0127,
+      "step": 62
+    },
+    {
+      "epoch": 0.007055068730927518,
+      "grad_norm": 0.08409129083156586,
+      "learning_rate": 8.199842702516583e-05,
+      "loss": 0.0119,
+      "step": 63
+    },
+    {
+      "epoch": 0.007167053948878748,
+      "grad_norm": 0.0558687187731266,
+      "learning_rate": 8.135881792367686e-05,
+      "loss": 0.0062,
+      "step": 64
+    },
+    {
+      "epoch": 0.007279039166829979,
+      "grad_norm": 0.03517032787203789,
+      "learning_rate": 8.07106356344834e-05,
+      "loss": 0.0069,
+      "step": 65
+    },
+    {
+      "epoch": 0.007391024384781209,
+      "grad_norm": 0.03434384986758232,
+      "learning_rate": 8.005405736415126e-05,
+      "loss": 0.0029,
+      "step": 66
+    },
+    {
+      "epoch": 0.007503009602732439,
+      "grad_norm": 0.07082964479923248,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 0.0097,
+      "step": 67
+    },
+    {
+      "epoch": 0.007614994820683669,
+      "grad_norm": 0.06074490025639534,
+      "learning_rate": 7.871643313414718e-05,
+      "loss": 0.0049,
+      "step": 68
+    },
+    {
+      "epoch": 0.0077269800386349004,
+      "grad_norm": 0.07937470078468323,
+      "learning_rate": 7.803575286758364e-05,
+      "loss": 0.0132,
+      "step": 69
+    },
+    {
+      "epoch": 0.00783896525658613,
+      "grad_norm": 0.04968696087598801,
+      "learning_rate": 7.734740790612136e-05,
+      "loss": 0.0064,
+      "step": 70
+    },
+    {
+      "epoch": 0.00795095047453736,
+      "grad_norm": 0.05936945974826813,
+      "learning_rate": 7.66515864363997e-05,
+      "loss": 0.0067,
+      "step": 71
+    },
+    {
+      "epoch": 0.008062935692488591,
+      "grad_norm": 0.07118445634841919,
+      "learning_rate": 7.594847868906076e-05,
+      "loss": 0.0124,
+      "step": 72
+    },
+    {
+      "epoch": 0.008174920910439821,
+      "grad_norm": 0.048843711614608765,
+      "learning_rate": 7.52382768867422e-05,
+      "loss": 0.0106,
+      "step": 73
+    },
+    {
+      "epoch": 0.008286906128391053,
+      "grad_norm": 0.05531323701143265,
+      "learning_rate": 7.452117519152542e-05,
+      "loss": 0.006,
+      "step": 74
+    },
+    {
+      "epoch": 0.008398891346342283,
+      "grad_norm": 0.07174713909626007,
+      "learning_rate": 7.379736965185368e-05,
+      "loss": 0.0176,
+      "step": 75
+    },
+    {
+      "epoch": 0.008510876564293514,
+      "grad_norm": 0.09108234196901321,
+      "learning_rate": 7.30670581489344e-05,
+      "loss": 0.0071,
+      "step": 76
+    },
+    {
+      "epoch": 0.008622861782244744,
+      "grad_norm": 0.07062532752752304,
+      "learning_rate": 7.233044034264034e-05,
+      "loss": 0.0058,
+      "step": 77
+    },
+    {
+      "epoch": 0.008734847000195974,
+      "grad_norm": 0.05787373706698418,
+      "learning_rate": 7.158771761692464e-05,
+      "loss": 0.008,
+      "step": 78
+    },
+    {
+      "epoch": 0.008846832218147204,
+      "grad_norm": 0.07510489225387573,
+      "learning_rate": 7.083909302476453e-05,
+      "loss": 0.0067,
+      "step": 79
+    },
+    {
+      "epoch": 0.008958817436098434,
+      "grad_norm": 0.09543178230524063,
+      "learning_rate": 7.008477123264848e-05,
+      "loss": 0.0126,
+      "step": 80
+    },
+    {
+      "epoch": 0.009070802654049666,
+      "grad_norm": 0.08685528486967087,
+      "learning_rate": 6.932495846462261e-05,
+      "loss": 0.0086,
+      "step": 81
+    },
+    {
+      "epoch": 0.009182787872000896,
+      "grad_norm": 0.049802061170339584,
+      "learning_rate": 6.855986244591104e-05,
+      "loss": 0.009,
+      "step": 82
+    },
+    {
+      "epoch": 0.009294773089952127,
+      "grad_norm": 0.03666006773710251,
+      "learning_rate": 6.778969234612584e-05,
+      "loss": 0.0083,
+      "step": 83
+    },
+    {
+      "epoch": 0.009406758307903357,
+      "grad_norm": 0.06713581085205078,
+      "learning_rate": 6.701465872208216e-05,
+      "loss": 0.0053,
+      "step": 84
+    },
+    {
+      "epoch": 0.009518743525854587,
+      "grad_norm": 0.12403149157762527,
+      "learning_rate": 6.623497346023418e-05,
+      "loss": 0.0145,
+      "step": 85
+    },
+    {
+      "epoch": 0.009630728743805817,
+      "grad_norm": 0.1675027757883072,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 0.0113,
+      "step": 86
+    },
+    {
+      "epoch": 0.009742713961757047,
+      "grad_norm": 0.10205962508916855,
+      "learning_rate": 6.466250186922325e-05,
+      "loss": 0.0108,
+      "step": 87
+    },
+    {
+      "epoch": 0.00985469917970828,
+      "grad_norm": 0.09269709140062332,
+      "learning_rate": 6.387014543809223e-05,
+      "loss": 0.0051,
+      "step": 88
+    },
+    {
+      "epoch": 0.00996668439765951,
+      "grad_norm": 0.17211325466632843,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 0.0277,
+      "step": 89
+    },
+    {
+      "epoch": 0.01007866961561074,
+      "grad_norm": 0.4585292935371399,
+      "learning_rate": 6.227427435703997e-05,
+      "loss": 0.0204,
+      "step": 90
+    },
+    {
+      "epoch": 0.01019065483356197,
+      "grad_norm": 0.23112568259239197,
+      "learning_rate": 6.147119600233758e-05,
+      "loss": 0.0084,
+      "step": 91
+    },
+    {
+      "epoch": 0.0103026400515132,
+      "grad_norm": 0.09548697620630264,
+      "learning_rate": 6.066498153718735e-05,
+      "loss": 0.0113,
+      "step": 92
+    },
+    {
+      "epoch": 0.01041462526946443,
+      "grad_norm": 0.10480192303657532,
+      "learning_rate": 5.985585137257401e-05,
+      "loss": 0.0196,
+      "step": 93
+    },
+    {
+      "epoch": 0.01052661048741566,
+      "grad_norm": 0.053521186113357544,
+      "learning_rate": 5.90440267166055e-05,
+      "loss": 0.0035,
+      "step": 94
+    },
+    {
+      "epoch": 0.010638595705366892,
+      "grad_norm": 0.12508662045001984,
+      "learning_rate": 5.8229729514036705e-05,
+      "loss": 0.0046,
+      "step": 95
+    },
+    {
+      "epoch": 0.010750580923318123,
+      "grad_norm": 0.07000123709440231,
+      "learning_rate": 5.74131823855921e-05,
+      "loss": 0.0089,
+      "step": 96
+    },
+    {
+      "epoch": 0.010862566141269353,
+      "grad_norm": 0.13927717506885529,
+      "learning_rate": 5.6594608567103456e-05,
+      "loss": 0.0225,
+      "step": 97
+    },
+    {
+      "epoch": 0.010974551359220583,
+      "grad_norm": 0.36776211857795715,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 0.0061,
+      "step": 98
+    },
+    {
+      "epoch": 0.011086536577171813,
+      "grad_norm": 0.11161035299301147,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 0.0117,
+      "step": 99
+    },
+    {
+      "epoch": 0.011198521795123043,
+      "grad_norm": 0.10862310975790024,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 0.0104,
+      "step": 100
+    },
+    {
+      "epoch": 0.011198521795123043,
+      "eval_loss": 0.008957763202488422,
+      "eval_runtime": 1080.5124,
+      "eval_samples_per_second": 13.919,
+      "eval_steps_per_second": 3.48,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.3371612024589517e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null