Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cece2e0d04d1e693e8d27fd443e243edce9f89dd096305cb13eb1004a6923e2
 size 222865880

 version https://git-lfs.github.com/spec/v1
+oid sha256:26f15b14cb1df943d4db78b8b74cbb52fcf4aa006038b2bcb76f026b36e251c0
 size 222865880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e64c7c5a57c9760908d645241f3cd948b973586f752eee94bf60dc4ddcce199e
 size 445914554

 version https://git-lfs.github.com/spec/v1
+oid sha256:9538f95b328d041329c05d3c192f9c21b9a7b90e8c126a6cc180d2198767bd44
 size 445914554

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13225039970d62368a8d7fe9637ca29897b393accd9919b6f5928823c25bb9a0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:73997c2d1137ebb24913cad3ad9fd87a06c23cb2260121c50de664fa8e4957e7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad2841b888ce0ae948634757c3fcacf0119c249e0fec8f3ca61ea266369ef92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d2a6c6aafc669cea03b9634666f204de949a3d45ce2f48a07e7e3eaf18c715
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.33410218358039856,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.012825445684237527,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 1.57,
       "eval_steps_per_second": 0.22,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.7850713268224e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.3280201852321625,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.025650891368475055,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1.57,
       "eval_steps_per_second": 0.22,
       "step": 50
+    },
+    {
+      "epoch": 0.013081954597922277,
+      "grad_norm": 0.581642210483551,
+      "learning_rate": 0.0002668315918143169,
+      "loss": 1.2623,
+      "step": 51
+    },
+    {
+      "epoch": 0.013338463511607028,
+      "grad_norm": 0.468250036239624,
+      "learning_rate": 0.00026526016662852886,
+      "loss": 1.2536,
+      "step": 52
+    },
+    {
+      "epoch": 0.01359497242529178,
+      "grad_norm": 0.3212797939777374,
+      "learning_rate": 0.00026365723046405023,
+      "loss": 1.1647,
+      "step": 53
+    },
+    {
+      "epoch": 0.01385148133897653,
+      "grad_norm": 0.24367539584636688,
+      "learning_rate": 0.0002620232215476231,
+      "loss": 1.2143,
+      "step": 54
+    },
+    {
+      "epoch": 0.01410799025266128,
+      "grad_norm": 0.1994316130876541,
+      "learning_rate": 0.0002603585866009697,
+      "loss": 1.1233,
+      "step": 55
+    },
+    {
+      "epoch": 0.01436449916634603,
+      "grad_norm": 0.1888861209154129,
+      "learning_rate": 0.00025866378071866334,
+      "loss": 1.0761,
+      "step": 56
+    },
+    {
+      "epoch": 0.014621008080030782,
+      "grad_norm": 0.19591979682445526,
+      "learning_rate": 0.00025693926724370956,
+      "loss": 1.1953,
+      "step": 57
+    },
+    {
+      "epoch": 0.014877516993715532,
+      "grad_norm": 0.20236290991306305,
+      "learning_rate": 0.00025518551764087326,
+      "loss": 1.0408,
+      "step": 58
+    },
+    {
+      "epoch": 0.015134025907400282,
+      "grad_norm": 0.21848787367343903,
+      "learning_rate": 0.00025340301136778483,
+      "loss": 1.1106,
+      "step": 59
+    },
+    {
+      "epoch": 0.015390534821085032,
+      "grad_norm": 0.1914772242307663,
+      "learning_rate": 0.00025159223574386114,
+      "loss": 1.0984,
+      "step": 60
+    },
+    {
+      "epoch": 0.015647043734769783,
+      "grad_norm": 0.18511377274990082,
+      "learning_rate": 0.0002497536858170772,
+      "loss": 1.0421,
+      "step": 61
+    },
+    {
+      "epoch": 0.015903552648454534,
+      "grad_norm": 0.20475885272026062,
+      "learning_rate": 0.00024788786422862526,
+      "loss": 1.1696,
+      "step": 62
+    },
+    {
+      "epoch": 0.016160061562139283,
+      "grad_norm": 0.17918260395526886,
+      "learning_rate": 0.00024599528107549745,
+      "loss": 1.1129,
+      "step": 63
+    },
+    {
+      "epoch": 0.016416570475824035,
+      "grad_norm": 0.18487805128097534,
+      "learning_rate": 0.00024407645377103054,
+      "loss": 1.048,
+      "step": 64
+    },
+    {
+      "epoch": 0.016673079389508787,
+      "grad_norm": 0.15813754498958588,
+      "learning_rate": 0.00024213190690345018,
+      "loss": 1.1329,
+      "step": 65
+    },
+    {
+      "epoch": 0.016929588303193535,
+      "grad_norm": 0.17792178690433502,
+      "learning_rate": 0.00024016217209245374,
+      "loss": 1.0894,
+      "step": 66
+    },
+    {
+      "epoch": 0.017186097216878287,
+      "grad_norm": 0.19200259447097778,
+      "learning_rate": 0.00023816778784387094,
+      "loss": 1.0754,
+      "step": 67
+    },
+    {
+      "epoch": 0.017442606130563035,
+      "grad_norm": 0.17065322399139404,
+      "learning_rate": 0.0002361492994024415,
+      "loss": 1.0666,
+      "step": 68
+    },
+    {
+      "epoch": 0.017699115044247787,
+      "grad_norm": 0.18813008069992065,
+      "learning_rate": 0.0002341072586027509,
+      "loss": 1.2129,
+      "step": 69
+    },
+    {
+      "epoch": 0.01795562395793254,
+      "grad_norm": 0.20512309670448303,
+      "learning_rate": 0.00023204222371836405,
+      "loss": 1.0896,
+      "step": 70
+    },
+    {
+      "epoch": 0.018212132871617288,
+      "grad_norm": 0.16995452344417572,
+      "learning_rate": 0.00022995475930919905,
+      "loss": 1.0235,
+      "step": 71
+    },
+    {
+      "epoch": 0.01846864178530204,
+      "grad_norm": 0.1726696640253067,
+      "learning_rate": 0.00022784543606718227,
+      "loss": 1.171,
+      "step": 72
+    },
+    {
+      "epoch": 0.01872515069898679,
+      "grad_norm": 0.20911255478858948,
+      "learning_rate": 0.00022571483066022657,
+      "loss": 1.3302,
+      "step": 73
+    },
+    {
+      "epoch": 0.01898165961267154,
+      "grad_norm": 0.1987486630678177,
+      "learning_rate": 0.0002235635255745762,
+      "loss": 1.2373,
+      "step": 74
+    },
+    {
+      "epoch": 0.019238168526356292,
+      "grad_norm": 0.19390402734279633,
+      "learning_rate": 0.00022139210895556104,
+      "loss": 1.2301,
+      "step": 75
+    },
+    {
+      "epoch": 0.019238168526356292,
+      "eval_loss": 0.294239342212677,
+      "eval_runtime": 31.8459,
+      "eval_samples_per_second": 1.57,
+      "eval_steps_per_second": 0.22,
+      "step": 75
+    },
+    {
+      "epoch": 0.01949467744004104,
+      "grad_norm": 0.3244732618331909,
+      "learning_rate": 0.00021920117444680317,
+      "loss": 1.2272,
+      "step": 76
+    },
+    {
+      "epoch": 0.019751186353725792,
+      "grad_norm": 0.2098388969898224,
+      "learning_rate": 0.00021699132102792097,
+      "loss": 1.1795,
+      "step": 77
+    },
+    {
+      "epoch": 0.020007695267410544,
+      "grad_norm": 0.20879194140434265,
+      "learning_rate": 0.0002147631528507739,
+      "loss": 1.1792,
+      "step": 78
+    },
+    {
+      "epoch": 0.020264204181095292,
+      "grad_norm": 0.19269469380378723,
+      "learning_rate": 0.00021251727907429355,
+      "loss": 1.1004,
+      "step": 79
+    },
+    {
+      "epoch": 0.020520713094780044,
+      "grad_norm": 0.21726348996162415,
+      "learning_rate": 0.0002102543136979454,
+      "loss": 1.2513,
+      "step": 80
+    },
+    {
+      "epoch": 0.020777222008464793,
+      "grad_norm": 0.2271123230457306,
+      "learning_rate": 0.0002079748753938678,
+      "loss": 1.3485,
+      "step": 81
+    },
+    {
+      "epoch": 0.021033730922149545,
+      "grad_norm": 0.2089950293302536,
+      "learning_rate": 0.0002056795873377331,
+      "loss": 1.324,
+      "step": 82
+    },
+    {
+      "epoch": 0.021290239835834297,
+      "grad_norm": 0.19828511774539948,
+      "learning_rate": 0.00020336907703837748,
+      "loss": 1.2114,
+      "step": 83
+    },
+    {
+      "epoch": 0.021546748749519045,
+      "grad_norm": 0.3832542300224304,
+      "learning_rate": 0.00020104397616624645,
+      "loss": 1.3824,
+      "step": 84
+    },
+    {
+      "epoch": 0.021803257663203797,
+      "grad_norm": 0.19173561036586761,
+      "learning_rate": 0.00019870492038070252,
+      "loss": 1.0925,
+      "step": 85
+    },
+    {
+      "epoch": 0.022059766576888545,
+      "grad_norm": 0.23011600971221924,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 1.1248,
+      "step": 86
+    },
+    {
+      "epoch": 0.022316275490573297,
+      "grad_norm": 0.21012580394744873,
+      "learning_rate": 0.0001939875056076697,
+      "loss": 1.2874,
+      "step": 87
+    },
+    {
+      "epoch": 0.02257278440425805,
+      "grad_norm": 0.3018735647201538,
+      "learning_rate": 0.00019161043631427666,
+      "loss": 1.2895,
+      "step": 88
+    },
+    {
+      "epoch": 0.022829293317942798,
+      "grad_norm": 0.2396928071975708,
+      "learning_rate": 0.00018922199114307294,
+      "loss": 1.2793,
+      "step": 89
+    },
+    {
+      "epoch": 0.02308580223162755,
+      "grad_norm": 0.25280284881591797,
+      "learning_rate": 0.00018682282307111987,
+      "loss": 1.3991,
+      "step": 90
+    },
+    {
+      "epoch": 0.023342311145312298,
+      "grad_norm": 0.27613183856010437,
+      "learning_rate": 0.00018441358800701273,
+      "loss": 1.2545,
+      "step": 91
+    },
+    {
+      "epoch": 0.02359882005899705,
+      "grad_norm": 0.27307531237602234,
+      "learning_rate": 0.00018199494461156203,
+      "loss": 1.1296,
+      "step": 92
+    },
+    {
+      "epoch": 0.0238553289726818,
+      "grad_norm": 0.28749963641166687,
+      "learning_rate": 0.000179567554117722,
+      "loss": 1.1461,
+      "step": 93
+    },
+    {
+      "epoch": 0.02411183788636655,
+      "grad_norm": 0.3967796266078949,
+      "learning_rate": 0.00017713208014981648,
+      "loss": 1.3377,
+      "step": 94
+    },
+    {
+      "epoch": 0.024368346800051302,
+      "grad_norm": 0.3728596866130829,
+      "learning_rate": 0.00017468918854211007,
+      "loss": 1.2604,
+      "step": 95
+    },
+    {
+      "epoch": 0.024624855713736054,
+      "grad_norm": 0.45030513405799866,
+      "learning_rate": 0.00017223954715677627,
+      "loss": 1.0331,
+      "step": 96
+    },
+    {
+      "epoch": 0.024881364627420802,
+      "grad_norm": 0.8793625235557556,
+      "learning_rate": 0.00016978382570131034,
+      "loss": 1.2188,
+      "step": 97
+    },
+    {
+      "epoch": 0.025137873541105554,
+      "grad_norm": 0.9390032291412354,
+      "learning_rate": 0.00016732269554543794,
+      "loss": 1.3501,
+      "step": 98
+    },
+    {
+      "epoch": 0.025394382454790303,
+      "grad_norm": 0.9944431185722351,
+      "learning_rate": 0.00016485682953756942,
+      "loss": 1.5824,
+      "step": 99
+    },
+    {
+      "epoch": 0.025650891368475055,
+      "grad_norm": 1.055663824081421,
+      "learning_rate": 0.00016238690182084986,
+      "loss": 1.3991,
+      "step": 100
+    },
+    {
+      "epoch": 0.025650891368475055,
+      "eval_loss": 0.3280201852321625,
+      "eval_runtime": 31.818,
+      "eval_samples_per_second": 1.571,
+      "eval_steps_per_second": 0.22,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.57737660514304e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null