Training in progress, step 149, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +347 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0edf81e08968f34163bc0db35d46efdcae33ce717a6a80d73d6e87dea2fbb687
 size 381467712

 version https://git-lfs.github.com/spec/v1
+oid sha256:ddd2876e0a68bb8b7e959a04501787cffdab82fd97ca9977055aee5327255323
 size 381467712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c031591cf4dd76da2d24c8202807a3b694feb1ace00f6cc763c19626c9a64ab0
 size 194112692

 version https://git-lfs.github.com/spec/v1
+oid sha256:81668ee561c075c98c7a59efcf651ad7f78341d9ed87e7bd51098eb363c51a3c
 size 194112692

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0fe9d358728f3e2dfc39039512d5239b54756f9c232b67c69fe2b994b4ba419
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd6ebb4d1fcba13f470668229470b99a4a72d5e05b994155e9cfcbea87d34006
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:343b14e7ebf2086e0f8dcc5acab2659f6394ab8c83ebb899f766408511f6a705
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3167249cc14ab1cd4e7d1463e098421e293c9d1ad0924be094f1a6596bad0ce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.2716773748397827,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.0202020202020203,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,349 @@
       "eval_samples_per_second": 35.425,
       "eval_steps_per_second": 8.856,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -746,12 +1089,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.844904293747917e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.2716773748397827,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.01010101010101,
   "eval_steps": 50,
+  "global_step": 149,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 35.425,
       "eval_steps_per_second": 8.856,
       "step": 100
+    },
+    {
+      "epoch": 2.04040404040404,
+      "grad_norm": 16.043392181396484,
+      "learning_rate": 2.664846361837997e-05,
+      "loss": 0.2829,
+      "step": 101
+    },
+    {
+      "epoch": 2.0606060606060606,
+      "grad_norm": 4.770566463470459,
+      "learning_rate": 2.565525897695651e-05,
+      "loss": 0.3002,
+      "step": 102
+    },
+    {
+      "epoch": 2.080808080808081,
+      "grad_norm": 15.801465034484863,
+      "learning_rate": 2.467448965126443e-05,
+      "loss": 0.2552,
+      "step": 103
+    },
+    {
+      "epoch": 2.101010101010101,
+      "grad_norm": 6.288998126983643,
+      "learning_rate": 2.3706656619162278e-05,
+      "loss": 0.2836,
+      "step": 104
+    },
+    {
+      "epoch": 2.121212121212121,
+      "grad_norm": 4.048598289489746,
+      "learning_rate": 2.2752254250638126e-05,
+      "loss": 0.2602,
+      "step": 105
+    },
+    {
+      "epoch": 2.1414141414141414,
+      "grad_norm": 5.017980098724365,
+      "learning_rate": 2.1811770055284968e-05,
+      "loss": 0.2913,
+      "step": 106
+    },
+    {
+      "epoch": 2.1616161616161618,
+      "grad_norm": 4.102363586425781,
+      "learning_rate": 2.0885684433280333e-05,
+      "loss": 0.2716,
+      "step": 107
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 4.771272659301758,
+      "learning_rate": 1.9974470429997483e-05,
+      "loss": 0.2735,
+      "step": 108
+    },
+    {
+      "epoch": 2.202020202020202,
+      "grad_norm": 4.773435592651367,
+      "learning_rate": 1.907859349437336e-05,
+      "loss": 0.2543,
+      "step": 109
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 8.052456855773926,
+      "learning_rate": 1.8198511241156903e-05,
+      "loss": 0.3169,
+      "step": 110
+    },
+    {
+      "epoch": 2.242424242424242,
+      "grad_norm": 7.02642297744751,
+      "learning_rate": 1.7334673217158974e-05,
+      "loss": 0.3384,
+      "step": 111
+    },
+    {
+      "epoch": 2.2626262626262625,
+      "grad_norm": 4.065310478210449,
+      "learning_rate": 1.6487520671623468e-05,
+      "loss": 0.3324,
+      "step": 112
+    },
+    {
+      "epoch": 2.282828282828283,
+      "grad_norm": 4.226308345794678,
+      "learning_rate": 1.5657486330836784e-05,
+      "loss": 0.283,
+      "step": 113
+    },
+    {
+      "epoch": 2.303030303030303,
+      "grad_norm": 4.673561096191406,
+      "learning_rate": 1.484499417709087e-05,
+      "loss": 0.2792,
+      "step": 114
+    },
+    {
+      "epoch": 2.323232323232323,
+      "grad_norm": 3.937140464782715,
+      "learning_rate": 1.405045923211265e-05,
+      "loss": 0.3049,
+      "step": 115
+    },
+    {
+      "epoch": 2.3434343434343434,
+      "grad_norm": 6.760053634643555,
+      "learning_rate": 1.3274287345070562e-05,
+      "loss": 0.2954,
+      "step": 116
+    },
+    {
+      "epoch": 2.3636363636363638,
+      "grad_norm": 4.757177352905273,
+      "learning_rate": 1.2516874985266508e-05,
+      "loss": 0.2701,
+      "step": 117
+    },
+    {
+      "epoch": 2.3838383838383836,
+      "grad_norm": 4.382113933563232,
+      "learning_rate": 1.1778609039618805e-05,
+      "loss": 0.2332,
+      "step": 118
+    },
+    {
+      "epoch": 2.404040404040404,
+      "grad_norm": 5.1120100021362305,
+      "learning_rate": 1.1059866615040204e-05,
+      "loss": 0.2603,
+      "step": 119
+    },
+    {
+      "epoch": 2.4242424242424243,
+      "grad_norm": 5.514906406402588,
+      "learning_rate": 1.0361014845811168e-05,
+      "loss": 0.3278,
+      "step": 120
+    },
+    {
+      "epoch": 2.4444444444444446,
+      "grad_norm": 4.801580429077148,
+      "learning_rate": 9.682410706047428e-06,
+      "loss": 0.2455,
+      "step": 121
+    },
+    {
+      "epoch": 2.4646464646464645,
+      "grad_norm": 4.759316921234131,
+      "learning_rate": 9.024400827357344e-06,
+      "loss": 0.2886,
+      "step": 122
+    },
+    {
+      "epoch": 2.484848484848485,
+      "grad_norm": 9.59177303314209,
+      "learning_rate": 8.387321321781976e-06,
+      "loss": 0.4744,
+      "step": 123
+    },
+    {
+      "epoch": 2.505050505050505,
+      "grad_norm": 5.647252559661865,
+      "learning_rate": 7.77149761010898e-06,
+      "loss": 0.31,
+      "step": 124
+    },
+    {
+      "epoch": 2.525252525252525,
+      "grad_norm": 4.252394676208496,
+      "learning_rate": 7.177244255647208e-06,
+      "loss": 0.2365,
+      "step": 125
+    },
+    {
+      "epoch": 2.5454545454545454,
+      "grad_norm": 7.538860321044922,
+      "learning_rate": 6.6048648035475115e-06,
+      "loss": 0.2448,
+      "step": 126
+    },
+    {
+      "epoch": 2.5656565656565657,
+      "grad_norm": 5.2316999435424805,
+      "learning_rate": 6.054651625751717e-06,
+      "loss": 0.3191,
+      "step": 127
+    },
+    {
+      "epoch": 2.5858585858585856,
+      "grad_norm": 4.985154151916504,
+      "learning_rate": 5.526885771648599e-06,
+      "loss": 0.2462,
+      "step": 128
+    },
+    {
+      "epoch": 2.606060606060606,
+      "grad_norm": 3.8869848251342773,
+      "learning_rate": 5.021836824513759e-06,
+      "loss": 0.2581,
+      "step": 129
+    },
+    {
+      "epoch": 2.6262626262626263,
+      "grad_norm": 4.702490329742432,
+      "learning_rate": 4.53976276380616e-06,
+      "loss": 0.3093,
+      "step": 130
+    },
+    {
+      "epoch": 2.6464646464646466,
+      "grad_norm": 4.508066177368164,
+      "learning_rate": 4.080909833391944e-06,
+      "loss": 0.301,
+      "step": 131
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 5.0213446617126465,
+      "learning_rate": 3.6455124157629805e-06,
+      "loss": 0.3258,
+      "step": 132
+    },
+    {
+      "epoch": 2.686868686868687,
+      "grad_norm": 5.15741491317749,
+      "learning_rate": 3.2337929123139434e-06,
+      "loss": 0.2747,
+      "step": 133
+    },
+    {
+      "epoch": 2.707070707070707,
+      "grad_norm": 4.806821823120117,
+      "learning_rate": 2.8459616297395466e-06,
+      "loss": 0.2677,
+      "step": 134
+    },
+    {
+      "epoch": 2.7272727272727275,
+      "grad_norm": 6.16556978225708,
+      "learning_rate": 2.4822166726096774e-06,
+      "loss": 0.3746,
+      "step": 135
+    },
+    {
+      "epoch": 2.7474747474747474,
+      "grad_norm": 4.232897758483887,
+      "learning_rate": 2.142743842177386e-06,
+      "loss": 0.2943,
+      "step": 136
+    },
+    {
+      "epoch": 2.7676767676767677,
+      "grad_norm": 4.609518051147461,
+      "learning_rate": 1.827716541471486e-06,
+      "loss": 0.2712,
+      "step": 137
+    },
+    {
+      "epoch": 2.787878787878788,
+      "grad_norm": 5.150665760040283,
+      "learning_rate": 1.5372956867220677e-06,
+      "loss": 0.2823,
+      "step": 138
+    },
+    {
+      "epoch": 2.808080808080808,
+      "grad_norm": 3.9823038578033447,
+      "learning_rate": 1.2716296251644e-06,
+      "loss": 0.2736,
+      "step": 139
+    },
+    {
+      "epoch": 2.8282828282828283,
+      "grad_norm": 5.216023921966553,
+      "learning_rate": 1.0308540592629756e-06,
+      "loss": 0.3313,
+      "step": 140
+    },
+    {
+      "epoch": 2.8484848484848486,
+      "grad_norm": 4.384965896606445,
+      "learning_rate": 8.150919773946164e-07,
+      "loss": 0.2674,
+      "step": 141
+    },
+    {
+      "epoch": 2.8686868686868685,
+      "grad_norm": 4.805583953857422,
+      "learning_rate": 6.244535910258698e-07,
+      "loss": 0.2695,
+      "step": 142
+    },
+    {
+      "epoch": 2.888888888888889,
+      "grad_norm": 4.811954021453857,
+      "learning_rate": 4.590362784169022e-07,
+      "loss": 0.2901,
+      "step": 143
+    },
+    {
+      "epoch": 2.909090909090909,
+      "grad_norm": 5.8568949699401855,
+      "learning_rate": 3.1892453488058803e-07,
+      "loss": 0.3121,
+      "step": 144
+    },
+    {
+      "epoch": 2.929292929292929,
+      "grad_norm": 4.470373153686523,
+      "learning_rate": 2.0418992962224492e-07,
+      "loss": 0.2924,
+      "step": 145
+    },
+    {
+      "epoch": 2.9494949494949494,
+      "grad_norm": 5.206216335296631,
+      "learning_rate": 1.1489106918200487e-07,
+      "loss": 0.3076,
+      "step": 146
+    },
+    {
+      "epoch": 2.9696969696969697,
+      "grad_norm": 5.941263198852539,
+      "learning_rate": 5.107356749853298e-08,
+      "loss": 0.3061,
+      "step": 147
+    },
+    {
+      "epoch": 2.98989898989899,
+      "grad_norm": 5.180295467376709,
+      "learning_rate": 1.2770022609409626e-08,
+      "loss": 0.2699,
+      "step": 148
+    },
+    {
+      "epoch": 3.01010101010101,
+      "grad_norm": 5.430907249450684,
+      "learning_rate": 0.0,
+      "loss": 0.3501,
+      "step": 149
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.708870844186624e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null