File size: 5,063 Bytes
06fa034 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.75609756097561,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3902439024390244,
"grad_norm": 0.9049399495124817,
"learning_rate": 4.347826086956522e-05,
"loss": 2.2562,
"step": 20
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.8101117610931396,
"learning_rate": 4.88544474393531e-05,
"loss": 2.1366,
"step": 40
},
{
"epoch": 1.170731707317073,
"grad_norm": 0.6929331421852112,
"learning_rate": 4.750673854447439e-05,
"loss": 1.9538,
"step": 60
},
{
"epoch": 1.5609756097560976,
"grad_norm": 0.754576563835144,
"learning_rate": 4.615902964959569e-05,
"loss": 1.9039,
"step": 80
},
{
"epoch": 1.951219512195122,
"grad_norm": 0.8291248679161072,
"learning_rate": 4.4811320754716985e-05,
"loss": 1.8062,
"step": 100
},
{
"epoch": 2.341463414634146,
"grad_norm": 0.9486225247383118,
"learning_rate": 4.3463611859838275e-05,
"loss": 1.7765,
"step": 120
},
{
"epoch": 2.7317073170731705,
"grad_norm": 1.1279809474945068,
"learning_rate": 4.211590296495957e-05,
"loss": 1.7288,
"step": 140
},
{
"epoch": 3.1219512195121952,
"grad_norm": 1.1358485221862793,
"learning_rate": 4.076819407008086e-05,
"loss": 1.7136,
"step": 160
},
{
"epoch": 3.5121951219512195,
"grad_norm": 1.2939114570617676,
"learning_rate": 3.942048517520216e-05,
"loss": 1.698,
"step": 180
},
{
"epoch": 3.902439024390244,
"grad_norm": 1.3795692920684814,
"learning_rate": 3.807277628032345e-05,
"loss": 1.668,
"step": 200
},
{
"epoch": 4.2926829268292686,
"grad_norm": 1.2294673919677734,
"learning_rate": 3.672506738544474e-05,
"loss": 1.6258,
"step": 220
},
{
"epoch": 4.682926829268292,
"grad_norm": 1.4048880338668823,
"learning_rate": 3.537735849056604e-05,
"loss": 1.6133,
"step": 240
},
{
"epoch": 5.073170731707317,
"grad_norm": 1.1704691648483276,
"learning_rate": 3.4029649595687336e-05,
"loss": 1.6349,
"step": 260
},
{
"epoch": 5.463414634146342,
"grad_norm": 1.4525257349014282,
"learning_rate": 3.2681940700808625e-05,
"loss": 1.5788,
"step": 280
},
{
"epoch": 5.853658536585366,
"grad_norm": 1.5394439697265625,
"learning_rate": 3.133423180592992e-05,
"loss": 1.5827,
"step": 300
},
{
"epoch": 6.2439024390243905,
"grad_norm": 1.5792720317840576,
"learning_rate": 2.998652291105121e-05,
"loss": 1.5451,
"step": 320
},
{
"epoch": 6.634146341463414,
"grad_norm": 1.6444499492645264,
"learning_rate": 2.863881401617251e-05,
"loss": 1.5757,
"step": 340
},
{
"epoch": 7.024390243902439,
"grad_norm": 1.5749609470367432,
"learning_rate": 2.7291105121293804e-05,
"loss": 1.5414,
"step": 360
},
{
"epoch": 7.414634146341464,
"grad_norm": 1.6040682792663574,
"learning_rate": 2.5943396226415094e-05,
"loss": 1.5311,
"step": 380
},
{
"epoch": 7.804878048780488,
"grad_norm": 1.7397934198379517,
"learning_rate": 2.459568733153639e-05,
"loss": 1.534,
"step": 400
},
{
"epoch": 8.195121951219512,
"grad_norm": 1.9339927434921265,
"learning_rate": 2.3247978436657683e-05,
"loss": 1.5277,
"step": 420
},
{
"epoch": 8.585365853658537,
"grad_norm": 1.8686648607254028,
"learning_rate": 2.1900269541778976e-05,
"loss": 1.496,
"step": 440
},
{
"epoch": 8.975609756097562,
"grad_norm": 1.8943285942077637,
"learning_rate": 2.055256064690027e-05,
"loss": 1.4955,
"step": 460
},
{
"epoch": 9.365853658536585,
"grad_norm": 2.715195894241333,
"learning_rate": 1.9204851752021562e-05,
"loss": 1.4837,
"step": 480
},
{
"epoch": 9.75609756097561,
"grad_norm": 2.464538097381592,
"learning_rate": 1.785714285714286e-05,
"loss": 1.5082,
"step": 500
}
],
"logging_steps": 20,
"max_steps": 765,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.137043399013171e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|