|
{ |
|
"best_metric": 0.8791208791208791, |
|
"best_model_checkpoint": "vit-msn-small-lateral_flow_ivalidation_train_test_6/checkpoint-318", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 8.29874038696289, |
|
"learning_rate": 2.7777777777777776e-09, |
|
"loss": 0.6813, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 14.01459789276123, |
|
"learning_rate": 5.555555555555555e-09, |
|
"loss": 0.6845, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 12.911795616149902, |
|
"learning_rate": 8.333333333333334e-09, |
|
"loss": 0.6836, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 6.274200439453125, |
|
"learning_rate": 1.111111111111111e-08, |
|
"loss": 0.6521, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 8.031621932983398, |
|
"learning_rate": 1.3888888888888887e-08, |
|
"loss": 0.6781, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 7.054372787475586, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"loss": 0.6672, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.42124542124542125, |
|
"eval_loss": 0.6979768872261047, |
|
"eval_runtime": 1.1041, |
|
"eval_samples_per_second": 247.254, |
|
"eval_steps_per_second": 4.528, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.0769230769230769, |
|
"grad_norm": 7.790348529815674, |
|
"learning_rate": 1.9444444444444445e-08, |
|
"loss": 0.6725, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 13.414072036743164, |
|
"learning_rate": 2.222222222222222e-08, |
|
"loss": 0.6652, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.3846153846153846, |
|
"grad_norm": 8.81810474395752, |
|
"learning_rate": 2.5e-08, |
|
"loss": 0.6698, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 7.751201152801514, |
|
"learning_rate": 2.7777777777777774e-08, |
|
"loss": 0.6817, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.6923076923076923, |
|
"grad_norm": 8.985770225524902, |
|
"learning_rate": 3.0555555555555556e-08, |
|
"loss": 0.6661, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 6.888631820678711, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"loss": 0.6712, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 8.5791597366333, |
|
"learning_rate": 3.6111111111111106e-08, |
|
"loss": 0.6617, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4249084249084249, |
|
"eval_loss": 0.6965357065200806, |
|
"eval_runtime": 1.1521, |
|
"eval_samples_per_second": 236.95, |
|
"eval_steps_per_second": 4.34, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.1538461538461537, |
|
"grad_norm": 9.721835136413574, |
|
"learning_rate": 3.888888888888889e-08, |
|
"loss": 0.665, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 10.525633811950684, |
|
"learning_rate": 4.166666666666666e-08, |
|
"loss": 0.6629, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.4615384615384617, |
|
"grad_norm": 5.860161304473877, |
|
"learning_rate": 4.444444444444444e-08, |
|
"loss": 0.686, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.6153846153846154, |
|
"grad_norm": 7.980571269989014, |
|
"learning_rate": 4.722222222222222e-08, |
|
"loss": 0.6638, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"grad_norm": 6.91188383102417, |
|
"learning_rate": 5e-08, |
|
"loss": 0.6586, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"grad_norm": 8.952598571777344, |
|
"learning_rate": 5.2777777777777776e-08, |
|
"loss": 0.6699, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"eval_accuracy": 0.43956043956043955, |
|
"eval_loss": 0.6944313049316406, |
|
"eval_runtime": 1.1568, |
|
"eval_samples_per_second": 235.989, |
|
"eval_steps_per_second": 4.322, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 16.572885513305664, |
|
"learning_rate": 5.555555555555555e-08, |
|
"loss": 0.6761, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.230769230769231, |
|
"grad_norm": 10.379450798034668, |
|
"learning_rate": 5.833333333333333e-08, |
|
"loss": 0.6757, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 3.3846153846153846, |
|
"grad_norm": 8.093114852905273, |
|
"learning_rate": 6.111111111111111e-08, |
|
"loss": 0.6572, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 3.5384615384615383, |
|
"grad_norm": 12.009868621826172, |
|
"learning_rate": 6.388888888888888e-08, |
|
"loss": 0.6781, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"grad_norm": 6.616479396820068, |
|
"learning_rate": 6.666666666666667e-08, |
|
"loss": 0.6519, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 4.752511024475098, |
|
"learning_rate": 6.944444444444444e-08, |
|
"loss": 0.669, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 6.52732515335083, |
|
"learning_rate": 7.222222222222221e-08, |
|
"loss": 0.662, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.43956043956043955, |
|
"eval_loss": 0.6909541487693787, |
|
"eval_runtime": 1.1432, |
|
"eval_samples_per_second": 238.799, |
|
"eval_steps_per_second": 4.374, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 4.153846153846154, |
|
"grad_norm": 10.048100471496582, |
|
"learning_rate": 7.5e-08, |
|
"loss": 0.6616, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 4.3076923076923075, |
|
"grad_norm": 9.803552627563477, |
|
"learning_rate": 7.777777777777778e-08, |
|
"loss": 0.672, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 4.461538461538462, |
|
"grad_norm": 4.989064693450928, |
|
"learning_rate": 8.055555555555555e-08, |
|
"loss": 0.6615, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 9.584320068359375, |
|
"learning_rate": 8.333333333333333e-08, |
|
"loss": 0.6534, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.769230769230769, |
|
"grad_norm": 8.697301864624023, |
|
"learning_rate": 8.611111111111111e-08, |
|
"loss": 0.6592, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"grad_norm": 6.305507183074951, |
|
"learning_rate": 8.888888888888888e-08, |
|
"loss": 0.6548, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.45787545787545786, |
|
"eval_loss": 0.6873242855072021, |
|
"eval_runtime": 1.0879, |
|
"eval_samples_per_second": 250.944, |
|
"eval_steps_per_second": 4.596, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 5.076923076923077, |
|
"grad_norm": 8.161693572998047, |
|
"learning_rate": 9.166666666666665e-08, |
|
"loss": 0.6463, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 5.230769230769231, |
|
"grad_norm": 5.462063789367676, |
|
"learning_rate": 9.444444444444444e-08, |
|
"loss": 0.6532, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 5.384615384615385, |
|
"grad_norm": 7.397779941558838, |
|
"learning_rate": 9.722222222222222e-08, |
|
"loss": 0.6447, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 5.538461538461538, |
|
"grad_norm": 9.939643859863281, |
|
"learning_rate": 1e-07, |
|
"loss": 0.6484, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 5.6923076923076925, |
|
"grad_norm": 6.816234111785889, |
|
"learning_rate": 1.0277777777777777e-07, |
|
"loss": 0.6558, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"grad_norm": 11.238946914672852, |
|
"learning_rate": 1.0555555555555555e-07, |
|
"loss": 0.6669, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 8.005824089050293, |
|
"learning_rate": 1.0833333333333334e-07, |
|
"loss": 0.6541, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.4835164835164835, |
|
"eval_loss": 0.6825475692749023, |
|
"eval_runtime": 1.1582, |
|
"eval_samples_per_second": 235.72, |
|
"eval_steps_per_second": 4.317, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 6.5383524894714355, |
|
"learning_rate": 1.111111111111111e-07, |
|
"loss": 0.6317, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.3076923076923075, |
|
"grad_norm": 4.98549747467041, |
|
"learning_rate": 1.1388888888888888e-07, |
|
"loss": 0.6609, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 6.461538461538462, |
|
"grad_norm": 6.8932037353515625, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"loss": 0.6459, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 6.615384615384615, |
|
"grad_norm": 6.804798126220703, |
|
"learning_rate": 1.1944444444444445e-07, |
|
"loss": 0.6628, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"grad_norm": 7.5995707511901855, |
|
"learning_rate": 1.2222222222222222e-07, |
|
"loss": 0.6501, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"grad_norm": 10.426745414733887, |
|
"learning_rate": 1.25e-07, |
|
"loss": 0.6222, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"eval_accuracy": 0.5311355311355311, |
|
"eval_loss": 0.6776841282844543, |
|
"eval_runtime": 1.1748, |
|
"eval_samples_per_second": 232.383, |
|
"eval_steps_per_second": 4.256, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 7.076923076923077, |
|
"grad_norm": 6.766184329986572, |
|
"learning_rate": 1.2777777777777777e-07, |
|
"loss": 0.6452, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 7.230769230769231, |
|
"grad_norm": 10.556381225585938, |
|
"learning_rate": 1.3055555555555556e-07, |
|
"loss": 0.6157, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 7.384615384615385, |
|
"grad_norm": 9.157587051391602, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"loss": 0.6313, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 7.538461538461538, |
|
"grad_norm": 6.186281204223633, |
|
"learning_rate": 1.3611111111111108e-07, |
|
"loss": 0.6773, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 5.614987373352051, |
|
"learning_rate": 1.3888888888888888e-07, |
|
"loss": 0.6474, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 7.846153846153846, |
|
"grad_norm": 5.66176700592041, |
|
"learning_rate": 1.4166666666666665e-07, |
|
"loss": 0.6463, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.733225345611572, |
|
"learning_rate": 1.4444444444444442e-07, |
|
"loss": 0.6555, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5421245421245421, |
|
"eval_loss": 0.6718742251396179, |
|
"eval_runtime": 1.1693, |
|
"eval_samples_per_second": 233.475, |
|
"eval_steps_per_second": 4.276, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 8.153846153846153, |
|
"grad_norm": 5.067059516906738, |
|
"learning_rate": 1.4722222222222222e-07, |
|
"loss": 0.6388, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 8.307692307692308, |
|
"grad_norm": 7.690587520599365, |
|
"learning_rate": 1.5e-07, |
|
"loss": 0.6139, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 8.461538461538462, |
|
"grad_norm": 4.471611976623535, |
|
"learning_rate": 1.527777777777778e-07, |
|
"loss": 0.64, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 8.615384615384615, |
|
"grad_norm": 6.964099884033203, |
|
"learning_rate": 1.5555555555555556e-07, |
|
"loss": 0.6399, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 8.76923076923077, |
|
"grad_norm": 4.763670444488525, |
|
"learning_rate": 1.583333333333333e-07, |
|
"loss": 0.6176, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"grad_norm": 7.8895063400268555, |
|
"learning_rate": 1.611111111111111e-07, |
|
"loss": 0.6226, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.5860805860805861, |
|
"eval_loss": 0.6665313839912415, |
|
"eval_runtime": 1.1415, |
|
"eval_samples_per_second": 239.164, |
|
"eval_steps_per_second": 4.38, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 9.076923076923077, |
|
"grad_norm": 5.872027397155762, |
|
"learning_rate": 1.6388888888888888e-07, |
|
"loss": 0.6097, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 4.506241798400879, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"loss": 0.6481, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.384615384615385, |
|
"grad_norm": 6.888943672180176, |
|
"learning_rate": 1.6944444444444445e-07, |
|
"loss": 0.6135, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 9.538461538461538, |
|
"grad_norm": 5.079667568206787, |
|
"learning_rate": 1.7222222222222222e-07, |
|
"loss": 0.6268, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 9.692307692307692, |
|
"grad_norm": 5.653709411621094, |
|
"learning_rate": 1.75e-07, |
|
"loss": 0.6259, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"grad_norm": 5.669986724853516, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"loss": 0.6195, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.195782661437988, |
|
"learning_rate": 1.8055555555555554e-07, |
|
"loss": 0.5989, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 0.6603101491928101, |
|
"eval_runtime": 1.1943, |
|
"eval_samples_per_second": 228.592, |
|
"eval_steps_per_second": 4.187, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 10.153846153846153, |
|
"grad_norm": 5.164191722869873, |
|
"learning_rate": 1.833333333333333e-07, |
|
"loss": 0.6329, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 10.307692307692308, |
|
"grad_norm": 5.711167812347412, |
|
"learning_rate": 1.861111111111111e-07, |
|
"loss": 0.6198, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 10.461538461538462, |
|
"grad_norm": 5.415841102600098, |
|
"learning_rate": 1.8888888888888888e-07, |
|
"loss": 0.6162, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 10.615384615384615, |
|
"grad_norm": 5.584977626800537, |
|
"learning_rate": 1.9166666666666668e-07, |
|
"loss": 0.626, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"grad_norm": 4.149146556854248, |
|
"learning_rate": 1.9444444444444445e-07, |
|
"loss": 0.6246, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.923076923076923, |
|
"grad_norm": 8.436751365661621, |
|
"learning_rate": 1.9722222222222222e-07, |
|
"loss": 0.5754, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 10.923076923076923, |
|
"eval_accuracy": 0.6263736263736264, |
|
"eval_loss": 0.6555379629135132, |
|
"eval_runtime": 1.1482, |
|
"eval_samples_per_second": 237.759, |
|
"eval_steps_per_second": 4.355, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 11.076923076923077, |
|
"grad_norm": 5.09835147857666, |
|
"learning_rate": 2e-07, |
|
"loss": 0.6153, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 11.23076923076923, |
|
"grad_norm": 6.399817943572998, |
|
"learning_rate": 2.0277777777777776e-07, |
|
"loss": 0.6001, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 11.384615384615385, |
|
"grad_norm": 4.717789173126221, |
|
"learning_rate": 2.0555555555555553e-07, |
|
"loss": 0.6009, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 11.538461538461538, |
|
"grad_norm": 5.399771213531494, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"loss": 0.5998, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 11.692307692307692, |
|
"grad_norm": 5.429381370544434, |
|
"learning_rate": 2.111111111111111e-07, |
|
"loss": 0.5892, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 11.846153846153847, |
|
"grad_norm": 4.105190277099609, |
|
"learning_rate": 2.1388888888888888e-07, |
|
"loss": 0.6111, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 6.455173969268799, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"loss": 0.6251, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6483516483516484, |
|
"eval_loss": 0.6492875218391418, |
|
"eval_runtime": 1.1131, |
|
"eval_samples_per_second": 245.251, |
|
"eval_steps_per_second": 4.492, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 12.153846153846153, |
|
"grad_norm": 6.023905277252197, |
|
"learning_rate": 2.1944444444444442e-07, |
|
"loss": 0.6211, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 4.803109645843506, |
|
"learning_rate": 2.222222222222222e-07, |
|
"loss": 0.5972, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.461538461538462, |
|
"grad_norm": 4.598735332489014, |
|
"learning_rate": 2.25e-07, |
|
"loss": 0.5978, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 12.615384615384615, |
|
"grad_norm": 5.137476444244385, |
|
"learning_rate": 2.2777777777777776e-07, |
|
"loss": 0.5878, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 12.76923076923077, |
|
"grad_norm": 5.255553245544434, |
|
"learning_rate": 2.3055555555555556e-07, |
|
"loss": 0.59, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"grad_norm": 4.83677864074707, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"loss": 0.5796, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6446050405502319, |
|
"eval_runtime": 1.1065, |
|
"eval_samples_per_second": 246.726, |
|
"eval_steps_per_second": 4.519, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 13.076923076923077, |
|
"grad_norm": 5.2046403884887695, |
|
"learning_rate": 2.361111111111111e-07, |
|
"loss": 0.5808, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 13.23076923076923, |
|
"grad_norm": 5.6977057456970215, |
|
"learning_rate": 2.388888888888889e-07, |
|
"loss": 0.5921, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 13.384615384615385, |
|
"grad_norm": 4.75449800491333, |
|
"learning_rate": 2.4166666666666665e-07, |
|
"loss": 0.576, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 13.538461538461538, |
|
"grad_norm": 4.761056423187256, |
|
"learning_rate": 2.4444444444444445e-07, |
|
"loss": 0.6126, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 13.692307692307692, |
|
"grad_norm": 4.913057327270508, |
|
"learning_rate": 2.4722222222222224e-07, |
|
"loss": 0.5814, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"grad_norm": 7.290613651275635, |
|
"learning_rate": 2.5e-07, |
|
"loss": 0.5803, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.033799171447754, |
|
"learning_rate": 2.5277777777777773e-07, |
|
"loss": 0.5763, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6390318274497986, |
|
"eval_runtime": 1.1176, |
|
"eval_samples_per_second": 244.273, |
|
"eval_steps_per_second": 4.474, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.153846153846153, |
|
"grad_norm": 5.3400678634643555, |
|
"learning_rate": 2.5555555555555553e-07, |
|
"loss": 0.5817, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 14.307692307692308, |
|
"grad_norm": 3.9350779056549072, |
|
"learning_rate": 2.5833333333333333e-07, |
|
"loss": 0.588, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 14.461538461538462, |
|
"grad_norm": 4.339548110961914, |
|
"learning_rate": 2.6111111111111113e-07, |
|
"loss": 0.5964, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 14.615384615384615, |
|
"grad_norm": 5.889684677124023, |
|
"learning_rate": 2.638888888888889e-07, |
|
"loss": 0.5636, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"grad_norm": 4.385285377502441, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"loss": 0.5898, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 14.923076923076923, |
|
"grad_norm": 7.42651891708374, |
|
"learning_rate": 2.694444444444444e-07, |
|
"loss": 0.5952, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 14.923076923076923, |
|
"eval_accuracy": 0.684981684981685, |
|
"eval_loss": 0.6333425045013428, |
|
"eval_runtime": 1.0931, |
|
"eval_samples_per_second": 249.747, |
|
"eval_steps_per_second": 4.574, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 15.076923076923077, |
|
"grad_norm": 7.099740028381348, |
|
"learning_rate": 2.7222222222222216e-07, |
|
"loss": 0.5412, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 15.23076923076923, |
|
"grad_norm": 5.428436279296875, |
|
"learning_rate": 2.75e-07, |
|
"loss": 0.5645, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 5.187060832977295, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"loss": 0.5795, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.538461538461538, |
|
"grad_norm": 4.902520656585693, |
|
"learning_rate": 2.8055555555555556e-07, |
|
"loss": 0.5459, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 15.692307692307692, |
|
"grad_norm": 4.782113075256348, |
|
"learning_rate": 2.833333333333333e-07, |
|
"loss": 0.577, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 15.846153846153847, |
|
"grad_norm": 6.622628688812256, |
|
"learning_rate": 2.861111111111111e-07, |
|
"loss": 0.5583, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 4.653256416320801, |
|
"learning_rate": 2.8888888888888885e-07, |
|
"loss": 0.5675, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7032967032967034, |
|
"eval_loss": 0.6269450783729553, |
|
"eval_runtime": 1.2331, |
|
"eval_samples_per_second": 221.389, |
|
"eval_steps_per_second": 4.055, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 16.153846153846153, |
|
"grad_norm": 5.335786819458008, |
|
"learning_rate": 2.916666666666667e-07, |
|
"loss": 0.5492, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 16.307692307692307, |
|
"grad_norm": 6.031639575958252, |
|
"learning_rate": 2.9444444444444444e-07, |
|
"loss": 0.5475, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 16.46153846153846, |
|
"grad_norm": 8.427881240844727, |
|
"learning_rate": 2.972222222222222e-07, |
|
"loss": 0.5995, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 16.615384615384617, |
|
"grad_norm": 4.41181755065918, |
|
"learning_rate": 3e-07, |
|
"loss": 0.5458, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 16.76923076923077, |
|
"grad_norm": 5.0706257820129395, |
|
"learning_rate": 3.0277777777777773e-07, |
|
"loss": 0.5739, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"grad_norm": 5.834512233734131, |
|
"learning_rate": 3.055555555555556e-07, |
|
"loss": 0.5453, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.7106227106227107, |
|
"eval_loss": 0.6210848689079285, |
|
"eval_runtime": 1.1361, |
|
"eval_samples_per_second": 240.302, |
|
"eval_steps_per_second": 4.401, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 17.076923076923077, |
|
"grad_norm": 4.99503231048584, |
|
"learning_rate": 3.0833333333333333e-07, |
|
"loss": 0.5517, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 17.23076923076923, |
|
"grad_norm": 4.0909833908081055, |
|
"learning_rate": 3.111111111111111e-07, |
|
"loss": 0.5664, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 17.384615384615383, |
|
"grad_norm": 4.5490851402282715, |
|
"learning_rate": 3.1388888888888887e-07, |
|
"loss": 0.541, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 17.53846153846154, |
|
"grad_norm": 4.016046524047852, |
|
"learning_rate": 3.166666666666666e-07, |
|
"loss": 0.5788, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 17.692307692307693, |
|
"grad_norm": 5.587233066558838, |
|
"learning_rate": 3.194444444444444e-07, |
|
"loss": 0.5155, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"grad_norm": 5.020046234130859, |
|
"learning_rate": 3.222222222222222e-07, |
|
"loss": 0.5386, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 6.417337417602539, |
|
"learning_rate": 3.25e-07, |
|
"loss": 0.5199, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.6150110363960266, |
|
"eval_runtime": 1.1307, |
|
"eval_samples_per_second": 241.446, |
|
"eval_steps_per_second": 4.422, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 18.153846153846153, |
|
"grad_norm": 5.794677257537842, |
|
"learning_rate": 3.2777777777777776e-07, |
|
"loss": 0.5343, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 18.307692307692307, |
|
"grad_norm": 4.628763198852539, |
|
"learning_rate": 3.3055555555555556e-07, |
|
"loss": 0.5275, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 7.958808422088623, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.5252, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.615384615384617, |
|
"grad_norm": 4.4327826499938965, |
|
"learning_rate": 3.361111111111111e-07, |
|
"loss": 0.5233, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"grad_norm": 5.422006130218506, |
|
"learning_rate": 3.388888888888889e-07, |
|
"loss": 0.5646, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 18.923076923076923, |
|
"grad_norm": 4.5244975090026855, |
|
"learning_rate": 3.4166666666666664e-07, |
|
"loss": 0.541, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 18.923076923076923, |
|
"eval_accuracy": 0.7216117216117216, |
|
"eval_loss": 0.6090343594551086, |
|
"eval_runtime": 1.2237, |
|
"eval_samples_per_second": 223.1, |
|
"eval_steps_per_second": 4.086, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 19.076923076923077, |
|
"grad_norm": 6.068373680114746, |
|
"learning_rate": 3.4444444444444444e-07, |
|
"loss": 0.5179, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 19.23076923076923, |
|
"grad_norm": 4.209527492523193, |
|
"learning_rate": 3.472222222222222e-07, |
|
"loss": 0.5533, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 19.384615384615383, |
|
"grad_norm": 5.68998384475708, |
|
"learning_rate": 3.5e-07, |
|
"loss": 0.5219, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 19.53846153846154, |
|
"grad_norm": 5.829248428344727, |
|
"learning_rate": 3.527777777777778e-07, |
|
"loss": 0.503, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 19.692307692307693, |
|
"grad_norm": 4.40165376663208, |
|
"learning_rate": 3.5555555555555553e-07, |
|
"loss": 0.5483, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 19.846153846153847, |
|
"grad_norm": 4.628623962402344, |
|
"learning_rate": 3.583333333333333e-07, |
|
"loss": 0.5249, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.771780014038086, |
|
"learning_rate": 3.6111111111111107e-07, |
|
"loss": 0.5273, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7289377289377289, |
|
"eval_loss": 0.6007034182548523, |
|
"eval_runtime": 1.1281, |
|
"eval_samples_per_second": 241.989, |
|
"eval_steps_per_second": 4.432, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 20.153846153846153, |
|
"grad_norm": 4.505945682525635, |
|
"learning_rate": 3.6388888888888887e-07, |
|
"loss": 0.515, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 20.307692307692307, |
|
"grad_norm": 7.482431411743164, |
|
"learning_rate": 3.666666666666666e-07, |
|
"loss": 0.4948, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 20.46153846153846, |
|
"grad_norm": 4.583889007568359, |
|
"learning_rate": 3.6944444444444447e-07, |
|
"loss": 0.5215, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 20.615384615384617, |
|
"grad_norm": 5.050055980682373, |
|
"learning_rate": 3.722222222222222e-07, |
|
"loss": 0.502, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 20.76923076923077, |
|
"grad_norm": 5.0981245040893555, |
|
"learning_rate": 3.75e-07, |
|
"loss": 0.5343, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"grad_norm": 7.6524529457092285, |
|
"learning_rate": 3.7777777777777775e-07, |
|
"loss": 0.495, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.7289377289377289, |
|
"eval_loss": 0.5934087634086609, |
|
"eval_runtime": 1.1036, |
|
"eval_samples_per_second": 247.368, |
|
"eval_steps_per_second": 4.531, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 21.076923076923077, |
|
"grad_norm": 5.747613906860352, |
|
"learning_rate": 3.805555555555555e-07, |
|
"loss": 0.4929, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 21.23076923076923, |
|
"grad_norm": 8.28075122833252, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"loss": 0.5302, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 21.384615384615383, |
|
"grad_norm": 5.376802921295166, |
|
"learning_rate": 3.861111111111111e-07, |
|
"loss": 0.547, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 4.9347968101501465, |
|
"learning_rate": 3.888888888888889e-07, |
|
"loss": 0.4807, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 21.692307692307693, |
|
"grad_norm": 6.574237823486328, |
|
"learning_rate": 3.9166666666666664e-07, |
|
"loss": 0.4839, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"grad_norm": 4.990074157714844, |
|
"learning_rate": 3.9444444444444444e-07, |
|
"loss": 0.4937, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 6.261688232421875, |
|
"learning_rate": 3.972222222222222e-07, |
|
"loss": 0.4855, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7472527472527473, |
|
"eval_loss": 0.5855301022529602, |
|
"eval_runtime": 1.1327, |
|
"eval_samples_per_second": 241.009, |
|
"eval_steps_per_second": 4.414, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 22.153846153846153, |
|
"grad_norm": 5.447592258453369, |
|
"learning_rate": 4e-07, |
|
"loss": 0.4225, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 22.307692307692307, |
|
"grad_norm": 4.675624847412109, |
|
"learning_rate": 4.027777777777778e-07, |
|
"loss": 0.4982, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 22.46153846153846, |
|
"grad_norm": 4.06153678894043, |
|
"learning_rate": 4.055555555555555e-07, |
|
"loss": 0.4751, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 22.615384615384617, |
|
"grad_norm": 7.109158039093018, |
|
"learning_rate": 4.083333333333333e-07, |
|
"loss": 0.4232, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"grad_norm": 7.506172180175781, |
|
"learning_rate": 4.1111111111111107e-07, |
|
"loss": 0.4935, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 22.923076923076923, |
|
"grad_norm": 4.346311092376709, |
|
"learning_rate": 4.1388888888888887e-07, |
|
"loss": 0.4763, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 22.923076923076923, |
|
"eval_accuracy": 0.7362637362637363, |
|
"eval_loss": 0.5787296295166016, |
|
"eval_runtime": 1.163, |
|
"eval_samples_per_second": 234.74, |
|
"eval_steps_per_second": 4.299, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 23.076923076923077, |
|
"grad_norm": 7.535876750946045, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 0.4539, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 23.23076923076923, |
|
"grad_norm": 4.603224277496338, |
|
"learning_rate": 4.194444444444444e-07, |
|
"loss": 0.491, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 23.384615384615383, |
|
"grad_norm": 4.099734783172607, |
|
"learning_rate": 4.222222222222222e-07, |
|
"loss": 0.4877, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 23.53846153846154, |
|
"grad_norm": 6.270420551300049, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"loss": 0.4791, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 23.692307692307693, |
|
"grad_norm": 5.5756330490112305, |
|
"learning_rate": 4.2777777777777775e-07, |
|
"loss": 0.4192, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 23.846153846153847, |
|
"grad_norm": 4.492015838623047, |
|
"learning_rate": 4.3055555555555555e-07, |
|
"loss": 0.429, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 5.119944095611572, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"loss": 0.4287, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7509157509157509, |
|
"eval_loss": 0.5693350434303284, |
|
"eval_runtime": 1.2614, |
|
"eval_samples_per_second": 216.434, |
|
"eval_steps_per_second": 3.964, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 24.153846153846153, |
|
"grad_norm": 6.68319034576416, |
|
"learning_rate": 4.361111111111111e-07, |
|
"loss": 0.4549, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 24.307692307692307, |
|
"grad_norm": 6.93166446685791, |
|
"learning_rate": 4.3888888888888884e-07, |
|
"loss": 0.4878, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 24.46153846153846, |
|
"grad_norm": 4.761252403259277, |
|
"learning_rate": 4.4166666666666664e-07, |
|
"loss": 0.463, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 5.771098613739014, |
|
"learning_rate": 4.444444444444444e-07, |
|
"loss": 0.4418, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 24.76923076923077, |
|
"grad_norm": 5.692720413208008, |
|
"learning_rate": 4.4722222222222223e-07, |
|
"loss": 0.4211, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"grad_norm": 10.276296615600586, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.445, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.5618556141853333, |
|
"eval_runtime": 1.2168, |
|
"eval_samples_per_second": 224.355, |
|
"eval_steps_per_second": 4.109, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 25.076923076923077, |
|
"grad_norm": 6.790480613708496, |
|
"learning_rate": 4.527777777777778e-07, |
|
"loss": 0.4313, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 25.23076923076923, |
|
"grad_norm": 4.737351417541504, |
|
"learning_rate": 4.555555555555555e-07, |
|
"loss": 0.4303, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 25.384615384615383, |
|
"grad_norm": 4.519160747528076, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"loss": 0.455, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 25.53846153846154, |
|
"grad_norm": 5.933927536010742, |
|
"learning_rate": 4.611111111111111e-07, |
|
"loss": 0.4461, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 25.692307692307693, |
|
"grad_norm": 7.9362664222717285, |
|
"learning_rate": 4.6388888888888886e-07, |
|
"loss": 0.3951, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"grad_norm": 9.995780944824219, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"loss": 0.4142, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 6.652438163757324, |
|
"learning_rate": 4.694444444444444e-07, |
|
"loss": 0.4343, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7802197802197802, |
|
"eval_loss": 0.5539770126342773, |
|
"eval_runtime": 1.1366, |
|
"eval_samples_per_second": 240.189, |
|
"eval_steps_per_second": 4.399, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 26.153846153846153, |
|
"grad_norm": 4.573406219482422, |
|
"learning_rate": 4.722222222222222e-07, |
|
"loss": 0.4279, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 26.307692307692307, |
|
"grad_norm": 6.939181327819824, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"loss": 0.443, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 26.46153846153846, |
|
"grad_norm": 10.387896537780762, |
|
"learning_rate": 4.777777777777778e-07, |
|
"loss": 0.4462, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 26.615384615384617, |
|
"grad_norm": 6.387257099151611, |
|
"learning_rate": 4.805555555555555e-07, |
|
"loss": 0.4221, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"grad_norm": 5.768893241882324, |
|
"learning_rate": 4.833333333333333e-07, |
|
"loss": 0.4048, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 26.923076923076923, |
|
"grad_norm": 6.23684024810791, |
|
"learning_rate": 4.861111111111111e-07, |
|
"loss": 0.3748, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 26.923076923076923, |
|
"eval_accuracy": 0.7875457875457875, |
|
"eval_loss": 0.5466815829277039, |
|
"eval_runtime": 1.1181, |
|
"eval_samples_per_second": 244.171, |
|
"eval_steps_per_second": 4.472, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 27.076923076923077, |
|
"grad_norm": 6.4479146003723145, |
|
"learning_rate": 4.888888888888889e-07, |
|
"loss": 0.3923, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 27.23076923076923, |
|
"grad_norm": 8.554872512817383, |
|
"learning_rate": 4.916666666666666e-07, |
|
"loss": 0.4114, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 27.384615384615383, |
|
"grad_norm": 6.017930030822754, |
|
"learning_rate": 4.944444444444445e-07, |
|
"loss": 0.3798, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 27.53846153846154, |
|
"grad_norm": 8.284281730651855, |
|
"learning_rate": 4.972222222222222e-07, |
|
"loss": 0.4146, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 9.119588851928711, |
|
"learning_rate": 5e-07, |
|
"loss": 0.4436, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 27.846153846153847, |
|
"grad_norm": 9.632242202758789, |
|
"learning_rate": 4.988095238095238e-07, |
|
"loss": 0.4031, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 16.935251235961914, |
|
"learning_rate": 4.976190476190476e-07, |
|
"loss": 0.4041, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8021978021978022, |
|
"eval_loss": 0.5421282052993774, |
|
"eval_runtime": 1.1482, |
|
"eval_samples_per_second": 237.764, |
|
"eval_steps_per_second": 4.355, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 28.153846153846153, |
|
"grad_norm": 5.241950511932373, |
|
"learning_rate": 4.964285714285715e-07, |
|
"loss": 0.4528, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 28.307692307692307, |
|
"grad_norm": 11.666001319885254, |
|
"learning_rate": 4.952380952380952e-07, |
|
"loss": 0.3889, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 28.46153846153846, |
|
"grad_norm": 9.493650436401367, |
|
"learning_rate": 4.94047619047619e-07, |
|
"loss": 0.3956, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 28.615384615384617, |
|
"grad_norm": 11.480545043945312, |
|
"learning_rate": 4.928571428571429e-07, |
|
"loss": 0.3756, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 28.76923076923077, |
|
"grad_norm": 18.468042373657227, |
|
"learning_rate": 4.916666666666666e-07, |
|
"loss": 0.401, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"grad_norm": 8.127854347229004, |
|
"learning_rate": 4.904761904761904e-07, |
|
"loss": 0.3543, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.529083788394928, |
|
"eval_runtime": 1.1719, |
|
"eval_samples_per_second": 232.959, |
|
"eval_steps_per_second": 4.267, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 29.076923076923077, |
|
"grad_norm": 12.729475975036621, |
|
"learning_rate": 4.892857142857142e-07, |
|
"loss": 0.4255, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 29.23076923076923, |
|
"grad_norm": 6.080268383026123, |
|
"learning_rate": 4.880952380952381e-07, |
|
"loss": 0.4233, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 29.384615384615383, |
|
"grad_norm": 9.651863098144531, |
|
"learning_rate": 4.869047619047619e-07, |
|
"loss": 0.3753, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 29.53846153846154, |
|
"grad_norm": 12.342060089111328, |
|
"learning_rate": 4.857142857142857e-07, |
|
"loss": 0.392, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 29.692307692307693, |
|
"grad_norm": 5.0623779296875, |
|
"learning_rate": 4.845238095238095e-07, |
|
"loss": 0.3942, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"grad_norm": 18.967161178588867, |
|
"learning_rate": 4.833333333333333e-07, |
|
"loss": 0.3374, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 13.24194622039795, |
|
"learning_rate": 4.821428571428571e-07, |
|
"loss": 0.3972, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8278388278388278, |
|
"eval_loss": 0.5134266018867493, |
|
"eval_runtime": 1.2397, |
|
"eval_samples_per_second": 220.215, |
|
"eval_steps_per_second": 4.033, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 30.153846153846153, |
|
"grad_norm": 12.518712997436523, |
|
"learning_rate": 4.80952380952381e-07, |
|
"loss": 0.4197, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 30.307692307692307, |
|
"grad_norm": 15.362881660461426, |
|
"learning_rate": 4.797619047619048e-07, |
|
"loss": 0.4056, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 30.46153846153846, |
|
"grad_norm": 17.072725296020508, |
|
"learning_rate": 4.785714285714286e-07, |
|
"loss": 0.3856, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 30.615384615384617, |
|
"grad_norm": 6.637291431427002, |
|
"learning_rate": 4.773809523809523e-07, |
|
"loss": 0.3627, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 8.751256942749023, |
|
"learning_rate": 4.761904761904761e-07, |
|
"loss": 0.3637, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 30.923076923076923, |
|
"grad_norm": 5.588964462280273, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"loss": 0.3716, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 30.923076923076923, |
|
"eval_accuracy": 0.8241758241758241, |
|
"eval_loss": 0.5149514079093933, |
|
"eval_runtime": 1.1326, |
|
"eval_samples_per_second": 241.045, |
|
"eval_steps_per_second": 4.415, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 31.076923076923077, |
|
"grad_norm": 8.8916654586792, |
|
"learning_rate": 4.738095238095238e-07, |
|
"loss": 0.362, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 31.23076923076923, |
|
"grad_norm": 7.172123908996582, |
|
"learning_rate": 4.7261904761904756e-07, |
|
"loss": 0.3541, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 31.384615384615383, |
|
"grad_norm": 9.399160385131836, |
|
"learning_rate": 4.714285714285714e-07, |
|
"loss": 0.3629, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 31.53846153846154, |
|
"grad_norm": 12.05125904083252, |
|
"learning_rate": 4.702380952380952e-07, |
|
"loss": 0.3414, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 31.692307692307693, |
|
"grad_norm": 6.808493137359619, |
|
"learning_rate": 4.69047619047619e-07, |
|
"loss": 0.372, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 31.846153846153847, |
|
"grad_norm": 12.759626388549805, |
|
"learning_rate": 4.6785714285714283e-07, |
|
"loss": 0.3574, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 9.680743217468262, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"loss": 0.3871, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8315018315018315, |
|
"eval_loss": 0.5100430846214294, |
|
"eval_runtime": 1.1335, |
|
"eval_samples_per_second": 240.838, |
|
"eval_steps_per_second": 4.411, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 32.15384615384615, |
|
"grad_norm": 13.226339340209961, |
|
"learning_rate": 4.6547619047619044e-07, |
|
"loss": 0.3623, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 32.30769230769231, |
|
"grad_norm": 9.359790802001953, |
|
"learning_rate": 4.6428571428571427e-07, |
|
"loss": 0.4045, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 32.46153846153846, |
|
"grad_norm": 4.744467258453369, |
|
"learning_rate": 4.630952380952381e-07, |
|
"loss": 0.3852, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 32.61538461538461, |
|
"grad_norm": 9.221460342407227, |
|
"learning_rate": 4.6190476190476193e-07, |
|
"loss": 0.3267, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 32.76923076923077, |
|
"grad_norm": 5.6791510581970215, |
|
"learning_rate": 4.6071428571428566e-07, |
|
"loss": 0.3969, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"grad_norm": 13.916045188903809, |
|
"learning_rate": 4.595238095238095e-07, |
|
"loss": 0.3729, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.8351648351648352, |
|
"eval_loss": 0.4985570013523102, |
|
"eval_runtime": 1.1523, |
|
"eval_samples_per_second": 236.921, |
|
"eval_steps_per_second": 4.339, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 33.07692307692308, |
|
"grad_norm": 20.20587158203125, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"loss": 0.3999, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 33.23076923076923, |
|
"grad_norm": 5.628478050231934, |
|
"learning_rate": 4.571428571428571e-07, |
|
"loss": 0.333, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 33.38461538461539, |
|
"grad_norm": 9.061274528503418, |
|
"learning_rate": 4.5595238095238093e-07, |
|
"loss": 0.3807, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 33.53846153846154, |
|
"grad_norm": 7.302306652069092, |
|
"learning_rate": 4.5476190476190476e-07, |
|
"loss": 0.3309, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 33.69230769230769, |
|
"grad_norm": 17.617534637451172, |
|
"learning_rate": 4.5357142857142854e-07, |
|
"loss": 0.319, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 9.831780433654785, |
|
"learning_rate": 4.5238095238095237e-07, |
|
"loss": 0.3849, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 10.667181968688965, |
|
"learning_rate": 4.511904761904762e-07, |
|
"loss": 0.3286, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8461538461538461, |
|
"eval_loss": 0.4945792853832245, |
|
"eval_runtime": 1.1795, |
|
"eval_samples_per_second": 231.451, |
|
"eval_steps_per_second": 4.239, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 34.15384615384615, |
|
"grad_norm": 6.82171106338501, |
|
"learning_rate": 4.5e-07, |
|
"loss": 0.3129, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 34.30769230769231, |
|
"grad_norm": 12.462568283081055, |
|
"learning_rate": 4.488095238095238e-07, |
|
"loss": 0.2947, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 34.46153846153846, |
|
"grad_norm": 14.821395874023438, |
|
"learning_rate": 4.4761904761904764e-07, |
|
"loss": 0.381, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 34.61538461538461, |
|
"grad_norm": 11.688921928405762, |
|
"learning_rate": 4.464285714285714e-07, |
|
"loss": 0.392, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"grad_norm": 16.245473861694336, |
|
"learning_rate": 4.452380952380952e-07, |
|
"loss": 0.3356, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 34.92307692307692, |
|
"grad_norm": 8.85981559753418, |
|
"learning_rate": 4.4404761904761903e-07, |
|
"loss": 0.4261, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 34.92307692307692, |
|
"eval_accuracy": 0.8388278388278388, |
|
"eval_loss": 0.49569690227508545, |
|
"eval_runtime": 1.1615, |
|
"eval_samples_per_second": 235.046, |
|
"eval_steps_per_second": 4.305, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 35.07692307692308, |
|
"grad_norm": 14.444450378417969, |
|
"learning_rate": 4.428571428571428e-07, |
|
"loss": 0.3065, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 35.23076923076923, |
|
"grad_norm": 18.541534423828125, |
|
"learning_rate": 4.4166666666666664e-07, |
|
"loss": 0.319, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 35.38461538461539, |
|
"grad_norm": 14.79796314239502, |
|
"learning_rate": 4.4047619047619047e-07, |
|
"loss": 0.3511, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 35.53846153846154, |
|
"grad_norm": 14.025275230407715, |
|
"learning_rate": 4.3928571428571425e-07, |
|
"loss": 0.351, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 35.69230769230769, |
|
"grad_norm": 9.698802947998047, |
|
"learning_rate": 4.380952380952381e-07, |
|
"loss": 0.3369, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 35.84615384615385, |
|
"grad_norm": 10.56847858428955, |
|
"learning_rate": 4.369047619047619e-07, |
|
"loss": 0.2976, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 15.778355598449707, |
|
"learning_rate": 4.357142857142857e-07, |
|
"loss": 0.4014, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.48495998978614807, |
|
"eval_runtime": 1.1929, |
|
"eval_samples_per_second": 228.863, |
|
"eval_steps_per_second": 4.192, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 36.15384615384615, |
|
"grad_norm": 16.71240234375, |
|
"learning_rate": 4.345238095238095e-07, |
|
"loss": 0.3749, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 36.30769230769231, |
|
"grad_norm": 13.584702491760254, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"loss": 0.3812, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 36.46153846153846, |
|
"grad_norm": 8.108072280883789, |
|
"learning_rate": 4.3214285714285713e-07, |
|
"loss": 0.3024, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 36.61538461538461, |
|
"grad_norm": 9.233282089233398, |
|
"learning_rate": 4.3095238095238096e-07, |
|
"loss": 0.3413, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 36.76923076923077, |
|
"grad_norm": 13.080716133117676, |
|
"learning_rate": 4.297619047619048e-07, |
|
"loss": 0.2792, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 10.88381576538086, |
|
"learning_rate": 4.285714285714285e-07, |
|
"loss": 0.3514, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.4806550443172455, |
|
"eval_runtime": 1.1462, |
|
"eval_samples_per_second": 238.179, |
|
"eval_steps_per_second": 4.362, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 37.07692307692308, |
|
"grad_norm": 13.345056533813477, |
|
"learning_rate": 4.2738095238095235e-07, |
|
"loss": 0.318, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 37.23076923076923, |
|
"grad_norm": 9.593791961669922, |
|
"learning_rate": 4.261904761904762e-07, |
|
"loss": 0.3487, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 37.38461538461539, |
|
"grad_norm": 9.83149242401123, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"loss": 0.3283, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 37.53846153846154, |
|
"grad_norm": 11.976517677307129, |
|
"learning_rate": 4.238095238095238e-07, |
|
"loss": 0.407, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 37.69230769230769, |
|
"grad_norm": 7.24540901184082, |
|
"learning_rate": 4.226190476190476e-07, |
|
"loss": 0.3899, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 37.84615384615385, |
|
"grad_norm": 11.755511283874512, |
|
"learning_rate": 4.214285714285714e-07, |
|
"loss": 0.247, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 14.939422607421875, |
|
"learning_rate": 4.202380952380952e-07, |
|
"loss": 0.3883, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.47668036818504333, |
|
"eval_runtime": 1.2822, |
|
"eval_samples_per_second": 212.923, |
|
"eval_steps_per_second": 3.9, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 38.15384615384615, |
|
"grad_norm": 7.6719279289245605, |
|
"learning_rate": 4.1904761904761906e-07, |
|
"loss": 0.3579, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 38.30769230769231, |
|
"grad_norm": 18.015718460083008, |
|
"learning_rate": 4.1785714285714283e-07, |
|
"loss": 0.3072, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 13.246123313903809, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 0.3756, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 38.61538461538461, |
|
"grad_norm": 7.806217670440674, |
|
"learning_rate": 4.154761904761905e-07, |
|
"loss": 0.3919, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 38.76923076923077, |
|
"grad_norm": 5.912841796875, |
|
"learning_rate": 4.142857142857143e-07, |
|
"loss": 0.3079, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 38.92307692307692, |
|
"grad_norm": 7.757283687591553, |
|
"learning_rate": 4.1309523809523805e-07, |
|
"loss": 0.3219, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 38.92307692307692, |
|
"eval_accuracy": 0.8534798534798534, |
|
"eval_loss": 0.4762944281101227, |
|
"eval_runtime": 1.2149, |
|
"eval_samples_per_second": 224.702, |
|
"eval_steps_per_second": 4.115, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 39.07692307692308, |
|
"grad_norm": 7.3901238441467285, |
|
"learning_rate": 4.119047619047619e-07, |
|
"loss": 0.2908, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 39.23076923076923, |
|
"grad_norm": 6.3056535720825195, |
|
"learning_rate": 4.1071428571428566e-07, |
|
"loss": 0.2982, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 39.38461538461539, |
|
"grad_norm": 9.348855018615723, |
|
"learning_rate": 4.095238095238095e-07, |
|
"loss": 0.3693, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 39.53846153846154, |
|
"grad_norm": 7.346530914306641, |
|
"learning_rate": 4.083333333333333e-07, |
|
"loss": 0.3475, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 39.69230769230769, |
|
"grad_norm": 5.94871711730957, |
|
"learning_rate": 4.071428571428571e-07, |
|
"loss": 0.3274, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 39.84615384615385, |
|
"grad_norm": 35.71484375, |
|
"learning_rate": 4.0595238095238093e-07, |
|
"loss": 0.3718, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 13.31654167175293, |
|
"learning_rate": 4.0476190476190476e-07, |
|
"loss": 0.4351, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.47377410531044006, |
|
"eval_runtime": 1.1651, |
|
"eval_samples_per_second": 234.318, |
|
"eval_steps_per_second": 4.292, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 40.15384615384615, |
|
"grad_norm": 8.30079460144043, |
|
"learning_rate": 4.0357142857142854e-07, |
|
"loss": 0.4084, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 40.30769230769231, |
|
"grad_norm": 11.171014785766602, |
|
"learning_rate": 4.0238095238095237e-07, |
|
"loss": 0.2589, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 40.46153846153846, |
|
"grad_norm": 12.395658493041992, |
|
"learning_rate": 4.011904761904762e-07, |
|
"loss": 0.3645, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 40.61538461538461, |
|
"grad_norm": 12.52223014831543, |
|
"learning_rate": 4e-07, |
|
"loss": 0.3063, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 40.76923076923077, |
|
"grad_norm": 22.095630645751953, |
|
"learning_rate": 3.988095238095238e-07, |
|
"loss": 0.2982, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"grad_norm": 19.723215103149414, |
|
"learning_rate": 3.976190476190476e-07, |
|
"loss": 0.3068, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.46877339482307434, |
|
"eval_runtime": 1.1413, |
|
"eval_samples_per_second": 239.204, |
|
"eval_steps_per_second": 4.381, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 41.07692307692308, |
|
"grad_norm": 14.390270233154297, |
|
"learning_rate": 3.9642857142857137e-07, |
|
"loss": 0.3193, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 41.23076923076923, |
|
"grad_norm": 14.494707107543945, |
|
"learning_rate": 3.952380952380952e-07, |
|
"loss": 0.3274, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 41.38461538461539, |
|
"grad_norm": 9.31578540802002, |
|
"learning_rate": 3.9404761904761903e-07, |
|
"loss": 0.2905, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 41.53846153846154, |
|
"grad_norm": 11.39842700958252, |
|
"learning_rate": 3.928571428571428e-07, |
|
"loss": 0.2591, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 41.69230769230769, |
|
"grad_norm": 12.236638069152832, |
|
"learning_rate": 3.9166666666666664e-07, |
|
"loss": 0.3198, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 41.84615384615385, |
|
"grad_norm": 14.803117752075195, |
|
"learning_rate": 3.9047619047619047e-07, |
|
"loss": 0.3718, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 12.712557792663574, |
|
"learning_rate": 3.8928571428571425e-07, |
|
"loss": 0.3356, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.45851626992225647, |
|
"eval_runtime": 1.178, |
|
"eval_samples_per_second": 231.744, |
|
"eval_steps_per_second": 4.244, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 42.15384615384615, |
|
"grad_norm": 5.399446964263916, |
|
"learning_rate": 3.880952380952381e-07, |
|
"loss": 0.3028, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 42.30769230769231, |
|
"grad_norm": 9.010210990905762, |
|
"learning_rate": 3.869047619047619e-07, |
|
"loss": 0.2897, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 42.46153846153846, |
|
"grad_norm": 8.666064262390137, |
|
"learning_rate": 3.857142857142857e-07, |
|
"loss": 0.3349, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 42.61538461538461, |
|
"grad_norm": 25.002635955810547, |
|
"learning_rate": 3.845238095238095e-07, |
|
"loss": 0.3166, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 42.76923076923077, |
|
"grad_norm": 13.861302375793457, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"loss": 0.3475, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 42.92307692307692, |
|
"grad_norm": 11.409740447998047, |
|
"learning_rate": 3.821428571428571e-07, |
|
"loss": 0.345, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 42.92307692307692, |
|
"eval_accuracy": 0.8681318681318682, |
|
"eval_loss": 0.4540693759918213, |
|
"eval_runtime": 1.1168, |
|
"eval_samples_per_second": 244.446, |
|
"eval_steps_per_second": 4.477, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 21.283527374267578, |
|
"learning_rate": 3.809523809523809e-07, |
|
"loss": 0.3316, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 43.23076923076923, |
|
"grad_norm": 7.409358978271484, |
|
"learning_rate": 3.7976190476190474e-07, |
|
"loss": 0.3305, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 43.38461538461539, |
|
"grad_norm": 11.763964653015137, |
|
"learning_rate": 3.785714285714285e-07, |
|
"loss": 0.3293, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 43.53846153846154, |
|
"grad_norm": 5.29448127746582, |
|
"learning_rate": 3.7738095238095235e-07, |
|
"loss": 0.2883, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 43.69230769230769, |
|
"grad_norm": 16.18635368347168, |
|
"learning_rate": 3.761904761904762e-07, |
|
"loss": 0.3549, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 43.84615384615385, |
|
"grad_norm": 17.321565628051758, |
|
"learning_rate": 3.75e-07, |
|
"loss": 0.3016, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 8.436405181884766, |
|
"learning_rate": 3.738095238095238e-07, |
|
"loss": 0.3254, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.45843759179115295, |
|
"eval_runtime": 1.1597, |
|
"eval_samples_per_second": 235.408, |
|
"eval_steps_per_second": 4.311, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 44.15384615384615, |
|
"grad_norm": 11.177824974060059, |
|
"learning_rate": 3.726190476190476e-07, |
|
"loss": 0.3276, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 44.30769230769231, |
|
"grad_norm": 10.337651252746582, |
|
"learning_rate": 3.7142857142857145e-07, |
|
"loss": 0.3419, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 44.46153846153846, |
|
"grad_norm": 21.42737579345703, |
|
"learning_rate": 3.7023809523809523e-07, |
|
"loss": 0.2689, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 44.61538461538461, |
|
"grad_norm": 11.9132661819458, |
|
"learning_rate": 3.6904761904761906e-07, |
|
"loss": 0.3776, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 44.76923076923077, |
|
"grad_norm": 14.86318302154541, |
|
"learning_rate": 3.678571428571429e-07, |
|
"loss": 0.3984, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"grad_norm": 15.56070327758789, |
|
"learning_rate": 3.666666666666666e-07, |
|
"loss": 0.3164, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_loss": 0.4591527581214905, |
|
"eval_runtime": 1.1076, |
|
"eval_samples_per_second": 246.479, |
|
"eval_steps_per_second": 4.514, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 45.07692307692308, |
|
"grad_norm": 14.275500297546387, |
|
"learning_rate": 3.6547619047619045e-07, |
|
"loss": 0.2905, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 45.23076923076923, |
|
"grad_norm": 11.206035614013672, |
|
"learning_rate": 3.642857142857143e-07, |
|
"loss": 0.2992, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 45.38461538461539, |
|
"grad_norm": 10.007750511169434, |
|
"learning_rate": 3.6309523809523805e-07, |
|
"loss": 0.3662, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 45.53846153846154, |
|
"grad_norm": 13.443836212158203, |
|
"learning_rate": 3.619047619047619e-07, |
|
"loss": 0.3129, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 45.69230769230769, |
|
"grad_norm": 22.08678436279297, |
|
"learning_rate": 3.607142857142857e-07, |
|
"loss": 0.3904, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 45.84615384615385, |
|
"grad_norm": 24.27136993408203, |
|
"learning_rate": 3.595238095238095e-07, |
|
"loss": 0.3193, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 9.215178489685059, |
|
"learning_rate": 3.583333333333333e-07, |
|
"loss": 0.3657, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8608058608058609, |
|
"eval_loss": 0.4533578157424927, |
|
"eval_runtime": 1.1346, |
|
"eval_samples_per_second": 240.624, |
|
"eval_steps_per_second": 4.407, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 13.869656562805176, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"loss": 0.3241, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 46.30769230769231, |
|
"grad_norm": 13.870816230773926, |
|
"learning_rate": 3.5595238095238094e-07, |
|
"loss": 0.294, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 46.46153846153846, |
|
"grad_norm": 5.2440338134765625, |
|
"learning_rate": 3.5476190476190477e-07, |
|
"loss": 0.3046, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 46.61538461538461, |
|
"grad_norm": 11.387068748474121, |
|
"learning_rate": 3.535714285714286e-07, |
|
"loss": 0.4067, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 46.76923076923077, |
|
"grad_norm": 10.643122673034668, |
|
"learning_rate": 3.523809523809524e-07, |
|
"loss": 0.3217, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 46.92307692307692, |
|
"grad_norm": 21.845155715942383, |
|
"learning_rate": 3.5119047619047615e-07, |
|
"loss": 0.2655, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 46.92307692307692, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.4501632750034332, |
|
"eval_runtime": 1.1413, |
|
"eval_samples_per_second": 239.204, |
|
"eval_steps_per_second": 4.381, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 47.07692307692308, |
|
"grad_norm": 12.1947603225708, |
|
"learning_rate": 3.5e-07, |
|
"loss": 0.2994, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 47.23076923076923, |
|
"grad_norm": 21.314617156982422, |
|
"learning_rate": 3.4880952380952376e-07, |
|
"loss": 0.2751, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 47.38461538461539, |
|
"grad_norm": 5.636424541473389, |
|
"learning_rate": 3.476190476190476e-07, |
|
"loss": 0.292, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 47.53846153846154, |
|
"grad_norm": 11.540352821350098, |
|
"learning_rate": 3.464285714285714e-07, |
|
"loss": 0.3693, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 47.69230769230769, |
|
"grad_norm": 9.435784339904785, |
|
"learning_rate": 3.452380952380952e-07, |
|
"loss": 0.334, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 47.84615384615385, |
|
"grad_norm": 14.798314094543457, |
|
"learning_rate": 3.4404761904761903e-07, |
|
"loss": 0.2237, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 10.26159954071045, |
|
"learning_rate": 3.4285714285714286e-07, |
|
"loss": 0.2981, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.4451758563518524, |
|
"eval_runtime": 1.2162, |
|
"eval_samples_per_second": 224.473, |
|
"eval_steps_per_second": 4.111, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 48.15384615384615, |
|
"grad_norm": 19.33696937561035, |
|
"learning_rate": 3.4166666666666664e-07, |
|
"loss": 0.2414, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 48.30769230769231, |
|
"grad_norm": 8.318500518798828, |
|
"learning_rate": 3.4047619047619047e-07, |
|
"loss": 0.3193, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 48.46153846153846, |
|
"grad_norm": 19.92133140563965, |
|
"learning_rate": 3.392857142857143e-07, |
|
"loss": 0.3218, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 48.61538461538461, |
|
"grad_norm": 18.465848922729492, |
|
"learning_rate": 3.380952380952381e-07, |
|
"loss": 0.3046, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 48.76923076923077, |
|
"grad_norm": 20.254159927368164, |
|
"learning_rate": 3.369047619047619e-07, |
|
"loss": 0.3651, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"grad_norm": 9.707018852233887, |
|
"learning_rate": 3.357142857142857e-07, |
|
"loss": 0.3508, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.4371393620967865, |
|
"eval_runtime": 1.2035, |
|
"eval_samples_per_second": 226.845, |
|
"eval_steps_per_second": 4.155, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 49.07692307692308, |
|
"grad_norm": 9.588353157043457, |
|
"learning_rate": 3.3452380952380947e-07, |
|
"loss": 0.4015, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 19.106985092163086, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.2461, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 49.38461538461539, |
|
"grad_norm": 18.668371200561523, |
|
"learning_rate": 3.3214285714285713e-07, |
|
"loss": 0.3201, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 49.53846153846154, |
|
"grad_norm": 22.97618865966797, |
|
"learning_rate": 3.309523809523809e-07, |
|
"loss": 0.2973, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 49.69230769230769, |
|
"grad_norm": 15.057040214538574, |
|
"learning_rate": 3.2976190476190474e-07, |
|
"loss": 0.2931, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 49.84615384615385, |
|
"grad_norm": 9.635611534118652, |
|
"learning_rate": 3.2857142857142857e-07, |
|
"loss": 0.3173, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 8.364943504333496, |
|
"learning_rate": 3.2738095238095235e-07, |
|
"loss": 0.3419, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.43940499424934387, |
|
"eval_runtime": 1.1837, |
|
"eval_samples_per_second": 230.634, |
|
"eval_steps_per_second": 4.224, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 50.15384615384615, |
|
"grad_norm": 10.205245018005371, |
|
"learning_rate": 3.261904761904762e-07, |
|
"loss": 0.3744, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 50.30769230769231, |
|
"grad_norm": 8.429767608642578, |
|
"learning_rate": 3.25e-07, |
|
"loss": 0.2852, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 50.46153846153846, |
|
"grad_norm": 19.509811401367188, |
|
"learning_rate": 3.238095238095238e-07, |
|
"loss": 0.283, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 50.61538461538461, |
|
"grad_norm": 12.072210311889648, |
|
"learning_rate": 3.226190476190476e-07, |
|
"loss": 0.2955, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"grad_norm": 19.032461166381836, |
|
"learning_rate": 3.2142857142857145e-07, |
|
"loss": 0.351, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 50.92307692307692, |
|
"grad_norm": 24.8001708984375, |
|
"learning_rate": 3.202380952380952e-07, |
|
"loss": 0.2668, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 50.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4430113434791565, |
|
"eval_runtime": 1.1828, |
|
"eval_samples_per_second": 230.809, |
|
"eval_steps_per_second": 4.227, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 51.07692307692308, |
|
"grad_norm": 7.619977951049805, |
|
"learning_rate": 3.19047619047619e-07, |
|
"loss": 0.2316, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 51.23076923076923, |
|
"grad_norm": 15.534941673278809, |
|
"learning_rate": 3.1785714285714284e-07, |
|
"loss": 0.2948, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 51.38461538461539, |
|
"grad_norm": 6.369411945343018, |
|
"learning_rate": 3.166666666666666e-07, |
|
"loss": 0.2319, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 51.53846153846154, |
|
"grad_norm": 12.510886192321777, |
|
"learning_rate": 3.1547619047619045e-07, |
|
"loss": 0.3389, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 51.69230769230769, |
|
"grad_norm": 9.731184005737305, |
|
"learning_rate": 3.142857142857143e-07, |
|
"loss": 0.2822, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 51.84615384615385, |
|
"grad_norm": 6.847411155700684, |
|
"learning_rate": 3.1309523809523806e-07, |
|
"loss": 0.3447, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 15.400504112243652, |
|
"learning_rate": 3.119047619047619e-07, |
|
"loss": 0.2972, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.43954789638519287, |
|
"eval_runtime": 1.1577, |
|
"eval_samples_per_second": 235.817, |
|
"eval_steps_per_second": 4.319, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 52.15384615384615, |
|
"grad_norm": 14.40497875213623, |
|
"learning_rate": 3.107142857142857e-07, |
|
"loss": 0.2947, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 12.60912799835205, |
|
"learning_rate": 3.095238095238095e-07, |
|
"loss": 0.2866, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 52.46153846153846, |
|
"grad_norm": 10.782893180847168, |
|
"learning_rate": 3.0833333333333333e-07, |
|
"loss": 0.275, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 52.61538461538461, |
|
"grad_norm": 14.848359107971191, |
|
"learning_rate": 3.0714285714285716e-07, |
|
"loss": 0.3377, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 52.76923076923077, |
|
"grad_norm": 16.875308990478516, |
|
"learning_rate": 3.0595238095238094e-07, |
|
"loss": 0.2871, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"grad_norm": 10.62590217590332, |
|
"learning_rate": 3.0476190476190477e-07, |
|
"loss": 0.3514, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.437090665102005, |
|
"eval_runtime": 1.1411, |
|
"eval_samples_per_second": 239.24, |
|
"eval_steps_per_second": 4.382, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 53.07692307692308, |
|
"grad_norm": 19.662609100341797, |
|
"learning_rate": 3.0357142857142855e-07, |
|
"loss": 0.2457, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 53.23076923076923, |
|
"grad_norm": 10.951351165771484, |
|
"learning_rate": 3.023809523809523e-07, |
|
"loss": 0.2542, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 53.38461538461539, |
|
"grad_norm": 6.810473918914795, |
|
"learning_rate": 3.0119047619047616e-07, |
|
"loss": 0.2873, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 53.53846153846154, |
|
"grad_norm": 11.747807502746582, |
|
"learning_rate": 3e-07, |
|
"loss": 0.3965, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 53.69230769230769, |
|
"grad_norm": 12.671740531921387, |
|
"learning_rate": 2.9880952380952376e-07, |
|
"loss": 0.3395, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"grad_norm": 10.57718276977539, |
|
"learning_rate": 2.976190476190476e-07, |
|
"loss": 0.3071, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"grad_norm": 7.219900131225586, |
|
"learning_rate": 2.9642857142857143e-07, |
|
"loss": 0.3012, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.43296605348587036, |
|
"eval_runtime": 1.1556, |
|
"eval_samples_per_second": 236.25, |
|
"eval_steps_per_second": 4.327, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 54.15384615384615, |
|
"grad_norm": 7.588340759277344, |
|
"learning_rate": 2.952380952380952e-07, |
|
"loss": 0.3188, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 54.30769230769231, |
|
"grad_norm": 20.165128707885742, |
|
"learning_rate": 2.9404761904761904e-07, |
|
"loss": 0.377, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 54.46153846153846, |
|
"grad_norm": 9.232548713684082, |
|
"learning_rate": 2.9285714285714287e-07, |
|
"loss": 0.276, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 54.61538461538461, |
|
"grad_norm": 8.916671752929688, |
|
"learning_rate": 2.916666666666667e-07, |
|
"loss": 0.3599, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 54.76923076923077, |
|
"grad_norm": 11.789280891418457, |
|
"learning_rate": 2.904761904761905e-07, |
|
"loss": 0.3004, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 54.92307692307692, |
|
"grad_norm": 8.527569770812988, |
|
"learning_rate": 2.892857142857143e-07, |
|
"loss": 0.2725, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 54.92307692307692, |
|
"eval_accuracy": 0.8791208791208791, |
|
"eval_loss": 0.4297783672809601, |
|
"eval_runtime": 1.1219, |
|
"eval_samples_per_second": 243.346, |
|
"eval_steps_per_second": 4.457, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 55.07692307692308, |
|
"grad_norm": 9.423765182495117, |
|
"learning_rate": 2.8809523809523803e-07, |
|
"loss": 0.2731, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 55.23076923076923, |
|
"grad_norm": 13.653554916381836, |
|
"learning_rate": 2.8690476190476186e-07, |
|
"loss": 0.4064, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 7.217813491821289, |
|
"learning_rate": 2.857142857142857e-07, |
|
"loss": 0.305, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 55.53846153846154, |
|
"grad_norm": 7.166869640350342, |
|
"learning_rate": 2.845238095238095e-07, |
|
"loss": 0.2488, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 55.69230769230769, |
|
"grad_norm": 6.370850086212158, |
|
"learning_rate": 2.833333333333333e-07, |
|
"loss": 0.2946, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 55.84615384615385, |
|
"grad_norm": 19.36492919921875, |
|
"learning_rate": 2.8214285714285713e-07, |
|
"loss": 0.3314, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 10.563324928283691, |
|
"learning_rate": 2.8095238095238096e-07, |
|
"loss": 0.2547, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4288838803768158, |
|
"eval_runtime": 1.1679, |
|
"eval_samples_per_second": 233.746, |
|
"eval_steps_per_second": 4.281, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 56.15384615384615, |
|
"grad_norm": 10.190791130065918, |
|
"learning_rate": 2.7976190476190474e-07, |
|
"loss": 0.2646, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 56.30769230769231, |
|
"grad_norm": 16.170412063598633, |
|
"learning_rate": 2.785714285714286e-07, |
|
"loss": 0.3392, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 56.46153846153846, |
|
"grad_norm": 7.313807964324951, |
|
"learning_rate": 2.773809523809524e-07, |
|
"loss": 0.2909, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 56.61538461538461, |
|
"grad_norm": 9.653914451599121, |
|
"learning_rate": 2.761904761904762e-07, |
|
"loss": 0.3295, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 56.76923076923077, |
|
"grad_norm": 6.966893672943115, |
|
"learning_rate": 2.75e-07, |
|
"loss": 0.2956, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"grad_norm": 15.753593444824219, |
|
"learning_rate": 2.7380952380952385e-07, |
|
"loss": 0.2896, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4282112419605255, |
|
"eval_runtime": 1.1522, |
|
"eval_samples_per_second": 236.937, |
|
"eval_steps_per_second": 4.339, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 57.07692307692308, |
|
"grad_norm": 13.775495529174805, |
|
"learning_rate": 2.7261904761904757e-07, |
|
"loss": 0.386, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 57.23076923076923, |
|
"grad_norm": 6.003649711608887, |
|
"learning_rate": 2.714285714285714e-07, |
|
"loss": 0.2473, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 57.38461538461539, |
|
"grad_norm": 22.614078521728516, |
|
"learning_rate": 2.7023809523809523e-07, |
|
"loss": 0.4098, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 57.53846153846154, |
|
"grad_norm": 15.326905250549316, |
|
"learning_rate": 2.69047619047619e-07, |
|
"loss": 0.2628, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 57.69230769230769, |
|
"grad_norm": 6.482524871826172, |
|
"learning_rate": 2.6785714285714284e-07, |
|
"loss": 0.3312, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 57.84615384615385, |
|
"grad_norm": 19.687318801879883, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"loss": 0.3625, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"grad_norm": 11.662421226501465, |
|
"learning_rate": 2.6547619047619045e-07, |
|
"loss": 0.3469, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4272923469543457, |
|
"eval_runtime": 1.1242, |
|
"eval_samples_per_second": 242.848, |
|
"eval_steps_per_second": 4.448, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 58.15384615384615, |
|
"grad_norm": 8.805383682250977, |
|
"learning_rate": 2.642857142857143e-07, |
|
"loss": 0.3987, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 58.30769230769231, |
|
"grad_norm": 11.661012649536133, |
|
"learning_rate": 2.630952380952381e-07, |
|
"loss": 0.2147, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 12.969446182250977, |
|
"learning_rate": 2.619047619047619e-07, |
|
"loss": 0.3074, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 58.61538461538461, |
|
"grad_norm": 9.435002326965332, |
|
"learning_rate": 2.607142857142857e-07, |
|
"loss": 0.2626, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 58.76923076923077, |
|
"grad_norm": 14.217181205749512, |
|
"learning_rate": 2.5952380952380955e-07, |
|
"loss": 0.2264, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 58.92307692307692, |
|
"grad_norm": 13.90890884399414, |
|
"learning_rate": 2.5833333333333333e-07, |
|
"loss": 0.3528, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 58.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4268935024738312, |
|
"eval_runtime": 1.1071, |
|
"eval_samples_per_second": 246.586, |
|
"eval_steps_per_second": 4.516, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 59.07692307692308, |
|
"grad_norm": 6.627048492431641, |
|
"learning_rate": 2.571428571428571e-07, |
|
"loss": 0.2552, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 59.23076923076923, |
|
"grad_norm": 14.275843620300293, |
|
"learning_rate": 2.5595238095238094e-07, |
|
"loss": 0.2876, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 59.38461538461539, |
|
"grad_norm": 15.865604400634766, |
|
"learning_rate": 2.547619047619047e-07, |
|
"loss": 0.3701, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 59.53846153846154, |
|
"grad_norm": 12.051728248596191, |
|
"learning_rate": 2.5357142857142855e-07, |
|
"loss": 0.2598, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 59.69230769230769, |
|
"grad_norm": 11.886255264282227, |
|
"learning_rate": 2.523809523809524e-07, |
|
"loss": 0.238, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 59.84615384615385, |
|
"grad_norm": 12.55905818939209, |
|
"learning_rate": 2.5119047619047616e-07, |
|
"loss": 0.2893, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 18.840225219726562, |
|
"learning_rate": 2.5e-07, |
|
"loss": 0.2552, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8681318681318682, |
|
"eval_loss": 0.4324240982532501, |
|
"eval_runtime": 1.1584, |
|
"eval_samples_per_second": 235.676, |
|
"eval_steps_per_second": 4.316, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 60.15384615384615, |
|
"grad_norm": 7.739254474639893, |
|
"learning_rate": 2.488095238095238e-07, |
|
"loss": 0.3825, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 60.30769230769231, |
|
"grad_norm": 27.177335739135742, |
|
"learning_rate": 2.476190476190476e-07, |
|
"loss": 0.3577, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 60.46153846153846, |
|
"grad_norm": 8.35522747039795, |
|
"learning_rate": 2.4642857142857143e-07, |
|
"loss": 0.296, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 60.61538461538461, |
|
"grad_norm": 12.505022048950195, |
|
"learning_rate": 2.452380952380952e-07, |
|
"loss": 0.2922, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 60.76923076923077, |
|
"grad_norm": 8.860665321350098, |
|
"learning_rate": 2.4404761904761904e-07, |
|
"loss": 0.3924, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"grad_norm": 13.714400291442871, |
|
"learning_rate": 2.4285714285714287e-07, |
|
"loss": 0.239, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.8644688644688645, |
|
"eval_loss": 0.4319455325603485, |
|
"eval_runtime": 1.158, |
|
"eval_samples_per_second": 235.758, |
|
"eval_steps_per_second": 4.318, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 61.07692307692308, |
|
"grad_norm": 11.82459831237793, |
|
"learning_rate": 2.4166666666666665e-07, |
|
"loss": 0.2555, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 61.23076923076923, |
|
"grad_norm": 13.206809997558594, |
|
"learning_rate": 2.404761904761905e-07, |
|
"loss": 0.3353, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 61.38461538461539, |
|
"grad_norm": 11.129719734191895, |
|
"learning_rate": 2.392857142857143e-07, |
|
"loss": 0.313, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 19.73814582824707, |
|
"learning_rate": 2.3809523809523806e-07, |
|
"loss": 0.2591, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 61.69230769230769, |
|
"grad_norm": 10.5856351852417, |
|
"learning_rate": 2.369047619047619e-07, |
|
"loss": 0.3416, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 61.84615384615385, |
|
"grad_norm": 12.684653282165527, |
|
"learning_rate": 2.357142857142857e-07, |
|
"loss": 0.2315, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"grad_norm": 8.558398246765137, |
|
"learning_rate": 2.345238095238095e-07, |
|
"loss": 0.3321, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.42702218890190125, |
|
"eval_runtime": 1.1757, |
|
"eval_samples_per_second": 232.2, |
|
"eval_steps_per_second": 4.253, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 62.15384615384615, |
|
"grad_norm": 9.598026275634766, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"loss": 0.3119, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 62.30769230769231, |
|
"grad_norm": 13.107952117919922, |
|
"learning_rate": 2.3214285714285714e-07, |
|
"loss": 0.3379, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 62.46153846153846, |
|
"grad_norm": 18.639419555664062, |
|
"learning_rate": 2.3095238095238097e-07, |
|
"loss": 0.2689, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 62.61538461538461, |
|
"grad_norm": 17.175498962402344, |
|
"learning_rate": 2.2976190476190475e-07, |
|
"loss": 0.3154, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 62.76923076923077, |
|
"grad_norm": 11.377558708190918, |
|
"learning_rate": 2.2857142857142855e-07, |
|
"loss": 0.2969, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 62.92307692307692, |
|
"grad_norm": 12.16077709197998, |
|
"learning_rate": 2.2738095238095238e-07, |
|
"loss": 0.3115, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 62.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.41838309168815613, |
|
"eval_runtime": 1.133, |
|
"eval_samples_per_second": 240.951, |
|
"eval_steps_per_second": 4.413, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 63.07692307692308, |
|
"grad_norm": 29.19352912902832, |
|
"learning_rate": 2.2619047619047619e-07, |
|
"loss": 0.327, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 63.23076923076923, |
|
"grad_norm": 21.762849807739258, |
|
"learning_rate": 2.25e-07, |
|
"loss": 0.3142, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 63.38461538461539, |
|
"grad_norm": 20.668453216552734, |
|
"learning_rate": 2.2380952380952382e-07, |
|
"loss": 0.2705, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 63.53846153846154, |
|
"grad_norm": 10.485206604003906, |
|
"learning_rate": 2.226190476190476e-07, |
|
"loss": 0.2635, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 63.69230769230769, |
|
"grad_norm": 5.819842338562012, |
|
"learning_rate": 2.214285714285714e-07, |
|
"loss": 0.281, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 63.84615384615385, |
|
"grad_norm": 8.578193664550781, |
|
"learning_rate": 2.2023809523809523e-07, |
|
"loss": 0.2981, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 14.02076244354248, |
|
"learning_rate": 2.1904761904761904e-07, |
|
"loss": 0.306, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4168645739555359, |
|
"eval_runtime": 1.1838, |
|
"eval_samples_per_second": 230.615, |
|
"eval_steps_per_second": 4.224, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 64.15384615384616, |
|
"grad_norm": 8.580323219299316, |
|
"learning_rate": 2.1785714285714284e-07, |
|
"loss": 0.2481, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 64.3076923076923, |
|
"grad_norm": 12.701449394226074, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"loss": 0.2978, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 64.46153846153847, |
|
"grad_norm": 7.2950544357299805, |
|
"learning_rate": 2.1547619047619048e-07, |
|
"loss": 0.2778, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 14.55117416381836, |
|
"learning_rate": 2.1428571428571426e-07, |
|
"loss": 0.3589, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 64.76923076923077, |
|
"grad_norm": 15.672528266906738, |
|
"learning_rate": 2.130952380952381e-07, |
|
"loss": 0.3638, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"grad_norm": 8.848112106323242, |
|
"learning_rate": 2.119047619047619e-07, |
|
"loss": 0.3086, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.41758179664611816, |
|
"eval_runtime": 1.1479, |
|
"eval_samples_per_second": 237.817, |
|
"eval_steps_per_second": 4.356, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 65.07692307692308, |
|
"grad_norm": 20.250051498413086, |
|
"learning_rate": 2.107142857142857e-07, |
|
"loss": 0.2965, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 65.23076923076923, |
|
"grad_norm": 8.315861701965332, |
|
"learning_rate": 2.0952380952380953e-07, |
|
"loss": 0.3119, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 65.38461538461539, |
|
"grad_norm": 7.086258888244629, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"loss": 0.2904, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 65.53846153846153, |
|
"grad_norm": 9.35214900970459, |
|
"learning_rate": 2.0714285714285714e-07, |
|
"loss": 0.2787, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 65.6923076923077, |
|
"grad_norm": 11.061731338500977, |
|
"learning_rate": 2.0595238095238094e-07, |
|
"loss": 0.287, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 65.84615384615384, |
|
"grad_norm": 17.736583709716797, |
|
"learning_rate": 2.0476190476190475e-07, |
|
"loss": 0.3101, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"grad_norm": 20.51115608215332, |
|
"learning_rate": 2.0357142857142855e-07, |
|
"loss": 0.4256, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4195899963378906, |
|
"eval_runtime": 1.2137, |
|
"eval_samples_per_second": 224.925, |
|
"eval_steps_per_second": 4.119, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 66.15384615384616, |
|
"grad_norm": 7.543363094329834, |
|
"learning_rate": 2.0238095238095238e-07, |
|
"loss": 0.1944, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 66.3076923076923, |
|
"grad_norm": 15.504809379577637, |
|
"learning_rate": 2.0119047619047619e-07, |
|
"loss": 0.2767, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 66.46153846153847, |
|
"grad_norm": 12.454192161560059, |
|
"learning_rate": 2e-07, |
|
"loss": 0.2697, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 66.61538461538461, |
|
"grad_norm": 11.19575023651123, |
|
"learning_rate": 1.988095238095238e-07, |
|
"loss": 0.2626, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 66.76923076923077, |
|
"grad_norm": 8.080245971679688, |
|
"learning_rate": 1.976190476190476e-07, |
|
"loss": 0.3262, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 66.92307692307692, |
|
"grad_norm": 16.002864837646484, |
|
"learning_rate": 1.964285714285714e-07, |
|
"loss": 0.2798, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 66.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4219285249710083, |
|
"eval_runtime": 1.1309, |
|
"eval_samples_per_second": 241.401, |
|
"eval_steps_per_second": 4.421, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 67.07692307692308, |
|
"grad_norm": 9.127429008483887, |
|
"learning_rate": 1.9523809523809524e-07, |
|
"loss": 0.2828, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 67.23076923076923, |
|
"grad_norm": 9.812334060668945, |
|
"learning_rate": 1.9404761904761904e-07, |
|
"loss": 0.3028, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 67.38461538461539, |
|
"grad_norm": 16.504629135131836, |
|
"learning_rate": 1.9285714285714284e-07, |
|
"loss": 0.2382, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 67.53846153846153, |
|
"grad_norm": 15.225912094116211, |
|
"learning_rate": 1.9166666666666668e-07, |
|
"loss": 0.3387, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 32.0360221862793, |
|
"learning_rate": 1.9047619047619045e-07, |
|
"loss": 0.252, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 67.84615384615384, |
|
"grad_norm": 10.604074478149414, |
|
"learning_rate": 1.8928571428571426e-07, |
|
"loss": 0.3423, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"grad_norm": 11.707740783691406, |
|
"learning_rate": 1.880952380952381e-07, |
|
"loss": 0.3016, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4223931133747101, |
|
"eval_runtime": 1.1901, |
|
"eval_samples_per_second": 229.39, |
|
"eval_steps_per_second": 4.201, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 68.15384615384616, |
|
"grad_norm": 15.720952987670898, |
|
"learning_rate": 1.869047619047619e-07, |
|
"loss": 0.2488, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 68.3076923076923, |
|
"grad_norm": 21.468849182128906, |
|
"learning_rate": 1.8571428571428572e-07, |
|
"loss": 0.3655, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 68.46153846153847, |
|
"grad_norm": 7.170112133026123, |
|
"learning_rate": 1.8452380952380953e-07, |
|
"loss": 0.2293, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 68.61538461538461, |
|
"grad_norm": 13.677178382873535, |
|
"learning_rate": 1.833333333333333e-07, |
|
"loss": 0.3512, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 68.76923076923077, |
|
"grad_norm": 15.128756523132324, |
|
"learning_rate": 1.8214285714285714e-07, |
|
"loss": 0.3291, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"grad_norm": 9.91322135925293, |
|
"learning_rate": 1.8095238095238094e-07, |
|
"loss": 0.2791, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.42070242762565613, |
|
"eval_runtime": 1.1329, |
|
"eval_samples_per_second": 240.966, |
|
"eval_steps_per_second": 4.413, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 69.07692307692308, |
|
"grad_norm": 18.428592681884766, |
|
"learning_rate": 1.7976190476190475e-07, |
|
"loss": 0.2857, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 69.23076923076923, |
|
"grad_norm": 13.563444137573242, |
|
"learning_rate": 1.7857142857142858e-07, |
|
"loss": 0.3579, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 69.38461538461539, |
|
"grad_norm": 7.071059226989746, |
|
"learning_rate": 1.7738095238095238e-07, |
|
"loss": 0.288, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 69.53846153846153, |
|
"grad_norm": 13.733859062194824, |
|
"learning_rate": 1.761904761904762e-07, |
|
"loss": 0.3107, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 69.6923076923077, |
|
"grad_norm": 24.475296020507812, |
|
"learning_rate": 1.75e-07, |
|
"loss": 0.3036, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 69.84615384615384, |
|
"grad_norm": 8.03947925567627, |
|
"learning_rate": 1.738095238095238e-07, |
|
"loss": 0.3046, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 12.644840240478516, |
|
"learning_rate": 1.726190476190476e-07, |
|
"loss": 0.2651, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4188561737537384, |
|
"eval_runtime": 1.1824, |
|
"eval_samples_per_second": 230.892, |
|
"eval_steps_per_second": 4.229, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 70.15384615384616, |
|
"grad_norm": 10.41529655456543, |
|
"learning_rate": 1.7142857142857143e-07, |
|
"loss": 0.2741, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 70.3076923076923, |
|
"grad_norm": 13.780343055725098, |
|
"learning_rate": 1.7023809523809524e-07, |
|
"loss": 0.1817, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 70.46153846153847, |
|
"grad_norm": 15.142984390258789, |
|
"learning_rate": 1.6904761904761904e-07, |
|
"loss": 0.2511, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 70.61538461538461, |
|
"grad_norm": 11.113754272460938, |
|
"learning_rate": 1.6785714285714285e-07, |
|
"loss": 0.3617, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 9.247007369995117, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"loss": 0.3423, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 70.92307692307692, |
|
"grad_norm": 10.317591667175293, |
|
"learning_rate": 1.6547619047619045e-07, |
|
"loss": 0.2466, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 70.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4177640378475189, |
|
"eval_runtime": 1.1382, |
|
"eval_samples_per_second": 239.846, |
|
"eval_steps_per_second": 4.393, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 71.07692307692308, |
|
"grad_norm": 12.4044771194458, |
|
"learning_rate": 1.6428571428571429e-07, |
|
"loss": 0.274, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 71.23076923076923, |
|
"grad_norm": 12.682540893554688, |
|
"learning_rate": 1.630952380952381e-07, |
|
"loss": 0.3048, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 71.38461538461539, |
|
"grad_norm": 25.84153175354004, |
|
"learning_rate": 1.619047619047619e-07, |
|
"loss": 0.2463, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 71.53846153846153, |
|
"grad_norm": 13.235491752624512, |
|
"learning_rate": 1.6071428571428573e-07, |
|
"loss": 0.415, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 71.6923076923077, |
|
"grad_norm": 6.873939514160156, |
|
"learning_rate": 1.595238095238095e-07, |
|
"loss": 0.3163, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 71.84615384615384, |
|
"grad_norm": 16.569108963012695, |
|
"learning_rate": 1.583333333333333e-07, |
|
"loss": 0.3067, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 18.71702003479004, |
|
"learning_rate": 1.5714285714285714e-07, |
|
"loss": 0.1913, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4177253544330597, |
|
"eval_runtime": 1.1798, |
|
"eval_samples_per_second": 231.388, |
|
"eval_steps_per_second": 4.238, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 72.15384615384616, |
|
"grad_norm": 21.70823097229004, |
|
"learning_rate": 1.5595238095238094e-07, |
|
"loss": 0.3477, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 72.3076923076923, |
|
"grad_norm": 18.4373779296875, |
|
"learning_rate": 1.5476190476190475e-07, |
|
"loss": 0.3226, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 72.46153846153847, |
|
"grad_norm": 12.795662879943848, |
|
"learning_rate": 1.5357142857142858e-07, |
|
"loss": 0.36, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 72.61538461538461, |
|
"grad_norm": 6.41522741317749, |
|
"learning_rate": 1.5238095238095238e-07, |
|
"loss": 0.2615, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 72.76923076923077, |
|
"grad_norm": 11.777397155761719, |
|
"learning_rate": 1.5119047619047616e-07, |
|
"loss": 0.2648, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"grad_norm": 14.508996963500977, |
|
"learning_rate": 1.5e-07, |
|
"loss": 0.2719, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4163550138473511, |
|
"eval_runtime": 1.1181, |
|
"eval_samples_per_second": 244.158, |
|
"eval_steps_per_second": 4.472, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 73.07692307692308, |
|
"grad_norm": 15.088624000549316, |
|
"learning_rate": 1.488095238095238e-07, |
|
"loss": 0.3153, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 73.23076923076923, |
|
"grad_norm": 6.610665798187256, |
|
"learning_rate": 1.476190476190476e-07, |
|
"loss": 0.2292, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 73.38461538461539, |
|
"grad_norm": 19.090049743652344, |
|
"learning_rate": 1.4642857142857143e-07, |
|
"loss": 0.2765, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 73.53846153846153, |
|
"grad_norm": 19.875932693481445, |
|
"learning_rate": 1.4523809523809524e-07, |
|
"loss": 0.2797, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 73.6923076923077, |
|
"grad_norm": 21.46002960205078, |
|
"learning_rate": 1.4404761904761902e-07, |
|
"loss": 0.2846, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 9.745357513427734, |
|
"learning_rate": 1.4285714285714285e-07, |
|
"loss": 0.3138, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"grad_norm": 9.963276863098145, |
|
"learning_rate": 1.4166666666666665e-07, |
|
"loss": 0.3364, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.41662341356277466, |
|
"eval_runtime": 1.1761, |
|
"eval_samples_per_second": 232.123, |
|
"eval_steps_per_second": 4.251, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 74.15384615384616, |
|
"grad_norm": 12.587307929992676, |
|
"learning_rate": 1.4047619047619048e-07, |
|
"loss": 0.3476, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 74.3076923076923, |
|
"grad_norm": 15.544804573059082, |
|
"learning_rate": 1.392857142857143e-07, |
|
"loss": 0.298, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 74.46153846153847, |
|
"grad_norm": 16.263813018798828, |
|
"learning_rate": 1.380952380952381e-07, |
|
"loss": 0.3103, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 74.61538461538461, |
|
"grad_norm": 15.350561141967773, |
|
"learning_rate": 1.3690476190476192e-07, |
|
"loss": 0.3083, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 74.76923076923077, |
|
"grad_norm": 18.922351837158203, |
|
"learning_rate": 1.357142857142857e-07, |
|
"loss": 0.2371, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 74.92307692307692, |
|
"grad_norm": 10.000033378601074, |
|
"learning_rate": 1.345238095238095e-07, |
|
"loss": 0.283, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 74.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4179239273071289, |
|
"eval_runtime": 1.1425, |
|
"eval_samples_per_second": 238.952, |
|
"eval_steps_per_second": 4.376, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 75.07692307692308, |
|
"grad_norm": 11.999493598937988, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"loss": 0.2356, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 75.23076923076923, |
|
"grad_norm": 15.124945640563965, |
|
"learning_rate": 1.3214285714285714e-07, |
|
"loss": 0.2473, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 75.38461538461539, |
|
"grad_norm": 5.885743618011475, |
|
"learning_rate": 1.3095238095238095e-07, |
|
"loss": 0.2623, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 75.53846153846153, |
|
"grad_norm": 17.72136116027832, |
|
"learning_rate": 1.2976190476190478e-07, |
|
"loss": 0.2667, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 75.6923076923077, |
|
"grad_norm": 17.649593353271484, |
|
"learning_rate": 1.2857142857142855e-07, |
|
"loss": 0.3016, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 75.84615384615384, |
|
"grad_norm": 15.614336013793945, |
|
"learning_rate": 1.2738095238095236e-07, |
|
"loss": 0.2771, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"grad_norm": 6.446508884429932, |
|
"learning_rate": 1.261904761904762e-07, |
|
"loss": 0.2891, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4174346923828125, |
|
"eval_runtime": 1.1254, |
|
"eval_samples_per_second": 242.591, |
|
"eval_steps_per_second": 4.443, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 76.15384615384616, |
|
"grad_norm": 8.419719696044922, |
|
"learning_rate": 1.25e-07, |
|
"loss": 0.2639, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 76.3076923076923, |
|
"grad_norm": 8.500795364379883, |
|
"learning_rate": 1.238095238095238e-07, |
|
"loss": 0.2902, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 76.46153846153847, |
|
"grad_norm": 9.533052444458008, |
|
"learning_rate": 1.226190476190476e-07, |
|
"loss": 0.3342, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 76.61538461538461, |
|
"grad_norm": 32.33898162841797, |
|
"learning_rate": 1.2142857142857143e-07, |
|
"loss": 0.3059, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 76.76923076923077, |
|
"grad_norm": 14.258476257324219, |
|
"learning_rate": 1.2023809523809524e-07, |
|
"loss": 0.249, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 28.383731842041016, |
|
"learning_rate": 1.1904761904761903e-07, |
|
"loss": 0.2625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4179657995700836, |
|
"eval_runtime": 1.0921, |
|
"eval_samples_per_second": 249.988, |
|
"eval_steps_per_second": 4.579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 77.07692307692308, |
|
"grad_norm": 14.453038215637207, |
|
"learning_rate": 1.1785714285714285e-07, |
|
"loss": 0.3501, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 77.23076923076923, |
|
"grad_norm": 15.141488075256348, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"loss": 0.2959, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 77.38461538461539, |
|
"grad_norm": 27.595386505126953, |
|
"learning_rate": 1.1547619047619048e-07, |
|
"loss": 0.3883, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 77.53846153846153, |
|
"grad_norm": 7.041699409484863, |
|
"learning_rate": 1.1428571428571427e-07, |
|
"loss": 0.2567, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 77.6923076923077, |
|
"grad_norm": 5.878727912902832, |
|
"learning_rate": 1.1309523809523809e-07, |
|
"loss": 0.1855, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 77.84615384615384, |
|
"grad_norm": 9.228104591369629, |
|
"learning_rate": 1.1190476190476191e-07, |
|
"loss": 0.2607, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"grad_norm": 10.124485969543457, |
|
"learning_rate": 1.107142857142857e-07, |
|
"loss": 0.2843, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4184325635433197, |
|
"eval_runtime": 1.2364, |
|
"eval_samples_per_second": 220.81, |
|
"eval_steps_per_second": 4.044, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 78.15384615384616, |
|
"grad_norm": 32.02544403076172, |
|
"learning_rate": 1.0952380952380952e-07, |
|
"loss": 0.2707, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 78.3076923076923, |
|
"grad_norm": 8.309106826782227, |
|
"learning_rate": 1.0833333333333334e-07, |
|
"loss": 0.3501, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 78.46153846153847, |
|
"grad_norm": 10.85938549041748, |
|
"learning_rate": 1.0714285714285713e-07, |
|
"loss": 0.3379, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 78.61538461538461, |
|
"grad_norm": 14.523594856262207, |
|
"learning_rate": 1.0595238095238095e-07, |
|
"loss": 0.2191, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 78.76923076923077, |
|
"grad_norm": 16.100353240966797, |
|
"learning_rate": 1.0476190476190476e-07, |
|
"loss": 0.3082, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 78.92307692307692, |
|
"grad_norm": 10.894936561584473, |
|
"learning_rate": 1.0357142857142857e-07, |
|
"loss": 0.375, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 78.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.41671693325042725, |
|
"eval_runtime": 1.116, |
|
"eval_samples_per_second": 244.622, |
|
"eval_steps_per_second": 4.48, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 79.07692307692308, |
|
"grad_norm": 10.38732624053955, |
|
"learning_rate": 1.0238095238095237e-07, |
|
"loss": 0.2812, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 79.23076923076923, |
|
"grad_norm": 15.733484268188477, |
|
"learning_rate": 1.0119047619047619e-07, |
|
"loss": 0.3247, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 79.38461538461539, |
|
"grad_norm": 6.772809028625488, |
|
"learning_rate": 1e-07, |
|
"loss": 0.221, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 79.53846153846153, |
|
"grad_norm": 8.480406761169434, |
|
"learning_rate": 9.88095238095238e-08, |
|
"loss": 0.2817, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 79.6923076923077, |
|
"grad_norm": 8.911038398742676, |
|
"learning_rate": 9.761904761904762e-08, |
|
"loss": 0.3514, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 79.84615384615384, |
|
"grad_norm": 18.952770233154297, |
|
"learning_rate": 9.642857142857142e-08, |
|
"loss": 0.2881, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 6.310261249542236, |
|
"learning_rate": 9.523809523809523e-08, |
|
"loss": 0.3107, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.41499239206314087, |
|
"eval_runtime": 1.191, |
|
"eval_samples_per_second": 229.225, |
|
"eval_steps_per_second": 4.198, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 80.15384615384616, |
|
"grad_norm": 17.95909309387207, |
|
"learning_rate": 9.404761904761904e-08, |
|
"loss": 0.2551, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 80.3076923076923, |
|
"grad_norm": 27.223407745361328, |
|
"learning_rate": 9.285714285714286e-08, |
|
"loss": 0.3356, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 80.46153846153847, |
|
"grad_norm": 6.595218658447266, |
|
"learning_rate": 9.166666666666665e-08, |
|
"loss": 0.2383, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 80.61538461538461, |
|
"grad_norm": 20.459001541137695, |
|
"learning_rate": 9.047619047619047e-08, |
|
"loss": 0.3265, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 80.76923076923077, |
|
"grad_norm": 15.349759101867676, |
|
"learning_rate": 8.928571428571429e-08, |
|
"loss": 0.2763, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"grad_norm": 10.789344787597656, |
|
"learning_rate": 8.80952380952381e-08, |
|
"loss": 0.3742, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4144986569881439, |
|
"eval_runtime": 1.1418, |
|
"eval_samples_per_second": 239.087, |
|
"eval_steps_per_second": 4.379, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 81.07692307692308, |
|
"grad_norm": 13.075380325317383, |
|
"learning_rate": 8.69047619047619e-08, |
|
"loss": 0.2957, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 81.23076923076923, |
|
"grad_norm": 10.450531959533691, |
|
"learning_rate": 8.571428571428572e-08, |
|
"loss": 0.3245, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 81.38461538461539, |
|
"grad_norm": 20.416603088378906, |
|
"learning_rate": 8.452380952380952e-08, |
|
"loss": 0.2355, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 81.53846153846153, |
|
"grad_norm": 11.954909324645996, |
|
"learning_rate": 8.333333333333333e-08, |
|
"loss": 0.2847, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 81.6923076923077, |
|
"grad_norm": 10.024072647094727, |
|
"learning_rate": 8.214285714285714e-08, |
|
"loss": 0.3908, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 81.84615384615384, |
|
"grad_norm": 13.764399528503418, |
|
"learning_rate": 8.095238095238095e-08, |
|
"loss": 0.2062, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"grad_norm": 18.268247604370117, |
|
"learning_rate": 7.976190476190475e-08, |
|
"loss": 0.2574, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4145357608795166, |
|
"eval_runtime": 1.2255, |
|
"eval_samples_per_second": 222.772, |
|
"eval_steps_per_second": 4.08, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 82.15384615384616, |
|
"grad_norm": 11.204299926757812, |
|
"learning_rate": 7.857142857142857e-08, |
|
"loss": 0.294, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 82.3076923076923, |
|
"grad_norm": 12.602919578552246, |
|
"learning_rate": 7.738095238095237e-08, |
|
"loss": 0.3379, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 82.46153846153847, |
|
"grad_norm": 17.001785278320312, |
|
"learning_rate": 7.619047619047619e-08, |
|
"loss": 0.2501, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 82.61538461538461, |
|
"grad_norm": 10.472540855407715, |
|
"learning_rate": 7.5e-08, |
|
"loss": 0.2673, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 82.76923076923077, |
|
"grad_norm": 12.93094539642334, |
|
"learning_rate": 7.38095238095238e-08, |
|
"loss": 0.3303, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 82.92307692307692, |
|
"grad_norm": 15.572615623474121, |
|
"learning_rate": 7.261904761904762e-08, |
|
"loss": 0.329, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 82.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.41488900780677795, |
|
"eval_runtime": 1.1463, |
|
"eval_samples_per_second": 238.152, |
|
"eval_steps_per_second": 4.362, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 8.833599090576172, |
|
"learning_rate": 7.142857142857142e-08, |
|
"loss": 0.2862, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 83.23076923076923, |
|
"grad_norm": 7.227090358734131, |
|
"learning_rate": 7.023809523809524e-08, |
|
"loss": 0.2553, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 83.38461538461539, |
|
"grad_norm": 16.44085693359375, |
|
"learning_rate": 6.904761904761905e-08, |
|
"loss": 0.3129, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 83.53846153846153, |
|
"grad_norm": 13.633960723876953, |
|
"learning_rate": 6.785714285714285e-08, |
|
"loss": 0.3134, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 83.6923076923077, |
|
"grad_norm": 16.555570602416992, |
|
"learning_rate": 6.666666666666667e-08, |
|
"loss": 0.2504, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 83.84615384615384, |
|
"grad_norm": 7.340324878692627, |
|
"learning_rate": 6.547619047619047e-08, |
|
"loss": 0.2966, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"grad_norm": 10.442778587341309, |
|
"learning_rate": 6.428571428571428e-08, |
|
"loss": 0.2727, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.4145146608352661, |
|
"eval_runtime": 1.1279, |
|
"eval_samples_per_second": 242.042, |
|
"eval_steps_per_second": 4.433, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 84.15384615384616, |
|
"grad_norm": 9.072919845581055, |
|
"learning_rate": 6.30952380952381e-08, |
|
"loss": 0.2461, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 84.3076923076923, |
|
"grad_norm": 8.624760627746582, |
|
"learning_rate": 6.19047619047619e-08, |
|
"loss": 0.2812, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 84.46153846153847, |
|
"grad_norm": 8.95349407196045, |
|
"learning_rate": 6.071428571428572e-08, |
|
"loss": 0.2835, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 84.61538461538461, |
|
"grad_norm": 18.060441970825195, |
|
"learning_rate": 5.9523809523809515e-08, |
|
"loss": 0.2697, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 84.76923076923077, |
|
"grad_norm": 15.820292472839355, |
|
"learning_rate": 5.833333333333333e-08, |
|
"loss": 0.3266, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"grad_norm": 10.503725051879883, |
|
"learning_rate": 5.714285714285714e-08, |
|
"loss": 0.2977, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.8754578754578755, |
|
"eval_loss": 0.41494670510292053, |
|
"eval_runtime": 1.1373, |
|
"eval_samples_per_second": 240.052, |
|
"eval_steps_per_second": 4.397, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 85.07692307692308, |
|
"grad_norm": 9.66882610321045, |
|
"learning_rate": 5.5952380952380955e-08, |
|
"loss": 0.3452, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 85.23076923076923, |
|
"grad_norm": 9.360061645507812, |
|
"learning_rate": 5.476190476190476e-08, |
|
"loss": 0.3033, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 85.38461538461539, |
|
"grad_norm": 10.878594398498535, |
|
"learning_rate": 5.3571428571428564e-08, |
|
"loss": 0.323, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 85.53846153846153, |
|
"grad_norm": 9.788655281066895, |
|
"learning_rate": 5.238095238095238e-08, |
|
"loss": 0.2796, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 85.6923076923077, |
|
"grad_norm": 11.249568939208984, |
|
"learning_rate": 5.1190476190476187e-08, |
|
"loss": 0.2381, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 85.84615384615384, |
|
"grad_norm": 22.544105529785156, |
|
"learning_rate": 5e-08, |
|
"loss": 0.3593, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"grad_norm": 9.197429656982422, |
|
"learning_rate": 4.880952380952381e-08, |
|
"loss": 0.2611, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4160268008708954, |
|
"eval_runtime": 1.1319, |
|
"eval_samples_per_second": 241.183, |
|
"eval_steps_per_second": 4.417, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 10.480982780456543, |
|
"learning_rate": 4.7619047619047613e-08, |
|
"loss": 0.2919, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 86.3076923076923, |
|
"grad_norm": 15.81564712524414, |
|
"learning_rate": 4.642857142857143e-08, |
|
"loss": 0.3395, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 86.46153846153847, |
|
"grad_norm": 25.986629486083984, |
|
"learning_rate": 4.5238095238095236e-08, |
|
"loss": 0.3436, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 86.61538461538461, |
|
"grad_norm": 20.077136993408203, |
|
"learning_rate": 4.404761904761905e-08, |
|
"loss": 0.363, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 86.76923076923077, |
|
"grad_norm": 16.67424774169922, |
|
"learning_rate": 4.285714285714286e-08, |
|
"loss": 0.2731, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 86.92307692307692, |
|
"grad_norm": 16.004140853881836, |
|
"learning_rate": 4.166666666666666e-08, |
|
"loss": 0.2542, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 86.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4169901907444, |
|
"eval_runtime": 1.1368, |
|
"eval_samples_per_second": 240.144, |
|
"eval_steps_per_second": 4.398, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 87.07692307692308, |
|
"grad_norm": 18.212156295776367, |
|
"learning_rate": 4.0476190476190474e-08, |
|
"loss": 0.3265, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 87.23076923076923, |
|
"grad_norm": 12.494462966918945, |
|
"learning_rate": 3.9285714285714285e-08, |
|
"loss": 0.3226, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 87.38461538461539, |
|
"grad_norm": 6.6637773513793945, |
|
"learning_rate": 3.8095238095238096e-08, |
|
"loss": 0.2262, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 87.53846153846153, |
|
"grad_norm": 12.791160583496094, |
|
"learning_rate": 3.69047619047619e-08, |
|
"loss": 0.3541, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 87.6923076923077, |
|
"grad_norm": 9.732351303100586, |
|
"learning_rate": 3.571428571428571e-08, |
|
"loss": 0.2551, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 87.84615384615384, |
|
"grad_norm": 21.216760635375977, |
|
"learning_rate": 3.452380952380952e-08, |
|
"loss": 0.2629, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"grad_norm": 15.293636322021484, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"loss": 0.2665, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.41707876324653625, |
|
"eval_runtime": 1.1463, |
|
"eval_samples_per_second": 238.151, |
|
"eval_steps_per_second": 4.362, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 88.15384615384616, |
|
"grad_norm": 11.668240547180176, |
|
"learning_rate": 3.214285714285714e-08, |
|
"loss": 0.3315, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 88.3076923076923, |
|
"grad_norm": 28.14773178100586, |
|
"learning_rate": 3.095238095238095e-08, |
|
"loss": 0.3098, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 88.46153846153847, |
|
"grad_norm": 13.815803527832031, |
|
"learning_rate": 2.9761904761904758e-08, |
|
"loss": 0.3381, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 88.61538461538461, |
|
"grad_norm": 11.341737747192383, |
|
"learning_rate": 2.857142857142857e-08, |
|
"loss": 0.2995, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 88.76923076923077, |
|
"grad_norm": 9.191329002380371, |
|
"learning_rate": 2.738095238095238e-08, |
|
"loss": 0.2485, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"grad_norm": 12.781599998474121, |
|
"learning_rate": 2.619047619047619e-08, |
|
"loss": 0.2654, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4170469343662262, |
|
"eval_runtime": 1.1144, |
|
"eval_samples_per_second": 244.964, |
|
"eval_steps_per_second": 4.487, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 89.07692307692308, |
|
"grad_norm": 18.813859939575195, |
|
"learning_rate": 2.5e-08, |
|
"loss": 0.4085, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 9.056866645812988, |
|
"learning_rate": 2.3809523809523807e-08, |
|
"loss": 0.2377, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 89.38461538461539, |
|
"grad_norm": 21.75194549560547, |
|
"learning_rate": 2.2619047619047618e-08, |
|
"loss": 0.2511, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 89.53846153846153, |
|
"grad_norm": 24.87982177734375, |
|
"learning_rate": 2.142857142857143e-08, |
|
"loss": 0.3468, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 89.6923076923077, |
|
"grad_norm": 13.90104866027832, |
|
"learning_rate": 2.0238095238095237e-08, |
|
"loss": 0.2001, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 89.84615384615384, |
|
"grad_norm": 13.514042854309082, |
|
"learning_rate": 1.9047619047619048e-08, |
|
"loss": 0.2673, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 17.871490478515625, |
|
"learning_rate": 1.7857142857142856e-08, |
|
"loss": 0.3059, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4172230660915375, |
|
"eval_runtime": 1.1156, |
|
"eval_samples_per_second": 244.721, |
|
"eval_steps_per_second": 4.482, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 90.15384615384616, |
|
"grad_norm": 9.703531265258789, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"loss": 0.1731, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 90.3076923076923, |
|
"grad_norm": 13.011953353881836, |
|
"learning_rate": 1.5476190476190475e-08, |
|
"loss": 0.3008, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 90.46153846153847, |
|
"grad_norm": 13.331348419189453, |
|
"learning_rate": 1.4285714285714284e-08, |
|
"loss": 0.2402, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 90.61538461538461, |
|
"grad_norm": 9.919706344604492, |
|
"learning_rate": 1.3095238095238096e-08, |
|
"loss": 0.3376, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"grad_norm": 20.184898376464844, |
|
"learning_rate": 1.1904761904761903e-08, |
|
"loss": 0.3106, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 90.92307692307692, |
|
"grad_norm": 6.9222588539123535, |
|
"learning_rate": 1.0714285714285715e-08, |
|
"loss": 0.2377, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 90.92307692307692, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.417271226644516, |
|
"eval_runtime": 1.1559, |
|
"eval_samples_per_second": 236.178, |
|
"eval_steps_per_second": 4.326, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 91.07692307692308, |
|
"grad_norm": 10.644214630126953, |
|
"learning_rate": 9.523809523809524e-09, |
|
"loss": 0.2738, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 91.23076923076923, |
|
"grad_norm": 12.530488014221191, |
|
"learning_rate": 8.333333333333334e-09, |
|
"loss": 0.3082, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 91.38461538461539, |
|
"grad_norm": 10.068582534790039, |
|
"learning_rate": 7.142857142857142e-09, |
|
"loss": 0.2678, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 91.53846153846153, |
|
"grad_norm": 8.5696439743042, |
|
"learning_rate": 5.952380952380952e-09, |
|
"loss": 0.2902, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 91.6923076923077, |
|
"grad_norm": 22.662599563598633, |
|
"learning_rate": 4.761904761904762e-09, |
|
"loss": 0.2826, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 91.84615384615384, |
|
"grad_norm": 19.666378021240234, |
|
"learning_rate": 3.571428571428571e-09, |
|
"loss": 0.3109, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"grad_norm": 15.434282302856445, |
|
"learning_rate": 2.380952380952381e-09, |
|
"loss": 0.2896, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4172247350215912, |
|
"eval_runtime": 1.1283, |
|
"eval_samples_per_second": 241.962, |
|
"eval_steps_per_second": 4.432, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 92.15384615384616, |
|
"grad_norm": 21.7716064453125, |
|
"learning_rate": 1.1904761904761905e-09, |
|
"loss": 0.3775, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 9.635214805603027, |
|
"learning_rate": 0.0, |
|
"loss": 0.3133, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.4172203838825226, |
|
"eval_runtime": 1.1209, |
|
"eval_samples_per_second": 243.565, |
|
"eval_steps_per_second": 4.461, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 600, |
|
"total_flos": 1.4722503891660472e+18, |
|
"train_loss": 0.3897706772387028, |
|
"train_runtime": 753.6703, |
|
"train_samples_per_second": 108.137, |
|
"train_steps_per_second": 0.796 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4722503891660472e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|