|
{ |
|
"best_metric": 1.1846204996109009, |
|
"best_model_checkpoint": "./results/checkpoint-2000", |
|
"epoch": 0.8, |
|
"eval_steps": 4, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9951999999999997e-07, |
|
"loss": 2.6285, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.4697508811950684, |
|
"eval_runtime": 0.485, |
|
"eval_samples_per_second": 8.248, |
|
"eval_steps_per_second": 2.062, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9904e-07, |
|
"loss": 2.6222, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.465975284576416, |
|
"eval_runtime": 0.6323, |
|
"eval_samples_per_second": 6.326, |
|
"eval_steps_per_second": 1.582, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9856e-07, |
|
"loss": 2.6536, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.460374116897583, |
|
"eval_runtime": 0.6478, |
|
"eval_samples_per_second": 6.175, |
|
"eval_steps_per_second": 1.544, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9808e-07, |
|
"loss": 2.6785, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.4556970596313477, |
|
"eval_runtime": 0.6653, |
|
"eval_samples_per_second": 6.012, |
|
"eval_steps_per_second": 1.503, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9759999999999996e-07, |
|
"loss": 2.6085, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.4514715671539307, |
|
"eval_runtime": 0.5241, |
|
"eval_samples_per_second": 7.632, |
|
"eval_steps_per_second": 1.908, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9711999999999995e-07, |
|
"loss": 2.5907, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.4462974071502686, |
|
"eval_runtime": 0.4689, |
|
"eval_samples_per_second": 8.53, |
|
"eval_steps_per_second": 2.133, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9664e-07, |
|
"loss": 2.5942, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.4415194988250732, |
|
"eval_runtime": 0.4829, |
|
"eval_samples_per_second": 8.284, |
|
"eval_steps_per_second": 2.071, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9615999999999997e-07, |
|
"loss": 2.6101, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.437161922454834, |
|
"eval_runtime": 0.4715, |
|
"eval_samples_per_second": 8.483, |
|
"eval_steps_per_second": 2.121, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9568e-07, |
|
"loss": 2.5827, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.432689666748047, |
|
"eval_runtime": 0.4938, |
|
"eval_samples_per_second": 8.1, |
|
"eval_steps_per_second": 2.025, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.5729, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.4281153678894043, |
|
"eval_runtime": 0.5021, |
|
"eval_samples_per_second": 7.966, |
|
"eval_steps_per_second": 1.991, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9472e-07, |
|
"loss": 2.5856, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.423053741455078, |
|
"eval_runtime": 0.593, |
|
"eval_samples_per_second": 6.746, |
|
"eval_steps_per_second": 1.686, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9423999999999997e-07, |
|
"loss": 2.589, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.418571949005127, |
|
"eval_runtime": 0.6933, |
|
"eval_samples_per_second": 5.77, |
|
"eval_steps_per_second": 1.442, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9375999999999995e-07, |
|
"loss": 2.6483, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.414531946182251, |
|
"eval_runtime": 0.7167, |
|
"eval_samples_per_second": 5.581, |
|
"eval_steps_per_second": 1.395, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9328e-07, |
|
"loss": 2.517, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.409538745880127, |
|
"eval_runtime": 0.4826, |
|
"eval_samples_per_second": 8.289, |
|
"eval_steps_per_second": 2.072, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.928e-07, |
|
"loss": 2.5987, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.4050426483154297, |
|
"eval_runtime": 0.4757, |
|
"eval_samples_per_second": 8.409, |
|
"eval_steps_per_second": 2.102, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9232e-07, |
|
"loss": 2.5489, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.400360107421875, |
|
"eval_runtime": 0.4945, |
|
"eval_samples_per_second": 8.089, |
|
"eval_steps_per_second": 2.022, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9184e-07, |
|
"loss": 2.5063, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.396500587463379, |
|
"eval_runtime": 0.5, |
|
"eval_samples_per_second": 8.001, |
|
"eval_steps_per_second": 2.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9136e-07, |
|
"loss": 2.5867, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3916146755218506, |
|
"eval_runtime": 0.4602, |
|
"eval_samples_per_second": 8.693, |
|
"eval_steps_per_second": 2.173, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9087999999999997e-07, |
|
"loss": 2.544, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3873047828674316, |
|
"eval_runtime": 0.4731, |
|
"eval_samples_per_second": 8.456, |
|
"eval_steps_per_second": 2.114, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9039999999999995e-07, |
|
"loss": 2.5596, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.382803440093994, |
|
"eval_runtime": 0.6092, |
|
"eval_samples_per_second": 6.566, |
|
"eval_steps_per_second": 1.642, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.8992e-07, |
|
"loss": 2.5744, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.3786380290985107, |
|
"eval_runtime": 0.7212, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.387, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8944e-07, |
|
"loss": 2.5588, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.374176502227783, |
|
"eval_runtime": 0.6826, |
|
"eval_samples_per_second": 5.86, |
|
"eval_steps_per_second": 1.465, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8895999999999996e-07, |
|
"loss": 2.5579, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.3702104091644287, |
|
"eval_runtime": 0.4896, |
|
"eval_samples_per_second": 8.169, |
|
"eval_steps_per_second": 2.042, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8848e-07, |
|
"loss": 2.5245, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.3660218715667725, |
|
"eval_runtime": 0.4764, |
|
"eval_samples_per_second": 8.397, |
|
"eval_steps_per_second": 2.099, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.88e-07, |
|
"loss": 2.5132, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.36110520362854, |
|
"eval_runtime": 0.4799, |
|
"eval_samples_per_second": 8.335, |
|
"eval_steps_per_second": 2.084, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8751999999999997e-07, |
|
"loss": 2.5037, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.3570125102996826, |
|
"eval_runtime": 0.4722, |
|
"eval_samples_per_second": 8.47, |
|
"eval_steps_per_second": 2.118, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8704e-07, |
|
"loss": 2.4727, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.3530666828155518, |
|
"eval_runtime": 0.467, |
|
"eval_samples_per_second": 8.565, |
|
"eval_steps_per_second": 2.141, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.8656e-07, |
|
"loss": 2.4709, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.348759412765503, |
|
"eval_runtime": 0.501, |
|
"eval_samples_per_second": 7.984, |
|
"eval_steps_per_second": 1.996, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8608e-07, |
|
"loss": 2.4711, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.344454050064087, |
|
"eval_runtime": 0.6607, |
|
"eval_samples_per_second": 6.054, |
|
"eval_steps_per_second": 1.513, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8559999999999996e-07, |
|
"loss": 2.5445, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.3402156829833984, |
|
"eval_runtime": 0.704, |
|
"eval_samples_per_second": 5.682, |
|
"eval_steps_per_second": 1.42, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8512e-07, |
|
"loss": 2.4994, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.3362019062042236, |
|
"eval_runtime": 0.6849, |
|
"eval_samples_per_second": 5.84, |
|
"eval_steps_per_second": 1.46, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8464e-07, |
|
"loss": 2.5036, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.3319339752197266, |
|
"eval_runtime": 0.4864, |
|
"eval_samples_per_second": 8.223, |
|
"eval_steps_per_second": 2.056, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8416e-07, |
|
"loss": 2.5525, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.3276522159576416, |
|
"eval_runtime": 0.4783, |
|
"eval_samples_per_second": 8.364, |
|
"eval_steps_per_second": 2.091, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8368e-07, |
|
"loss": 2.5245, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 2.3241090774536133, |
|
"eval_runtime": 0.4805, |
|
"eval_samples_per_second": 8.324, |
|
"eval_steps_per_second": 2.081, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.832e-07, |
|
"loss": 2.4946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.3198165893554688, |
|
"eval_runtime": 0.473, |
|
"eval_samples_per_second": 8.457, |
|
"eval_steps_per_second": 2.114, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8272e-07, |
|
"loss": 2.5142, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.3152613639831543, |
|
"eval_runtime": 0.4858, |
|
"eval_samples_per_second": 8.234, |
|
"eval_steps_per_second": 2.058, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8223999999999997e-07, |
|
"loss": 2.4639, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.3112645149230957, |
|
"eval_runtime": 0.488, |
|
"eval_samples_per_second": 8.196, |
|
"eval_steps_per_second": 2.049, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8176e-07, |
|
"loss": 2.4796, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.307020902633667, |
|
"eval_runtime": 0.6163, |
|
"eval_samples_per_second": 6.49, |
|
"eval_steps_per_second": 1.623, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8128e-07, |
|
"loss": 2.4529, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.303062915802002, |
|
"eval_runtime": 0.6764, |
|
"eval_samples_per_second": 5.913, |
|
"eval_steps_per_second": 1.478, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.808e-07, |
|
"loss": 2.4823, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 2.2993311882019043, |
|
"eval_runtime": 0.6854, |
|
"eval_samples_per_second": 5.836, |
|
"eval_steps_per_second": 1.459, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8032e-07, |
|
"loss": 2.4439, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2947850227355957, |
|
"eval_runtime": 0.4745, |
|
"eval_samples_per_second": 8.429, |
|
"eval_steps_per_second": 2.107, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7984e-07, |
|
"loss": 2.4652, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2908992767333984, |
|
"eval_runtime": 0.4759, |
|
"eval_samples_per_second": 8.406, |
|
"eval_steps_per_second": 2.101, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7936e-07, |
|
"loss": 2.4574, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2867026329040527, |
|
"eval_runtime": 0.4973, |
|
"eval_samples_per_second": 8.043, |
|
"eval_steps_per_second": 2.011, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7887999999999997e-07, |
|
"loss": 2.4557, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.283027172088623, |
|
"eval_runtime": 0.4719, |
|
"eval_samples_per_second": 8.477, |
|
"eval_steps_per_second": 2.119, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.784e-07, |
|
"loss": 2.4462, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2787420749664307, |
|
"eval_runtime": 0.472, |
|
"eval_samples_per_second": 8.474, |
|
"eval_steps_per_second": 2.119, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.7792e-07, |
|
"loss": 2.3962, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.2745461463928223, |
|
"eval_runtime": 0.6328, |
|
"eval_samples_per_second": 6.322, |
|
"eval_steps_per_second": 1.58, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7744e-07, |
|
"loss": 2.3666, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.2705912590026855, |
|
"eval_runtime": 0.6375, |
|
"eval_samples_per_second": 6.274, |
|
"eval_steps_per_second": 1.569, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7696e-07, |
|
"loss": 2.5024, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.266995906829834, |
|
"eval_runtime": 0.6984, |
|
"eval_samples_per_second": 5.727, |
|
"eval_steps_per_second": 1.432, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7648e-07, |
|
"loss": 2.4419, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.2626519203186035, |
|
"eval_runtime": 0.7334, |
|
"eval_samples_per_second": 5.454, |
|
"eval_steps_per_second": 1.363, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.76e-07, |
|
"loss": 2.4246, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.2583603858947754, |
|
"eval_runtime": 0.48, |
|
"eval_samples_per_second": 8.333, |
|
"eval_steps_per_second": 2.083, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7551999999999997e-07, |
|
"loss": 2.3853, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.2551512718200684, |
|
"eval_runtime": 0.4939, |
|
"eval_samples_per_second": 8.098, |
|
"eval_steps_per_second": 2.025, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7503999999999995e-07, |
|
"loss": 2.4032, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.251105785369873, |
|
"eval_runtime": 0.46, |
|
"eval_samples_per_second": 8.695, |
|
"eval_steps_per_second": 2.174, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7456e-07, |
|
"loss": 2.4444, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.247025489807129, |
|
"eval_runtime": 0.4948, |
|
"eval_samples_per_second": 8.084, |
|
"eval_steps_per_second": 2.021, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7408e-07, |
|
"loss": 2.2932, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.242764472961426, |
|
"eval_runtime": 0.4897, |
|
"eval_samples_per_second": 8.168, |
|
"eval_steps_per_second": 2.042, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.736e-07, |
|
"loss": 2.3929, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.2391483783721924, |
|
"eval_runtime": 0.6128, |
|
"eval_samples_per_second": 6.528, |
|
"eval_steps_per_second": 1.632, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7312e-07, |
|
"loss": 2.4112, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.234977960586548, |
|
"eval_runtime": 0.648, |
|
"eval_samples_per_second": 6.172, |
|
"eval_steps_per_second": 1.543, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7264e-07, |
|
"loss": 2.4191, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.231099843978882, |
|
"eval_runtime": 0.6862, |
|
"eval_samples_per_second": 5.829, |
|
"eval_steps_per_second": 1.457, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7215999999999997e-07, |
|
"loss": 2.4408, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.2272462844848633, |
|
"eval_runtime": 0.7076, |
|
"eval_samples_per_second": 5.653, |
|
"eval_steps_per_second": 1.413, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.7167999999999996e-07, |
|
"loss": 2.3884, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.223376750946045, |
|
"eval_runtime": 0.5169, |
|
"eval_samples_per_second": 7.738, |
|
"eval_steps_per_second": 1.935, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.712e-07, |
|
"loss": 2.3689, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.2195653915405273, |
|
"eval_runtime": 0.4793, |
|
"eval_samples_per_second": 8.346, |
|
"eval_steps_per_second": 2.086, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7072e-07, |
|
"loss": 2.3689, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.2153775691986084, |
|
"eval_runtime": 0.4771, |
|
"eval_samples_per_second": 8.384, |
|
"eval_steps_per_second": 2.096, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7024e-07, |
|
"loss": 2.3249, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.211355209350586, |
|
"eval_runtime": 0.4778, |
|
"eval_samples_per_second": 8.372, |
|
"eval_steps_per_second": 2.093, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.6976e-07, |
|
"loss": 2.4286, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.207773208618164, |
|
"eval_runtime": 0.4873, |
|
"eval_samples_per_second": 8.209, |
|
"eval_steps_per_second": 2.052, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.6928e-07, |
|
"loss": 2.3497, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.203867197036743, |
|
"eval_runtime": 0.6281, |
|
"eval_samples_per_second": 6.368, |
|
"eval_steps_per_second": 1.592, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.6879999999999997e-07, |
|
"loss": 2.284, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.199937582015991, |
|
"eval_runtime": 0.6885, |
|
"eval_samples_per_second": 5.81, |
|
"eval_steps_per_second": 1.452, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6831999999999996e-07, |
|
"loss": 2.3333, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1958465576171875, |
|
"eval_runtime": 0.6799, |
|
"eval_samples_per_second": 5.883, |
|
"eval_steps_per_second": 1.471, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6784e-07, |
|
"loss": 2.3305, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.192072868347168, |
|
"eval_runtime": 0.7165, |
|
"eval_samples_per_second": 5.583, |
|
"eval_steps_per_second": 1.396, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6736e-07, |
|
"loss": 2.3465, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1882476806640625, |
|
"eval_runtime": 0.485, |
|
"eval_samples_per_second": 8.247, |
|
"eval_steps_per_second": 2.062, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6687999999999997e-07, |
|
"loss": 2.3274, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1841320991516113, |
|
"eval_runtime": 0.4767, |
|
"eval_samples_per_second": 8.391, |
|
"eval_steps_per_second": 2.098, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.664e-07, |
|
"loss": 2.3641, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.1803271770477295, |
|
"eval_runtime": 0.5146, |
|
"eval_samples_per_second": 7.774, |
|
"eval_steps_per_second": 1.943, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.6592e-07, |
|
"loss": 2.3089, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.176274538040161, |
|
"eval_runtime": 0.488, |
|
"eval_samples_per_second": 8.196, |
|
"eval_steps_per_second": 2.049, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6543999999999997e-07, |
|
"loss": 2.2645, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1720588207244873, |
|
"eval_runtime": 0.4973, |
|
"eval_samples_per_second": 8.043, |
|
"eval_steps_per_second": 2.011, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6495999999999996e-07, |
|
"loss": 2.3439, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1687240600585938, |
|
"eval_runtime": 0.6283, |
|
"eval_samples_per_second": 6.366, |
|
"eval_steps_per_second": 1.592, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6448e-07, |
|
"loss": 2.3285, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1649253368377686, |
|
"eval_runtime": 0.6996, |
|
"eval_samples_per_second": 5.718, |
|
"eval_steps_per_second": 1.429, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.64e-07, |
|
"loss": 2.3126, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.160398483276367, |
|
"eval_runtime": 0.6904, |
|
"eval_samples_per_second": 5.794, |
|
"eval_steps_per_second": 1.448, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6351999999999997e-07, |
|
"loss": 2.3356, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1570284366607666, |
|
"eval_runtime": 0.4953, |
|
"eval_samples_per_second": 8.076, |
|
"eval_steps_per_second": 2.019, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6304e-07, |
|
"loss": 2.3396, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.1527013778686523, |
|
"eval_runtime": 0.4977, |
|
"eval_samples_per_second": 8.037, |
|
"eval_steps_per_second": 2.009, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.6256e-07, |
|
"loss": 2.2972, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.148724317550659, |
|
"eval_runtime": 0.4939, |
|
"eval_samples_per_second": 8.099, |
|
"eval_steps_per_second": 2.025, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6208e-07, |
|
"loss": 2.3321, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1449663639068604, |
|
"eval_runtime": 0.4784, |
|
"eval_samples_per_second": 8.362, |
|
"eval_steps_per_second": 2.09, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.616e-07, |
|
"loss": 2.3348, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1414906978607178, |
|
"eval_runtime": 0.4949, |
|
"eval_samples_per_second": 8.082, |
|
"eval_steps_per_second": 2.021, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6112e-07, |
|
"loss": 2.2728, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1374001502990723, |
|
"eval_runtime": 0.6321, |
|
"eval_samples_per_second": 6.328, |
|
"eval_steps_per_second": 1.582, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6064e-07, |
|
"loss": 2.287, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1333529949188232, |
|
"eval_runtime": 0.6547, |
|
"eval_samples_per_second": 6.109, |
|
"eval_steps_per_second": 1.527, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.6015999999999997e-07, |
|
"loss": 2.2474, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.1297547817230225, |
|
"eval_runtime": 0.7093, |
|
"eval_samples_per_second": 5.639, |
|
"eval_steps_per_second": 1.41, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.5968e-07, |
|
"loss": 2.3214, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.126392364501953, |
|
"eval_runtime": 0.6909, |
|
"eval_samples_per_second": 5.789, |
|
"eval_steps_per_second": 1.447, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.592e-07, |
|
"loss": 2.2725, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.122309923171997, |
|
"eval_runtime": 0.4823, |
|
"eval_samples_per_second": 8.293, |
|
"eval_steps_per_second": 2.073, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5872000000000003e-07, |
|
"loss": 2.3114, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.118303060531616, |
|
"eval_runtime": 0.4954, |
|
"eval_samples_per_second": 8.075, |
|
"eval_steps_per_second": 2.019, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5824e-07, |
|
"loss": 2.2333, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.114621162414551, |
|
"eval_runtime": 0.4856, |
|
"eval_samples_per_second": 8.238, |
|
"eval_steps_per_second": 2.059, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5776e-07, |
|
"loss": 2.2812, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.11067795753479, |
|
"eval_runtime": 0.4778, |
|
"eval_samples_per_second": 8.372, |
|
"eval_steps_per_second": 2.093, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5728e-07, |
|
"loss": 2.2454, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.106940746307373, |
|
"eval_runtime": 0.4945, |
|
"eval_samples_per_second": 8.089, |
|
"eval_steps_per_second": 2.022, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.5679999999999997e-07, |
|
"loss": 2.2261, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.1031668186187744, |
|
"eval_runtime": 0.6521, |
|
"eval_samples_per_second": 6.134, |
|
"eval_steps_per_second": 1.533, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5632e-07, |
|
"loss": 2.2841, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0989203453063965, |
|
"eval_runtime": 0.6249, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 1.6, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5584e-07, |
|
"loss": 2.2481, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.095189332962036, |
|
"eval_runtime": 0.6855, |
|
"eval_samples_per_second": 5.835, |
|
"eval_steps_per_second": 1.459, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5536e-07, |
|
"loss": 2.278, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0912463665008545, |
|
"eval_runtime": 0.7393, |
|
"eval_samples_per_second": 5.411, |
|
"eval_steps_per_second": 1.353, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5488e-07, |
|
"loss": 2.2765, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.087336301803589, |
|
"eval_runtime": 0.4793, |
|
"eval_samples_per_second": 8.345, |
|
"eval_steps_per_second": 2.086, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.544e-07, |
|
"loss": 2.2232, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.0833120346069336, |
|
"eval_runtime": 0.487, |
|
"eval_samples_per_second": 8.214, |
|
"eval_steps_per_second": 2.053, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5392e-07, |
|
"loss": 2.306, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.079479932785034, |
|
"eval_runtime": 0.4722, |
|
"eval_samples_per_second": 8.471, |
|
"eval_steps_per_second": 2.118, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5343999999999997e-07, |
|
"loss": 2.2126, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0760295391082764, |
|
"eval_runtime": 0.4958, |
|
"eval_samples_per_second": 8.068, |
|
"eval_steps_per_second": 2.017, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5295999999999996e-07, |
|
"loss": 2.2557, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.072136402130127, |
|
"eval_runtime": 0.469, |
|
"eval_samples_per_second": 8.529, |
|
"eval_steps_per_second": 2.132, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5248e-07, |
|
"loss": 2.1988, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0683670043945312, |
|
"eval_runtime": 0.6385, |
|
"eval_samples_per_second": 6.264, |
|
"eval_steps_per_second": 1.566, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.52e-07, |
|
"loss": 2.1917, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0638906955718994, |
|
"eval_runtime": 0.6834, |
|
"eval_samples_per_second": 5.853, |
|
"eval_steps_per_second": 1.463, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5152e-07, |
|
"loss": 2.2479, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.0599253177642822, |
|
"eval_runtime": 0.7261, |
|
"eval_samples_per_second": 5.509, |
|
"eval_steps_per_second": 1.377, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5104e-07, |
|
"loss": 2.1484, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.055751085281372, |
|
"eval_runtime": 0.7367, |
|
"eval_samples_per_second": 5.429, |
|
"eval_steps_per_second": 1.357, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.5056e-07, |
|
"loss": 2.1886, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.052119016647339, |
|
"eval_runtime": 0.4808, |
|
"eval_samples_per_second": 8.319, |
|
"eval_steps_per_second": 2.08, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.5007999999999997e-07, |
|
"loss": 2.2026, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0482354164123535, |
|
"eval_runtime": 0.4856, |
|
"eval_samples_per_second": 8.238, |
|
"eval_steps_per_second": 2.059, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4959999999999996e-07, |
|
"loss": 2.1572, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0441887378692627, |
|
"eval_runtime": 0.4779, |
|
"eval_samples_per_second": 8.37, |
|
"eval_steps_per_second": 2.093, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4912e-07, |
|
"loss": 2.1931, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.0399935245513916, |
|
"eval_runtime": 0.4803, |
|
"eval_samples_per_second": 8.329, |
|
"eval_steps_per_second": 2.082, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4864e-07, |
|
"loss": 2.161, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.03645920753479, |
|
"eval_runtime": 0.4924, |
|
"eval_samples_per_second": 8.123, |
|
"eval_steps_per_second": 2.031, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4816e-07, |
|
"loss": 2.1115, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.032196044921875, |
|
"eval_runtime": 0.6345, |
|
"eval_samples_per_second": 6.304, |
|
"eval_steps_per_second": 1.576, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.4768e-07, |
|
"loss": 2.173, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.028397560119629, |
|
"eval_runtime": 0.6625, |
|
"eval_samples_per_second": 6.038, |
|
"eval_steps_per_second": 1.509, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.472e-07, |
|
"loss": 2.1491, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0247464179992676, |
|
"eval_runtime": 0.6969, |
|
"eval_samples_per_second": 5.74, |
|
"eval_steps_per_second": 1.435, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4672e-07, |
|
"loss": 2.1716, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0203933715820312, |
|
"eval_runtime": 0.7311, |
|
"eval_samples_per_second": 5.471, |
|
"eval_steps_per_second": 1.368, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4623999999999996e-07, |
|
"loss": 2.2031, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.016533374786377, |
|
"eval_runtime": 0.4875, |
|
"eval_samples_per_second": 8.206, |
|
"eval_steps_per_second": 2.051, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4576e-07, |
|
"loss": 2.1466, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.012568473815918, |
|
"eval_runtime": 0.4897, |
|
"eval_samples_per_second": 8.168, |
|
"eval_steps_per_second": 2.042, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4528e-07, |
|
"loss": 2.1384, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0088417530059814, |
|
"eval_runtime": 0.4969, |
|
"eval_samples_per_second": 8.05, |
|
"eval_steps_per_second": 2.013, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.4479999999999997e-07, |
|
"loss": 2.1824, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.0047850608825684, |
|
"eval_runtime": 0.4897, |
|
"eval_samples_per_second": 8.168, |
|
"eval_steps_per_second": 2.042, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4432e-07, |
|
"loss": 2.1401, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.0006463527679443, |
|
"eval_runtime": 0.4882, |
|
"eval_samples_per_second": 8.193, |
|
"eval_steps_per_second": 2.048, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4384e-07, |
|
"loss": 2.2086, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9969314336776733, |
|
"eval_runtime": 0.6612, |
|
"eval_samples_per_second": 6.049, |
|
"eval_steps_per_second": 1.512, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4336e-07, |
|
"loss": 2.1687, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9925954341888428, |
|
"eval_runtime": 0.6804, |
|
"eval_samples_per_second": 5.879, |
|
"eval_steps_per_second": 1.47, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4287999999999996e-07, |
|
"loss": 2.145, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9888066053390503, |
|
"eval_runtime": 0.6955, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 1.438, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.424e-07, |
|
"loss": 2.2007, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9850127696990967, |
|
"eval_runtime": 0.7558, |
|
"eval_samples_per_second": 5.292, |
|
"eval_steps_per_second": 1.323, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.4192e-07, |
|
"loss": 2.1367, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.9808437824249268, |
|
"eval_runtime": 0.4706, |
|
"eval_samples_per_second": 8.499, |
|
"eval_steps_per_second": 2.125, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.4143999999999997e-07, |
|
"loss": 2.1291, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9767786264419556, |
|
"eval_runtime": 0.4803, |
|
"eval_samples_per_second": 8.327, |
|
"eval_steps_per_second": 2.082, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.4096e-07, |
|
"loss": 2.1124, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9728602170944214, |
|
"eval_runtime": 0.4802, |
|
"eval_samples_per_second": 8.33, |
|
"eval_steps_per_second": 2.082, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.4048e-07, |
|
"loss": 2.0738, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.968900203704834, |
|
"eval_runtime": 0.4884, |
|
"eval_samples_per_second": 8.189, |
|
"eval_steps_per_second": 2.047, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.4e-07, |
|
"loss": 2.1048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9646457433700562, |
|
"eval_runtime": 0.5026, |
|
"eval_samples_per_second": 7.959, |
|
"eval_steps_per_second": 1.99, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3951999999999996e-07, |
|
"loss": 2.0995, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9606600999832153, |
|
"eval_runtime": 0.7928, |
|
"eval_samples_per_second": 5.045, |
|
"eval_steps_per_second": 1.261, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3903999999999995e-07, |
|
"loss": 2.0816, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.956822395324707, |
|
"eval_runtime": 0.5321, |
|
"eval_samples_per_second": 7.518, |
|
"eval_steps_per_second": 1.879, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.3856e-07, |
|
"loss": 2.0969, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.9526716470718384, |
|
"eval_runtime": 0.5174, |
|
"eval_samples_per_second": 7.732, |
|
"eval_steps_per_second": 1.933, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3807999999999997e-07, |
|
"loss": 2.1034, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.948419451713562, |
|
"eval_runtime": 0.5393, |
|
"eval_samples_per_second": 7.418, |
|
"eval_steps_per_second": 1.854, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.376e-07, |
|
"loss": 2.0654, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9442145824432373, |
|
"eval_runtime": 0.5372, |
|
"eval_samples_per_second": 7.446, |
|
"eval_steps_per_second": 1.861, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3712e-07, |
|
"loss": 2.1175, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9403698444366455, |
|
"eval_runtime": 0.5129, |
|
"eval_samples_per_second": 7.798, |
|
"eval_steps_per_second": 1.95, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3663999999999998e-07, |
|
"loss": 2.0829, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.936263084411621, |
|
"eval_runtime": 0.7202, |
|
"eval_samples_per_second": 5.554, |
|
"eval_steps_per_second": 1.388, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3616e-07, |
|
"loss": 2.0973, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.9322115182876587, |
|
"eval_runtime": 0.6884, |
|
"eval_samples_per_second": 5.81, |
|
"eval_steps_per_second": 1.453, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.3567999999999998e-07, |
|
"loss": 2.0439, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.927826166152954, |
|
"eval_runtime": 0.7779, |
|
"eval_samples_per_second": 5.142, |
|
"eval_steps_per_second": 1.286, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.352e-07, |
|
"loss": 2.0791, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.923945426940918, |
|
"eval_runtime": 0.7514, |
|
"eval_samples_per_second": 5.323, |
|
"eval_steps_per_second": 1.331, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3471999999999997e-07, |
|
"loss": 2.0988, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9202955961227417, |
|
"eval_runtime": 0.5194, |
|
"eval_samples_per_second": 7.701, |
|
"eval_steps_per_second": 1.925, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3424e-07, |
|
"loss": 2.0179, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.916027307510376, |
|
"eval_runtime": 0.5072, |
|
"eval_samples_per_second": 7.887, |
|
"eval_steps_per_second": 1.972, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3376e-07, |
|
"loss": 2.0452, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.911855697631836, |
|
"eval_runtime": 0.5112, |
|
"eval_samples_per_second": 7.825, |
|
"eval_steps_per_second": 1.956, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.3327999999999998e-07, |
|
"loss": 1.9792, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.907868504524231, |
|
"eval_runtime": 0.5368, |
|
"eval_samples_per_second": 7.452, |
|
"eval_steps_per_second": 1.863, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.328e-07, |
|
"loss": 1.9862, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.9032366275787354, |
|
"eval_runtime": 0.52, |
|
"eval_samples_per_second": 7.692, |
|
"eval_steps_per_second": 1.923, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3231999999999998e-07, |
|
"loss": 2.0176, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8994207382202148, |
|
"eval_runtime": 0.5141, |
|
"eval_samples_per_second": 7.78, |
|
"eval_steps_per_second": 1.945, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3184e-07, |
|
"loss": 2.0066, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8953509330749512, |
|
"eval_runtime": 0.7027, |
|
"eval_samples_per_second": 5.692, |
|
"eval_steps_per_second": 1.423, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3135999999999998e-07, |
|
"loss": 2.0333, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8914432525634766, |
|
"eval_runtime": 0.7279, |
|
"eval_samples_per_second": 5.495, |
|
"eval_steps_per_second": 1.374, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.3088e-07, |
|
"loss": 2.0316, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8870800733566284, |
|
"eval_runtime": 0.7212, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.386, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.304e-07, |
|
"loss": 2.0114, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8827916383743286, |
|
"eval_runtime": 0.6774, |
|
"eval_samples_per_second": 5.905, |
|
"eval_steps_per_second": 1.476, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.2991999999999998e-07, |
|
"loss": 2.0093, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8788678646087646, |
|
"eval_runtime": 0.5185, |
|
"eval_samples_per_second": 7.715, |
|
"eval_steps_per_second": 1.929, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2944e-07, |
|
"loss": 1.9829, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8749186992645264, |
|
"eval_runtime": 0.5091, |
|
"eval_samples_per_second": 7.857, |
|
"eval_steps_per_second": 1.964, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2895999999999998e-07, |
|
"loss": 1.971, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8706499338150024, |
|
"eval_runtime": 0.5204, |
|
"eval_samples_per_second": 7.687, |
|
"eval_steps_per_second": 1.922, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2848000000000002e-07, |
|
"loss": 2.0188, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8667842149734497, |
|
"eval_runtime": 0.5224, |
|
"eval_samples_per_second": 7.657, |
|
"eval_steps_per_second": 1.914, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.28e-07, |
|
"loss": 2.0081, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8627525568008423, |
|
"eval_runtime": 0.5196, |
|
"eval_samples_per_second": 7.699, |
|
"eval_steps_per_second": 1.925, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2752e-07, |
|
"loss": 2.0014, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8587167263031006, |
|
"eval_runtime": 0.7373, |
|
"eval_samples_per_second": 5.425, |
|
"eval_steps_per_second": 1.356, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2704e-07, |
|
"loss": 1.9741, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8543612957000732, |
|
"eval_runtime": 0.7492, |
|
"eval_samples_per_second": 5.339, |
|
"eval_steps_per_second": 1.335, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.2655999999999999e-07, |
|
"loss": 1.9828, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.8504937887191772, |
|
"eval_runtime": 0.7242, |
|
"eval_samples_per_second": 5.524, |
|
"eval_steps_per_second": 1.381, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2608e-07, |
|
"loss": 1.9481, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.8463339805603027, |
|
"eval_runtime": 0.6997, |
|
"eval_samples_per_second": 5.716, |
|
"eval_steps_per_second": 1.429, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2559999999999998e-07, |
|
"loss": 1.9584, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.8423882722854614, |
|
"eval_runtime": 0.5137, |
|
"eval_samples_per_second": 7.787, |
|
"eval_steps_per_second": 1.947, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2511999999999997e-07, |
|
"loss": 1.9449, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.838066577911377, |
|
"eval_runtime": 0.5091, |
|
"eval_samples_per_second": 7.857, |
|
"eval_steps_per_second": 1.964, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2464e-07, |
|
"loss": 1.9753, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.8342829942703247, |
|
"eval_runtime": 0.504, |
|
"eval_samples_per_second": 7.936, |
|
"eval_steps_per_second": 1.984, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2416e-07, |
|
"loss": 2.0055, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.8300307989120483, |
|
"eval_runtime": 0.5201, |
|
"eval_samples_per_second": 7.691, |
|
"eval_steps_per_second": 1.923, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.2368e-07, |
|
"loss": 1.98, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.8260575532913208, |
|
"eval_runtime": 0.5267, |
|
"eval_samples_per_second": 7.594, |
|
"eval_steps_per_second": 1.898, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.232e-07, |
|
"loss": 1.9757, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8222540616989136, |
|
"eval_runtime": 0.7574, |
|
"eval_samples_per_second": 5.281, |
|
"eval_steps_per_second": 1.32, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.2271999999999997e-07, |
|
"loss": 1.9683, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.818216323852539, |
|
"eval_runtime": 0.7304, |
|
"eval_samples_per_second": 5.476, |
|
"eval_steps_per_second": 1.369, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.2223999999999998e-07, |
|
"loss": 1.926, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8140522241592407, |
|
"eval_runtime": 0.7453, |
|
"eval_samples_per_second": 5.367, |
|
"eval_steps_per_second": 1.342, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.2175999999999997e-07, |
|
"loss": 1.9454, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8100805282592773, |
|
"eval_runtime": 0.6536, |
|
"eval_samples_per_second": 6.12, |
|
"eval_steps_per_second": 1.53, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.2128e-07, |
|
"loss": 1.9352, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8059089183807373, |
|
"eval_runtime": 0.5193, |
|
"eval_samples_per_second": 7.702, |
|
"eval_steps_per_second": 1.926, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.208e-07, |
|
"loss": 1.8816, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.8020563125610352, |
|
"eval_runtime": 0.5265, |
|
"eval_samples_per_second": 7.597, |
|
"eval_steps_per_second": 1.899, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.2032e-07, |
|
"loss": 1.9182, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.7980492115020752, |
|
"eval_runtime": 0.5102, |
|
"eval_samples_per_second": 7.84, |
|
"eval_steps_per_second": 1.96, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.1984e-07, |
|
"loss": 1.9659, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.7941217422485352, |
|
"eval_runtime": 0.5988, |
|
"eval_samples_per_second": 6.681, |
|
"eval_steps_per_second": 1.67, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.1935999999999997e-07, |
|
"loss": 1.8932, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.7901490926742554, |
|
"eval_runtime": 0.5339, |
|
"eval_samples_per_second": 7.492, |
|
"eval_steps_per_second": 1.873, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.1887999999999999e-07, |
|
"loss": 1.8608, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.786109447479248, |
|
"eval_runtime": 0.7219, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 1.385, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.184e-07, |
|
"loss": 1.941, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.7824102640151978, |
|
"eval_runtime": 0.7619, |
|
"eval_samples_per_second": 5.25, |
|
"eval_steps_per_second": 1.313, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.1792e-07, |
|
"loss": 1.8854, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.77846097946167, |
|
"eval_runtime": 0.7601, |
|
"eval_samples_per_second": 5.262, |
|
"eval_steps_per_second": 1.316, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1744e-07, |
|
"loss": 1.8912, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7742952108383179, |
|
"eval_runtime": 0.59, |
|
"eval_samples_per_second": 6.78, |
|
"eval_steps_per_second": 1.695, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1695999999999998e-07, |
|
"loss": 1.8667, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.770714521408081, |
|
"eval_runtime": 0.5262, |
|
"eval_samples_per_second": 7.601, |
|
"eval_steps_per_second": 1.9, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1648e-07, |
|
"loss": 1.912, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7666008472442627, |
|
"eval_runtime": 0.5272, |
|
"eval_samples_per_second": 7.587, |
|
"eval_steps_per_second": 1.897, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1599999999999998e-07, |
|
"loss": 1.9009, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7627824544906616, |
|
"eval_runtime": 0.5295, |
|
"eval_samples_per_second": 7.555, |
|
"eval_steps_per_second": 1.889, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1552000000000001e-07, |
|
"loss": 1.906, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.75889253616333, |
|
"eval_runtime": 0.5589, |
|
"eval_samples_per_second": 7.157, |
|
"eval_steps_per_second": 1.789, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1504e-07, |
|
"loss": 1.8671, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7549973726272583, |
|
"eval_runtime": 0.687, |
|
"eval_samples_per_second": 5.822, |
|
"eval_steps_per_second": 1.456, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.1455999999999998e-07, |
|
"loss": 1.8609, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.7507662773132324, |
|
"eval_runtime": 0.7225, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 1.384, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1408e-07, |
|
"loss": 1.8485, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.746917486190796, |
|
"eval_runtime": 0.7954, |
|
"eval_samples_per_second": 5.029, |
|
"eval_steps_per_second": 1.257, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1359999999999998e-07, |
|
"loss": 1.8334, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.7430514097213745, |
|
"eval_runtime": 0.7433, |
|
"eval_samples_per_second": 5.381, |
|
"eval_steps_per_second": 1.345, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1312e-07, |
|
"loss": 1.8763, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.7392196655273438, |
|
"eval_runtime": 0.5237, |
|
"eval_samples_per_second": 7.638, |
|
"eval_steps_per_second": 1.91, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1263999999999998e-07, |
|
"loss": 1.9005, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.7355214357376099, |
|
"eval_runtime": 0.524, |
|
"eval_samples_per_second": 7.634, |
|
"eval_steps_per_second": 1.908, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1216000000000002e-07, |
|
"loss": 1.8669, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.731513261795044, |
|
"eval_runtime": 0.5593, |
|
"eval_samples_per_second": 7.152, |
|
"eval_steps_per_second": 1.788, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.1168e-07, |
|
"loss": 1.8984, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.727636694908142, |
|
"eval_runtime": 0.5241, |
|
"eval_samples_per_second": 7.632, |
|
"eval_steps_per_second": 1.908, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.1119999999999999e-07, |
|
"loss": 1.8074, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7240556478500366, |
|
"eval_runtime": 0.715, |
|
"eval_samples_per_second": 5.594, |
|
"eval_steps_per_second": 1.399, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.1072e-07, |
|
"loss": 1.8614, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7201639413833618, |
|
"eval_runtime": 0.7611, |
|
"eval_samples_per_second": 5.256, |
|
"eval_steps_per_second": 1.314, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.1023999999999998e-07, |
|
"loss": 1.8211, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7165008783340454, |
|
"eval_runtime": 0.7193, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 1.39, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.0976e-07, |
|
"loss": 1.8553, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7123990058898926, |
|
"eval_runtime": 0.5463, |
|
"eval_samples_per_second": 7.323, |
|
"eval_steps_per_second": 1.831, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.0927999999999998e-07, |
|
"loss": 1.7978, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7084720134735107, |
|
"eval_runtime": 0.574, |
|
"eval_samples_per_second": 6.968, |
|
"eval_steps_per_second": 1.742, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.0879999999999996e-07, |
|
"loss": 1.8203, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.7048146724700928, |
|
"eval_runtime": 0.5838, |
|
"eval_samples_per_second": 6.852, |
|
"eval_steps_per_second": 1.713, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0832e-07, |
|
"loss": 1.8192, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.7010469436645508, |
|
"eval_runtime": 0.5225, |
|
"eval_samples_per_second": 7.656, |
|
"eval_steps_per_second": 1.914, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0784e-07, |
|
"loss": 1.8532, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6973625421524048, |
|
"eval_runtime": 0.525, |
|
"eval_samples_per_second": 7.619, |
|
"eval_steps_per_second": 1.905, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0736e-07, |
|
"loss": 1.8307, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6935136318206787, |
|
"eval_runtime": 0.7235, |
|
"eval_samples_per_second": 5.528, |
|
"eval_steps_per_second": 1.382, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0687999999999998e-07, |
|
"loss": 1.8207, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6895670890808105, |
|
"eval_runtime": 0.8289, |
|
"eval_samples_per_second": 4.826, |
|
"eval_steps_per_second": 1.206, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0639999999999997e-07, |
|
"loss": 1.7895, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6858075857162476, |
|
"eval_runtime": 0.7778, |
|
"eval_samples_per_second": 5.143, |
|
"eval_steps_per_second": 1.286, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.0592e-07, |
|
"loss": 1.7976, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6820955276489258, |
|
"eval_runtime": 0.5265, |
|
"eval_samples_per_second": 7.597, |
|
"eval_steps_per_second": 1.899, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0544e-07, |
|
"loss": 1.814, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6785138845443726, |
|
"eval_runtime": 0.5179, |
|
"eval_samples_per_second": 7.724, |
|
"eval_steps_per_second": 1.931, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0496e-07, |
|
"loss": 1.7972, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.674804449081421, |
|
"eval_runtime": 0.5304, |
|
"eval_samples_per_second": 7.541, |
|
"eval_steps_per_second": 1.885, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0448e-07, |
|
"loss": 1.8258, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6713837385177612, |
|
"eval_runtime": 0.5336, |
|
"eval_samples_per_second": 7.496, |
|
"eval_steps_per_second": 1.874, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.04e-07, |
|
"loss": 1.79, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.667376160621643, |
|
"eval_runtime": 0.7608, |
|
"eval_samples_per_second": 5.258, |
|
"eval_steps_per_second": 1.314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0351999999999999e-07, |
|
"loss": 1.802, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6640408039093018, |
|
"eval_runtime": 0.7498, |
|
"eval_samples_per_second": 5.335, |
|
"eval_steps_per_second": 1.334, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0303999999999997e-07, |
|
"loss": 1.7784, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6603385210037231, |
|
"eval_runtime": 0.7501, |
|
"eval_samples_per_second": 5.333, |
|
"eval_steps_per_second": 1.333, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.0256e-07, |
|
"loss": 1.7671, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6568516492843628, |
|
"eval_runtime": 0.5206, |
|
"eval_samples_per_second": 7.684, |
|
"eval_steps_per_second": 1.921, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.0208e-07, |
|
"loss": 1.7618, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.653469443321228, |
|
"eval_runtime": 0.5354, |
|
"eval_samples_per_second": 7.472, |
|
"eval_steps_per_second": 1.868, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.016e-07, |
|
"loss": 1.8207, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6502578258514404, |
|
"eval_runtime": 0.523, |
|
"eval_samples_per_second": 7.648, |
|
"eval_steps_per_second": 1.912, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.0112e-07, |
|
"loss": 1.7837, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6467454433441162, |
|
"eval_runtime": 0.5297, |
|
"eval_samples_per_second": 7.552, |
|
"eval_steps_per_second": 1.888, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.0063999999999998e-07, |
|
"loss": 1.8066, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6439214944839478, |
|
"eval_runtime": 0.522, |
|
"eval_samples_per_second": 7.663, |
|
"eval_steps_per_second": 1.916, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.0016e-07, |
|
"loss": 1.7814, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6407381296157837, |
|
"eval_runtime": 0.5382, |
|
"eval_samples_per_second": 7.432, |
|
"eval_steps_per_second": 1.858, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9967999999999997e-07, |
|
"loss": 1.7244, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6372514963150024, |
|
"eval_runtime": 0.7157, |
|
"eval_samples_per_second": 5.589, |
|
"eval_steps_per_second": 1.397, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.992e-07, |
|
"loss": 1.7195, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.634232997894287, |
|
"eval_runtime": 0.7254, |
|
"eval_samples_per_second": 5.514, |
|
"eval_steps_per_second": 1.379, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9872e-07, |
|
"loss": 1.7524, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6310441493988037, |
|
"eval_runtime": 0.7839, |
|
"eval_samples_per_second": 5.103, |
|
"eval_steps_per_second": 1.276, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9824e-07, |
|
"loss": 1.7644, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6279191970825195, |
|
"eval_runtime": 0.5253, |
|
"eval_samples_per_second": 7.615, |
|
"eval_steps_per_second": 1.904, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9776e-07, |
|
"loss": 1.7171, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6244579553604126, |
|
"eval_runtime": 0.5359, |
|
"eval_samples_per_second": 7.464, |
|
"eval_steps_per_second": 1.866, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9727999999999998e-07, |
|
"loss": 1.7418, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6212078332901, |
|
"eval_runtime": 0.5379, |
|
"eval_samples_per_second": 7.436, |
|
"eval_steps_per_second": 1.859, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.968e-07, |
|
"loss": 1.7337, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6180227994918823, |
|
"eval_runtime": 0.5259, |
|
"eval_samples_per_second": 7.606, |
|
"eval_steps_per_second": 1.902, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9631999999999997e-07, |
|
"loss": 1.7441, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.61477530002594, |
|
"eval_runtime": 0.5216, |
|
"eval_samples_per_second": 7.669, |
|
"eval_steps_per_second": 1.917, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9584e-07, |
|
"loss": 1.694, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.611538052558899, |
|
"eval_runtime": 0.6803, |
|
"eval_samples_per_second": 5.88, |
|
"eval_steps_per_second": 1.47, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9536e-07, |
|
"loss": 1.7601, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6083098649978638, |
|
"eval_runtime": 0.716, |
|
"eval_samples_per_second": 5.586, |
|
"eval_steps_per_second": 1.397, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9487999999999998e-07, |
|
"loss": 1.7081, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6050214767456055, |
|
"eval_runtime": 0.7622, |
|
"eval_samples_per_second": 5.248, |
|
"eval_steps_per_second": 1.312, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.944e-07, |
|
"loss": 1.7101, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6019953489303589, |
|
"eval_runtime": 0.7766, |
|
"eval_samples_per_second": 5.151, |
|
"eval_steps_per_second": 1.288, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9391999999999998e-07, |
|
"loss": 1.7271, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.5990221500396729, |
|
"eval_runtime": 0.5153, |
|
"eval_samples_per_second": 7.763, |
|
"eval_steps_per_second": 1.941, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9344e-07, |
|
"loss": 1.7402, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5954092741012573, |
|
"eval_runtime": 0.5168, |
|
"eval_samples_per_second": 7.74, |
|
"eval_steps_per_second": 1.935, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9296e-07, |
|
"loss": 1.7125, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5921534299850464, |
|
"eval_runtime": 0.5424, |
|
"eval_samples_per_second": 7.375, |
|
"eval_steps_per_second": 1.844, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9248e-07, |
|
"loss": 1.6949, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5888370275497437, |
|
"eval_runtime": 0.5307, |
|
"eval_samples_per_second": 7.537, |
|
"eval_steps_per_second": 1.884, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.92e-07, |
|
"loss": 1.7145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5858186483383179, |
|
"eval_runtime": 0.511, |
|
"eval_samples_per_second": 7.828, |
|
"eval_steps_per_second": 1.957, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9151999999999998e-07, |
|
"loss": 1.6665, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5824443101882935, |
|
"eval_runtime": 0.6907, |
|
"eval_samples_per_second": 5.791, |
|
"eval_steps_per_second": 1.448, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9104e-07, |
|
"loss": 1.6929, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5796196460723877, |
|
"eval_runtime": 0.7487, |
|
"eval_samples_per_second": 5.342, |
|
"eval_steps_per_second": 1.336, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.9055999999999998e-07, |
|
"loss": 1.7068, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.5765777826309204, |
|
"eval_runtime": 0.7477, |
|
"eval_samples_per_second": 5.35, |
|
"eval_steps_per_second": 1.337, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9008000000000002e-07, |
|
"loss": 1.6877, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.57340669631958, |
|
"eval_runtime": 0.753, |
|
"eval_samples_per_second": 5.312, |
|
"eval_steps_per_second": 1.328, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.896e-07, |
|
"loss": 1.6718, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.5706267356872559, |
|
"eval_runtime": 0.514, |
|
"eval_samples_per_second": 7.782, |
|
"eval_steps_per_second": 1.945, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8912e-07, |
|
"loss": 1.6886, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.5676339864730835, |
|
"eval_runtime": 0.5222, |
|
"eval_samples_per_second": 7.66, |
|
"eval_steps_per_second": 1.915, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8864e-07, |
|
"loss": 1.7459, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.5645827054977417, |
|
"eval_runtime": 0.5299, |
|
"eval_samples_per_second": 7.548, |
|
"eval_steps_per_second": 1.887, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8815999999999999e-07, |
|
"loss": 1.6596, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.5616861581802368, |
|
"eval_runtime": 0.5303, |
|
"eval_samples_per_second": 7.543, |
|
"eval_steps_per_second": 1.886, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8768e-07, |
|
"loss": 1.6689, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.5588451623916626, |
|
"eval_runtime": 0.5236, |
|
"eval_samples_per_second": 7.639, |
|
"eval_steps_per_second": 1.91, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8719999999999998e-07, |
|
"loss": 1.6744, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.5560673475265503, |
|
"eval_runtime": 0.7233, |
|
"eval_samples_per_second": 5.53, |
|
"eval_steps_per_second": 1.383, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8671999999999997e-07, |
|
"loss": 1.7009, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.5533243417739868, |
|
"eval_runtime": 0.6983, |
|
"eval_samples_per_second": 5.728, |
|
"eval_steps_per_second": 1.432, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8624e-07, |
|
"loss": 1.6651, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.55048668384552, |
|
"eval_runtime": 0.7511, |
|
"eval_samples_per_second": 5.325, |
|
"eval_steps_per_second": 1.331, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8576e-07, |
|
"loss": 1.6821, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.547943353652954, |
|
"eval_runtime": 0.532, |
|
"eval_samples_per_second": 7.519, |
|
"eval_steps_per_second": 1.88, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8528e-07, |
|
"loss": 1.6453, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.5453405380249023, |
|
"eval_runtime": 0.5463, |
|
"eval_samples_per_second": 7.322, |
|
"eval_steps_per_second": 1.831, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.848e-07, |
|
"loss": 1.6624, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.542648196220398, |
|
"eval_runtime": 0.5288, |
|
"eval_samples_per_second": 7.564, |
|
"eval_steps_per_second": 1.891, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8431999999999997e-07, |
|
"loss": 1.6453, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.5402462482452393, |
|
"eval_runtime": 0.5242, |
|
"eval_samples_per_second": 7.63, |
|
"eval_steps_per_second": 1.908, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8383999999999998e-07, |
|
"loss": 1.6451, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.5377165079116821, |
|
"eval_runtime": 0.5169, |
|
"eval_samples_per_second": 7.738, |
|
"eval_steps_per_second": 1.935, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8335999999999997e-07, |
|
"loss": 1.6627, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.5353412628173828, |
|
"eval_runtime": 0.6797, |
|
"eval_samples_per_second": 5.885, |
|
"eval_steps_per_second": 1.471, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8288e-07, |
|
"loss": 1.6423, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.5325669050216675, |
|
"eval_runtime": 0.7175, |
|
"eval_samples_per_second": 5.575, |
|
"eval_steps_per_second": 1.394, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.824e-07, |
|
"loss": 1.652, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.530207872390747, |
|
"eval_runtime": 0.8099, |
|
"eval_samples_per_second": 4.939, |
|
"eval_steps_per_second": 1.235, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8192e-07, |
|
"loss": 1.6414, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.5278236865997314, |
|
"eval_runtime": 0.7814, |
|
"eval_samples_per_second": 5.119, |
|
"eval_steps_per_second": 1.28, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8144e-07, |
|
"loss": 1.6107, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5253430604934692, |
|
"eval_runtime": 0.5386, |
|
"eval_samples_per_second": 7.427, |
|
"eval_steps_per_second": 1.857, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8095999999999997e-07, |
|
"loss": 1.6599, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5225120782852173, |
|
"eval_runtime": 0.5302, |
|
"eval_samples_per_second": 7.544, |
|
"eval_steps_per_second": 1.886, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8048e-07, |
|
"loss": 1.6326, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5201939344406128, |
|
"eval_runtime": 0.533, |
|
"eval_samples_per_second": 7.505, |
|
"eval_steps_per_second": 1.876, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8e-07, |
|
"loss": 1.6324, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5175316333770752, |
|
"eval_runtime": 0.5316, |
|
"eval_samples_per_second": 7.525, |
|
"eval_steps_per_second": 1.881, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7952e-07, |
|
"loss": 1.5907, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5149424076080322, |
|
"eval_runtime": 0.7298, |
|
"eval_samples_per_second": 5.481, |
|
"eval_steps_per_second": 1.37, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7904e-07, |
|
"loss": 1.6465, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.5124318599700928, |
|
"eval_runtime": 0.7308, |
|
"eval_samples_per_second": 5.473, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7855999999999998e-07, |
|
"loss": 1.6148, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.510151743888855, |
|
"eval_runtime": 0.7345, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 1.361, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7808e-07, |
|
"loss": 1.6064, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.5073630809783936, |
|
"eval_runtime": 0.5414, |
|
"eval_samples_per_second": 7.388, |
|
"eval_steps_per_second": 1.847, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7759999999999998e-07, |
|
"loss": 1.6342, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.5052520036697388, |
|
"eval_runtime": 0.516, |
|
"eval_samples_per_second": 7.751, |
|
"eval_steps_per_second": 1.938, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7712000000000001e-07, |
|
"loss": 1.605, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.5025243759155273, |
|
"eval_runtime": 0.5373, |
|
"eval_samples_per_second": 7.445, |
|
"eval_steps_per_second": 1.861, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7664e-07, |
|
"loss": 1.6121, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.500252604484558, |
|
"eval_runtime": 0.5476, |
|
"eval_samples_per_second": 7.304, |
|
"eval_steps_per_second": 1.826, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7616e-07, |
|
"loss": 1.617, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.4977892637252808, |
|
"eval_runtime": 0.5255, |
|
"eval_samples_per_second": 7.612, |
|
"eval_steps_per_second": 1.903, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7568e-07, |
|
"loss": 1.5897, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.4954513311386108, |
|
"eval_runtime": 0.7255, |
|
"eval_samples_per_second": 5.513, |
|
"eval_steps_per_second": 1.378, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7519999999999998e-07, |
|
"loss": 1.6022, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.4929691553115845, |
|
"eval_runtime": 0.6954, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 1.438, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7472e-07, |
|
"loss": 1.5748, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.4902769327163696, |
|
"eval_runtime": 0.8026, |
|
"eval_samples_per_second": 4.984, |
|
"eval_steps_per_second": 1.246, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7423999999999998e-07, |
|
"loss": 1.5974, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.4878779649734497, |
|
"eval_runtime": 0.7804, |
|
"eval_samples_per_second": 5.125, |
|
"eval_steps_per_second": 1.281, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7376000000000002e-07, |
|
"loss": 1.6126, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.48554527759552, |
|
"eval_runtime": 0.5423, |
|
"eval_samples_per_second": 7.376, |
|
"eval_steps_per_second": 1.844, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7328e-07, |
|
"loss": 1.6189, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.4827589988708496, |
|
"eval_runtime": 0.5326, |
|
"eval_samples_per_second": 7.511, |
|
"eval_steps_per_second": 1.878, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7279999999999999e-07, |
|
"loss": 1.5916, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.4803836345672607, |
|
"eval_runtime": 0.5273, |
|
"eval_samples_per_second": 7.585, |
|
"eval_steps_per_second": 1.896, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7232e-07, |
|
"loss": 1.5938, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.4778516292572021, |
|
"eval_runtime": 0.5436, |
|
"eval_samples_per_second": 7.358, |
|
"eval_steps_per_second": 1.839, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7183999999999998e-07, |
|
"loss": 1.6026, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.475649118423462, |
|
"eval_runtime": 0.5298, |
|
"eval_samples_per_second": 7.549, |
|
"eval_steps_per_second": 1.887, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7136e-07, |
|
"loss": 1.5687, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.473489761352539, |
|
"eval_runtime": 0.7191, |
|
"eval_samples_per_second": 5.562, |
|
"eval_steps_per_second": 1.391, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7087999999999998e-07, |
|
"loss": 1.5413, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.4712145328521729, |
|
"eval_runtime": 0.7022, |
|
"eval_samples_per_second": 5.696, |
|
"eval_steps_per_second": 1.424, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7039999999999996e-07, |
|
"loss": 1.5778, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.4688694477081299, |
|
"eval_runtime": 0.7707, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 1.298, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.6992e-07, |
|
"loss": 1.5731, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.4664225578308105, |
|
"eval_runtime": 0.5386, |
|
"eval_samples_per_second": 7.427, |
|
"eval_steps_per_second": 1.857, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6944e-07, |
|
"loss": 1.5625, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.464247465133667, |
|
"eval_runtime": 0.5484, |
|
"eval_samples_per_second": 7.294, |
|
"eval_steps_per_second": 1.823, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6896e-07, |
|
"loss": 1.55, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4620987176895142, |
|
"eval_runtime": 0.5342, |
|
"eval_samples_per_second": 7.488, |
|
"eval_steps_per_second": 1.872, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6847999999999998e-07, |
|
"loss": 1.5852, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.459930419921875, |
|
"eval_runtime": 0.5332, |
|
"eval_samples_per_second": 7.501, |
|
"eval_steps_per_second": 1.875, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.68e-07, |
|
"loss": 1.5614, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4578797817230225, |
|
"eval_runtime": 0.5504, |
|
"eval_samples_per_second": 7.268, |
|
"eval_steps_per_second": 1.817, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6752e-07, |
|
"loss": 1.5619, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4559952020645142, |
|
"eval_runtime": 0.7448, |
|
"eval_samples_per_second": 5.37, |
|
"eval_steps_per_second": 1.343, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6704e-07, |
|
"loss": 1.5658, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.454249382019043, |
|
"eval_runtime": 0.773, |
|
"eval_samples_per_second": 5.174, |
|
"eval_steps_per_second": 1.294, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.6656e-07, |
|
"loss": 1.5699, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.4521348476409912, |
|
"eval_runtime": 0.8287, |
|
"eval_samples_per_second": 4.827, |
|
"eval_steps_per_second": 1.207, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6608e-07, |
|
"loss": 1.5738, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.450175404548645, |
|
"eval_runtime": 0.5398, |
|
"eval_samples_per_second": 7.41, |
|
"eval_steps_per_second": 1.852, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.656e-07, |
|
"loss": 1.5823, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.4481428861618042, |
|
"eval_runtime": 0.5592, |
|
"eval_samples_per_second": 7.153, |
|
"eval_steps_per_second": 1.788, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6511999999999999e-07, |
|
"loss": 1.5425, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.4458932876586914, |
|
"eval_runtime": 0.5511, |
|
"eval_samples_per_second": 7.259, |
|
"eval_steps_per_second": 1.815, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6463999999999997e-07, |
|
"loss": 1.5604, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.4438304901123047, |
|
"eval_runtime": 0.5355, |
|
"eval_samples_per_second": 7.47, |
|
"eval_steps_per_second": 1.867, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6416e-07, |
|
"loss": 1.5562, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.442002773284912, |
|
"eval_runtime": 0.5332, |
|
"eval_samples_per_second": 7.502, |
|
"eval_steps_per_second": 1.876, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6368e-07, |
|
"loss": 1.555, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.4399393796920776, |
|
"eval_runtime": 0.7524, |
|
"eval_samples_per_second": 5.316, |
|
"eval_steps_per_second": 1.329, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.632e-07, |
|
"loss": 1.5158, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.437983512878418, |
|
"eval_runtime": 0.7269, |
|
"eval_samples_per_second": 5.503, |
|
"eval_steps_per_second": 1.376, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6272e-07, |
|
"loss": 1.5272, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.435863733291626, |
|
"eval_runtime": 0.7356, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 1.359, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6223999999999998e-07, |
|
"loss": 1.5467, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.4338979721069336, |
|
"eval_runtime": 0.5695, |
|
"eval_samples_per_second": 7.023, |
|
"eval_steps_per_second": 1.756, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6176e-07, |
|
"loss": 1.5399, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.4317151308059692, |
|
"eval_runtime": 0.5215, |
|
"eval_samples_per_second": 7.669, |
|
"eval_steps_per_second": 1.917, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6127999999999997e-07, |
|
"loss": 1.5221, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.4296718835830688, |
|
"eval_runtime": 0.5471, |
|
"eval_samples_per_second": 7.311, |
|
"eval_steps_per_second": 1.828, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.608e-07, |
|
"loss": 1.5022, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.4277141094207764, |
|
"eval_runtime": 0.5395, |
|
"eval_samples_per_second": 7.414, |
|
"eval_steps_per_second": 1.853, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6032e-07, |
|
"loss": 1.5385, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.4257354736328125, |
|
"eval_runtime": 0.5342, |
|
"eval_samples_per_second": 7.487, |
|
"eval_steps_per_second": 1.872, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5984e-07, |
|
"loss": 1.5042, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.4236301183700562, |
|
"eval_runtime": 0.6434, |
|
"eval_samples_per_second": 6.217, |
|
"eval_steps_per_second": 1.554, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5936e-07, |
|
"loss": 1.5007, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.421656608581543, |
|
"eval_runtime": 0.7224, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 1.384, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5887999999999998e-07, |
|
"loss": 1.5323, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.4196075201034546, |
|
"eval_runtime": 0.7946, |
|
"eval_samples_per_second": 5.034, |
|
"eval_steps_per_second": 1.259, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.584e-07, |
|
"loss": 1.5269, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.4174154996871948, |
|
"eval_runtime": 0.82, |
|
"eval_samples_per_second": 4.878, |
|
"eval_steps_per_second": 1.22, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5791999999999997e-07, |
|
"loss": 1.5379, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.4156051874160767, |
|
"eval_runtime": 0.5319, |
|
"eval_samples_per_second": 7.52, |
|
"eval_steps_per_second": 1.88, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5744e-07, |
|
"loss": 1.522, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.4136687517166138, |
|
"eval_runtime": 0.5286, |
|
"eval_samples_per_second": 7.567, |
|
"eval_steps_per_second": 1.892, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5696e-07, |
|
"loss": 1.506, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.4115678071975708, |
|
"eval_runtime": 0.553, |
|
"eval_samples_per_second": 7.233, |
|
"eval_steps_per_second": 1.808, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5647999999999998e-07, |
|
"loss": 1.4986, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.409631371498108, |
|
"eval_runtime": 0.5273, |
|
"eval_samples_per_second": 7.585, |
|
"eval_steps_per_second": 1.896, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.56e-07, |
|
"loss": 1.4918, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.407455563545227, |
|
"eval_runtime": 0.5269, |
|
"eval_samples_per_second": 7.592, |
|
"eval_steps_per_second": 1.898, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5551999999999998e-07, |
|
"loss": 1.5124, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.4056380987167358, |
|
"eval_runtime": 0.7536, |
|
"eval_samples_per_second": 5.308, |
|
"eval_steps_per_second": 1.327, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5504000000000002e-07, |
|
"loss": 1.4926, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.403800368309021, |
|
"eval_runtime": 0.7248, |
|
"eval_samples_per_second": 5.519, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5456e-07, |
|
"loss": 1.5053, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.40152907371521, |
|
"eval_runtime": 0.7447, |
|
"eval_samples_per_second": 5.371, |
|
"eval_steps_per_second": 1.343, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5408e-07, |
|
"loss": 1.5043, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3996310234069824, |
|
"eval_runtime": 0.738, |
|
"eval_samples_per_second": 5.42, |
|
"eval_steps_per_second": 1.355, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.536e-07, |
|
"loss": 1.5068, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3975541591644287, |
|
"eval_runtime": 0.5275, |
|
"eval_samples_per_second": 7.583, |
|
"eval_steps_per_second": 1.896, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5311999999999998e-07, |
|
"loss": 1.5039, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3954721689224243, |
|
"eval_runtime": 0.5317, |
|
"eval_samples_per_second": 7.523, |
|
"eval_steps_per_second": 1.881, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5264e-07, |
|
"loss": 1.4772, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3933203220367432, |
|
"eval_runtime": 0.5283, |
|
"eval_samples_per_second": 7.571, |
|
"eval_steps_per_second": 1.893, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5215999999999998e-07, |
|
"loss": 1.4873, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3916254043579102, |
|
"eval_runtime": 0.5344, |
|
"eval_samples_per_second": 7.485, |
|
"eval_steps_per_second": 1.871, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5168000000000002e-07, |
|
"loss": 1.4977, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.3896205425262451, |
|
"eval_runtime": 0.5249, |
|
"eval_samples_per_second": 7.62, |
|
"eval_steps_per_second": 1.905, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.512e-07, |
|
"loss": 1.5016, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.3873213529586792, |
|
"eval_runtime": 0.7136, |
|
"eval_samples_per_second": 5.605, |
|
"eval_steps_per_second": 1.401, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5072e-07, |
|
"loss": 1.495, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.3854175806045532, |
|
"eval_runtime": 0.7372, |
|
"eval_samples_per_second": 5.426, |
|
"eval_steps_per_second": 1.357, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5024e-07, |
|
"loss": 1.4803, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.3834645748138428, |
|
"eval_runtime": 0.7836, |
|
"eval_samples_per_second": 5.104, |
|
"eval_steps_per_second": 1.276, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4975999999999999e-07, |
|
"loss": 1.4842, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.381633996963501, |
|
"eval_runtime": 0.5401, |
|
"eval_samples_per_second": 7.405, |
|
"eval_steps_per_second": 1.851, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4928e-07, |
|
"loss": 1.4762, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.379853367805481, |
|
"eval_runtime": 0.5233, |
|
"eval_samples_per_second": 7.644, |
|
"eval_steps_per_second": 1.911, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4879999999999998e-07, |
|
"loss": 1.4859, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.3780815601348877, |
|
"eval_runtime": 0.5276, |
|
"eval_samples_per_second": 7.582, |
|
"eval_steps_per_second": 1.895, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4832e-07, |
|
"loss": 1.4948, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.3763624429702759, |
|
"eval_runtime": 0.5355, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 1.867, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4784e-07, |
|
"loss": 1.4851, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.374289631843567, |
|
"eval_runtime": 0.5235, |
|
"eval_samples_per_second": 7.64, |
|
"eval_steps_per_second": 1.91, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4736e-07, |
|
"loss": 1.4749, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.3724154233932495, |
|
"eval_runtime": 0.6384, |
|
"eval_samples_per_second": 6.266, |
|
"eval_steps_per_second": 1.566, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4687999999999998e-07, |
|
"loss": 1.4594, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.3709261417388916, |
|
"eval_runtime": 0.7249, |
|
"eval_samples_per_second": 5.518, |
|
"eval_steps_per_second": 1.379, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.464e-07, |
|
"loss": 1.4517, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.3691537380218506, |
|
"eval_runtime": 0.7303, |
|
"eval_samples_per_second": 5.477, |
|
"eval_steps_per_second": 1.369, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4592e-07, |
|
"loss": 1.4239, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.3673396110534668, |
|
"eval_runtime": 0.7929, |
|
"eval_samples_per_second": 5.044, |
|
"eval_steps_per_second": 1.261, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4543999999999998e-07, |
|
"loss": 1.4775, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3657190799713135, |
|
"eval_runtime": 0.5509, |
|
"eval_samples_per_second": 7.261, |
|
"eval_steps_per_second": 1.815, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4496e-07, |
|
"loss": 1.4483, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3642776012420654, |
|
"eval_runtime": 0.5236, |
|
"eval_samples_per_second": 7.639, |
|
"eval_steps_per_second": 1.91, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4447999999999998e-07, |
|
"loss": 1.4688, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3624374866485596, |
|
"eval_runtime": 0.5281, |
|
"eval_samples_per_second": 7.574, |
|
"eval_steps_per_second": 1.893, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.44e-07, |
|
"loss": 1.4566, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3608499765396118, |
|
"eval_runtime": 0.5346, |
|
"eval_samples_per_second": 7.482, |
|
"eval_steps_per_second": 1.871, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4352e-07, |
|
"loss": 1.4592, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3591777086257935, |
|
"eval_runtime": 0.543, |
|
"eval_samples_per_second": 7.367, |
|
"eval_steps_per_second": 1.842, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4304e-07, |
|
"loss": 1.4505, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.357291340827942, |
|
"eval_runtime": 0.7548, |
|
"eval_samples_per_second": 5.299, |
|
"eval_steps_per_second": 1.325, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4256e-07, |
|
"loss": 1.4304, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.3557498455047607, |
|
"eval_runtime": 0.7262, |
|
"eval_samples_per_second": 5.508, |
|
"eval_steps_per_second": 1.377, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4208e-07, |
|
"loss": 1.4691, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.3540558815002441, |
|
"eval_runtime": 0.7121, |
|
"eval_samples_per_second": 5.617, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.416e-07, |
|
"loss": 1.4423, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.3522251844406128, |
|
"eval_runtime": 0.7774, |
|
"eval_samples_per_second": 5.145, |
|
"eval_steps_per_second": 1.286, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4111999999999998e-07, |
|
"loss": 1.4301, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.3508257865905762, |
|
"eval_runtime": 0.5433, |
|
"eval_samples_per_second": 7.362, |
|
"eval_steps_per_second": 1.841, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4064e-07, |
|
"loss": 1.4422, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.3490896224975586, |
|
"eval_runtime": 0.5369, |
|
"eval_samples_per_second": 7.451, |
|
"eval_steps_per_second": 1.863, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4016e-07, |
|
"loss": 1.4577, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.347461223602295, |
|
"eval_runtime": 0.5223, |
|
"eval_samples_per_second": 7.658, |
|
"eval_steps_per_second": 1.915, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.3968e-07, |
|
"loss": 1.4541, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.3457545042037964, |
|
"eval_runtime": 0.5399, |
|
"eval_samples_per_second": 7.409, |
|
"eval_steps_per_second": 1.852, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.392e-07, |
|
"loss": 1.4246, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.343980073928833, |
|
"eval_runtime": 0.5481, |
|
"eval_samples_per_second": 7.297, |
|
"eval_steps_per_second": 1.824, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3872e-07, |
|
"loss": 1.4507, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3423739671707153, |
|
"eval_runtime": 0.7414, |
|
"eval_samples_per_second": 5.395, |
|
"eval_steps_per_second": 1.349, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3824e-07, |
|
"loss": 1.4312, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3408253192901611, |
|
"eval_runtime": 0.7783, |
|
"eval_samples_per_second": 5.139, |
|
"eval_steps_per_second": 1.285, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3775999999999998e-07, |
|
"loss": 1.4394, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.339220404624939, |
|
"eval_runtime": 0.771, |
|
"eval_samples_per_second": 5.188, |
|
"eval_steps_per_second": 1.297, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.3728e-07, |
|
"loss": 1.4271, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3373547792434692, |
|
"eval_runtime": 0.5264, |
|
"eval_samples_per_second": 7.599, |
|
"eval_steps_per_second": 1.9, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.368e-07, |
|
"loss": 1.4081, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.3356679677963257, |
|
"eval_runtime": 0.5397, |
|
"eval_samples_per_second": 7.412, |
|
"eval_steps_per_second": 1.853, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3632e-07, |
|
"loss": 1.4314, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.333927035331726, |
|
"eval_runtime": 0.5418, |
|
"eval_samples_per_second": 7.382, |
|
"eval_steps_per_second": 1.846, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3583999999999998e-07, |
|
"loss": 1.4359, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3325647115707397, |
|
"eval_runtime": 0.5464, |
|
"eval_samples_per_second": 7.321, |
|
"eval_steps_per_second": 1.83, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3536e-07, |
|
"loss": 1.4381, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3307887315750122, |
|
"eval_runtime": 0.5493, |
|
"eval_samples_per_second": 7.282, |
|
"eval_steps_per_second": 1.82, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3488e-07, |
|
"loss": 1.4219, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3293663263320923, |
|
"eval_runtime": 0.5921, |
|
"eval_samples_per_second": 6.755, |
|
"eval_steps_per_second": 1.689, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3439999999999999e-07, |
|
"loss": 1.4669, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3278565406799316, |
|
"eval_runtime": 0.7788, |
|
"eval_samples_per_second": 5.136, |
|
"eval_steps_per_second": 1.284, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.3392e-07, |
|
"loss": 1.4163, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3260074853897095, |
|
"eval_runtime": 0.8128, |
|
"eval_samples_per_second": 4.921, |
|
"eval_steps_per_second": 1.23, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3343999999999998e-07, |
|
"loss": 1.4153, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3242360353469849, |
|
"eval_runtime": 0.8002, |
|
"eval_samples_per_second": 4.999, |
|
"eval_steps_per_second": 1.25, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3296e-07, |
|
"loss": 1.4506, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3229784965515137, |
|
"eval_runtime": 0.5395, |
|
"eval_samples_per_second": 7.414, |
|
"eval_steps_per_second": 1.854, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3247999999999998e-07, |
|
"loss": 1.4229, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3213036060333252, |
|
"eval_runtime": 0.5374, |
|
"eval_samples_per_second": 7.444, |
|
"eval_steps_per_second": 1.861, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.32e-07, |
|
"loss": 1.4218, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3196250200271606, |
|
"eval_runtime": 0.5404, |
|
"eval_samples_per_second": 7.402, |
|
"eval_steps_per_second": 1.851, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3152e-07, |
|
"loss": 1.4185, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3180840015411377, |
|
"eval_runtime": 0.5573, |
|
"eval_samples_per_second": 7.177, |
|
"eval_steps_per_second": 1.794, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3104e-07, |
|
"loss": 1.4283, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.316424012184143, |
|
"eval_runtime": 0.5204, |
|
"eval_samples_per_second": 7.686, |
|
"eval_steps_per_second": 1.922, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.3056e-07, |
|
"loss": 1.4202, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.3148062229156494, |
|
"eval_runtime": 0.7628, |
|
"eval_samples_per_second": 5.244, |
|
"eval_steps_per_second": 1.311, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.3007999999999998e-07, |
|
"loss": 1.3736, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.3131170272827148, |
|
"eval_runtime": 0.7763, |
|
"eval_samples_per_second": 5.153, |
|
"eval_steps_per_second": 1.288, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.296e-07, |
|
"loss": 1.4332, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.311560869216919, |
|
"eval_runtime": 0.7312, |
|
"eval_samples_per_second": 5.471, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2912e-07, |
|
"loss": 1.4287, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.309916615486145, |
|
"eval_runtime": 0.6738, |
|
"eval_samples_per_second": 5.936, |
|
"eval_steps_per_second": 1.484, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2864e-07, |
|
"loss": 1.4175, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.3080803155899048, |
|
"eval_runtime": 0.5396, |
|
"eval_samples_per_second": 7.412, |
|
"eval_steps_per_second": 1.853, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2816e-07, |
|
"loss": 1.4152, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.3066335916519165, |
|
"eval_runtime": 0.5523, |
|
"eval_samples_per_second": 7.243, |
|
"eval_steps_per_second": 1.811, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.2768e-07, |
|
"loss": 1.4036, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.3054327964782715, |
|
"eval_runtime": 0.5404, |
|
"eval_samples_per_second": 7.402, |
|
"eval_steps_per_second": 1.851, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.272e-07, |
|
"loss": 1.4033, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.3037904500961304, |
|
"eval_runtime": 0.5534, |
|
"eval_samples_per_second": 7.228, |
|
"eval_steps_per_second": 1.807, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2671999999999999e-07, |
|
"loss": 1.4095, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.302278757095337, |
|
"eval_runtime": 0.7546, |
|
"eval_samples_per_second": 5.301, |
|
"eval_steps_per_second": 1.325, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2624e-07, |
|
"loss": 1.4129, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.3008112907409668, |
|
"eval_runtime": 0.7157, |
|
"eval_samples_per_second": 5.589, |
|
"eval_steps_per_second": 1.397, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2576e-07, |
|
"loss": 1.3838, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.2994916439056396, |
|
"eval_runtime": 0.7773, |
|
"eval_samples_per_second": 5.146, |
|
"eval_steps_per_second": 1.286, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2528e-07, |
|
"loss": 1.3939, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.2979990243911743, |
|
"eval_runtime": 0.8203, |
|
"eval_samples_per_second": 4.876, |
|
"eval_steps_per_second": 1.219, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.2479999999999998e-07, |
|
"loss": 1.4023, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.2964202165603638, |
|
"eval_runtime": 0.5392, |
|
"eval_samples_per_second": 7.419, |
|
"eval_steps_per_second": 1.855, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2432e-07, |
|
"loss": 1.3751, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.2952665090560913, |
|
"eval_runtime": 0.533, |
|
"eval_samples_per_second": 7.505, |
|
"eval_steps_per_second": 1.876, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2384e-07, |
|
"loss": 1.3657, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.2935295104980469, |
|
"eval_runtime": 0.5428, |
|
"eval_samples_per_second": 7.369, |
|
"eval_steps_per_second": 1.842, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2336e-07, |
|
"loss": 1.375, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.292738914489746, |
|
"eval_runtime": 0.5365, |
|
"eval_samples_per_second": 7.456, |
|
"eval_steps_per_second": 1.864, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2288e-07, |
|
"loss": 1.3846, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.291104793548584, |
|
"eval_runtime": 0.5462, |
|
"eval_samples_per_second": 7.323, |
|
"eval_steps_per_second": 1.831, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2239999999999998e-07, |
|
"loss": 1.4192, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.2900675535202026, |
|
"eval_runtime": 0.7504, |
|
"eval_samples_per_second": 5.33, |
|
"eval_steps_per_second": 1.333, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.2192e-07, |
|
"loss": 1.3629, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.2886391878128052, |
|
"eval_runtime": 0.7924, |
|
"eval_samples_per_second": 5.048, |
|
"eval_steps_per_second": 1.262, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.2143999999999998e-07, |
|
"loss": 1.3947, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.287713646888733, |
|
"eval_runtime": 0.7522, |
|
"eval_samples_per_second": 5.318, |
|
"eval_steps_per_second": 1.329, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.2096e-07, |
|
"loss": 1.3485, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.2862787246704102, |
|
"eval_runtime": 0.5402, |
|
"eval_samples_per_second": 7.404, |
|
"eval_steps_per_second": 1.851, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.2048e-07, |
|
"loss": 1.405, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.2850462198257446, |
|
"eval_runtime": 0.5452, |
|
"eval_samples_per_second": 7.337, |
|
"eval_steps_per_second": 1.834, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.2e-07, |
|
"loss": 1.3758, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.2840522527694702, |
|
"eval_runtime": 0.541, |
|
"eval_samples_per_second": 7.394, |
|
"eval_steps_per_second": 1.849, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.1951999999999997e-07, |
|
"loss": 1.3832, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.282808542251587, |
|
"eval_runtime": 0.5449, |
|
"eval_samples_per_second": 7.34, |
|
"eval_steps_per_second": 1.835, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.1903999999999999e-07, |
|
"loss": 1.3314, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.2814455032348633, |
|
"eval_runtime": 0.5367, |
|
"eval_samples_per_second": 7.453, |
|
"eval_steps_per_second": 1.863, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.1856e-07, |
|
"loss": 1.3458, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.2800332307815552, |
|
"eval_runtime": 0.5306, |
|
"eval_samples_per_second": 7.538, |
|
"eval_steps_per_second": 1.884, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1808e-07, |
|
"loss": 1.357, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2792009115219116, |
|
"eval_runtime": 0.545, |
|
"eval_samples_per_second": 7.34, |
|
"eval_steps_per_second": 1.835, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.176e-07, |
|
"loss": 1.3808, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2777525186538696, |
|
"eval_runtime": 0.5523, |
|
"eval_samples_per_second": 7.243, |
|
"eval_steps_per_second": 1.811, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1712e-07, |
|
"loss": 1.3692, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2765049934387207, |
|
"eval_runtime": 0.752, |
|
"eval_samples_per_second": 5.319, |
|
"eval_steps_per_second": 1.33, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1663999999999999e-07, |
|
"loss": 1.3763, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2754418849945068, |
|
"eval_runtime": 0.7731, |
|
"eval_samples_per_second": 5.174, |
|
"eval_steps_per_second": 1.293, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1615999999999999e-07, |
|
"loss": 1.3505, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2741388082504272, |
|
"eval_runtime": 0.798, |
|
"eval_samples_per_second": 5.012, |
|
"eval_steps_per_second": 1.253, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.1567999999999999e-07, |
|
"loss": 1.3579, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.273074746131897, |
|
"eval_runtime": 0.5381, |
|
"eval_samples_per_second": 7.433, |
|
"eval_steps_per_second": 1.858, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.152e-07, |
|
"loss": 1.3567, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.2716789245605469, |
|
"eval_runtime": 0.5423, |
|
"eval_samples_per_second": 7.376, |
|
"eval_steps_per_second": 1.844, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.1472e-07, |
|
"loss": 1.3608, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.270503044128418, |
|
"eval_runtime": 0.538, |
|
"eval_samples_per_second": 7.435, |
|
"eval_steps_per_second": 1.859, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.1424000000000001e-07, |
|
"loss": 1.3895, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.269209384918213, |
|
"eval_runtime": 0.5287, |
|
"eval_samples_per_second": 7.566, |
|
"eval_steps_per_second": 1.892, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.1376e-07, |
|
"loss": 1.3614, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.2682329416275024, |
|
"eval_runtime": 0.5516, |
|
"eval_samples_per_second": 7.252, |
|
"eval_steps_per_second": 1.813, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.1327999999999999e-07, |
|
"loss": 1.3643, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.2671722173690796, |
|
"eval_runtime": 0.7443, |
|
"eval_samples_per_second": 5.374, |
|
"eval_steps_per_second": 1.344, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.1279999999999999e-07, |
|
"loss": 1.3537, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.2658395767211914, |
|
"eval_runtime": 0.7724, |
|
"eval_samples_per_second": 5.179, |
|
"eval_steps_per_second": 1.295, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.1232e-07, |
|
"loss": 1.3564, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.2647554874420166, |
|
"eval_runtime": 0.7818, |
|
"eval_samples_per_second": 5.116, |
|
"eval_steps_per_second": 1.279, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.1184e-07, |
|
"loss": 1.3433, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.263366937637329, |
|
"eval_runtime": 0.7501, |
|
"eval_samples_per_second": 5.333, |
|
"eval_steps_per_second": 1.333, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.1135999999999999e-07, |
|
"loss": 1.3805, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.2624878883361816, |
|
"eval_runtime": 0.5467, |
|
"eval_samples_per_second": 7.316, |
|
"eval_steps_per_second": 1.829, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.1087999999999998e-07, |
|
"loss": 1.3309, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.2612154483795166, |
|
"eval_runtime": 0.5298, |
|
"eval_samples_per_second": 7.55, |
|
"eval_steps_per_second": 1.888, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.104e-07, |
|
"loss": 1.3408, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.260028600692749, |
|
"eval_runtime": 0.5364, |
|
"eval_samples_per_second": 7.457, |
|
"eval_steps_per_second": 1.864, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.0992e-07, |
|
"loss": 1.3505, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.2590596675872803, |
|
"eval_runtime": 0.5419, |
|
"eval_samples_per_second": 7.381, |
|
"eval_steps_per_second": 1.845, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0943999999999999e-07, |
|
"loss": 1.355, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.258219838142395, |
|
"eval_runtime": 0.5356, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 1.867, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0896e-07, |
|
"loss": 1.3426, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2570216655731201, |
|
"eval_runtime": 0.7481, |
|
"eval_samples_per_second": 5.347, |
|
"eval_steps_per_second": 1.337, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0847999999999999e-07, |
|
"loss": 1.3476, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2555859088897705, |
|
"eval_runtime": 0.7344, |
|
"eval_samples_per_second": 5.447, |
|
"eval_steps_per_second": 1.362, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0799999999999999e-07, |
|
"loss": 1.3448, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2546257972717285, |
|
"eval_runtime": 0.8107, |
|
"eval_samples_per_second": 4.934, |
|
"eval_steps_per_second": 1.233, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0752e-07, |
|
"loss": 1.3702, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2532060146331787, |
|
"eval_runtime": 0.5269, |
|
"eval_samples_per_second": 7.592, |
|
"eval_steps_per_second": 1.898, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0704e-07, |
|
"loss": 1.3198, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2521923780441284, |
|
"eval_runtime": 0.5439, |
|
"eval_samples_per_second": 7.354, |
|
"eval_steps_per_second": 1.839, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.0656e-07, |
|
"loss": 1.3565, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.2510970830917358, |
|
"eval_runtime": 0.548, |
|
"eval_samples_per_second": 7.3, |
|
"eval_steps_per_second": 1.825, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0608000000000001e-07, |
|
"loss": 1.3496, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2498608827590942, |
|
"eval_runtime": 0.5393, |
|
"eval_samples_per_second": 7.417, |
|
"eval_steps_per_second": 1.854, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0559999999999999e-07, |
|
"loss": 1.3346, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2489620447158813, |
|
"eval_runtime": 0.5325, |
|
"eval_samples_per_second": 7.512, |
|
"eval_steps_per_second": 1.878, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0511999999999999e-07, |
|
"loss": 1.3097, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2476825714111328, |
|
"eval_runtime": 0.7695, |
|
"eval_samples_per_second": 5.198, |
|
"eval_steps_per_second": 1.3, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0463999999999999e-07, |
|
"loss": 1.3224, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2467668056488037, |
|
"eval_runtime": 0.7586, |
|
"eval_samples_per_second": 5.273, |
|
"eval_steps_per_second": 1.318, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0416e-07, |
|
"loss": 1.321, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2455267906188965, |
|
"eval_runtime": 0.7554, |
|
"eval_samples_per_second": 5.295, |
|
"eval_steps_per_second": 1.324, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.0368e-07, |
|
"loss": 1.3069, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.2445768117904663, |
|
"eval_runtime": 0.7823, |
|
"eval_samples_per_second": 5.113, |
|
"eval_steps_per_second": 1.278, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0319999999999998e-07, |
|
"loss": 1.3358, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.243558406829834, |
|
"eval_runtime": 0.5542, |
|
"eval_samples_per_second": 7.218, |
|
"eval_steps_per_second": 1.804, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0272e-07, |
|
"loss": 1.3413, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.242706060409546, |
|
"eval_runtime": 0.5488, |
|
"eval_samples_per_second": 7.289, |
|
"eval_steps_per_second": 1.822, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0224e-07, |
|
"loss": 1.3328, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.2416179180145264, |
|
"eval_runtime": 0.5351, |
|
"eval_samples_per_second": 7.475, |
|
"eval_steps_per_second": 1.869, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0175999999999999e-07, |
|
"loss": 1.341, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.2406481504440308, |
|
"eval_runtime": 0.5426, |
|
"eval_samples_per_second": 7.372, |
|
"eval_steps_per_second": 1.843, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0128e-07, |
|
"loss": 1.3022, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.2396165132522583, |
|
"eval_runtime": 0.5368, |
|
"eval_samples_per_second": 7.451, |
|
"eval_steps_per_second": 1.863, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.008e-07, |
|
"loss": 1.3309, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.2385637760162354, |
|
"eval_runtime": 0.731, |
|
"eval_samples_per_second": 5.472, |
|
"eval_steps_per_second": 1.368, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0031999999999999e-07, |
|
"loss": 1.3099, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.237720012664795, |
|
"eval_runtime": 0.7217, |
|
"eval_samples_per_second": 5.543, |
|
"eval_steps_per_second": 1.386, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.983999999999999e-08, |
|
"loss": 1.2979, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.2368155717849731, |
|
"eval_runtime": 0.7961, |
|
"eval_samples_per_second": 5.024, |
|
"eval_steps_per_second": 1.256, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.936e-08, |
|
"loss": 1.3219, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.2358148097991943, |
|
"eval_runtime": 0.6349, |
|
"eval_samples_per_second": 6.3, |
|
"eval_steps_per_second": 1.575, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.888e-08, |
|
"loss": 1.328, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.2349071502685547, |
|
"eval_runtime": 0.5583, |
|
"eval_samples_per_second": 7.165, |
|
"eval_steps_per_second": 1.791, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.84e-08, |
|
"loss": 1.3161, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.233930230140686, |
|
"eval_runtime": 0.5369, |
|
"eval_samples_per_second": 7.45, |
|
"eval_steps_per_second": 1.862, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.792e-08, |
|
"loss": 1.3435, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.2328994274139404, |
|
"eval_runtime": 0.5546, |
|
"eval_samples_per_second": 7.213, |
|
"eval_steps_per_second": 1.803, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.743999999999999e-08, |
|
"loss": 1.3191, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.2323206663131714, |
|
"eval_runtime": 0.5626, |
|
"eval_samples_per_second": 7.11, |
|
"eval_steps_per_second": 1.777, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.695999999999999e-08, |
|
"loss": 1.3427, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.2313920259475708, |
|
"eval_runtime": 0.7579, |
|
"eval_samples_per_second": 5.278, |
|
"eval_steps_per_second": 1.319, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.648e-08, |
|
"loss": 1.3157, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.2306554317474365, |
|
"eval_runtime": 0.7565, |
|
"eval_samples_per_second": 5.287, |
|
"eval_steps_per_second": 1.322, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.6e-08, |
|
"loss": 1.31, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.229779839515686, |
|
"eval_runtime": 0.8359, |
|
"eval_samples_per_second": 4.785, |
|
"eval_steps_per_second": 1.196, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.552e-08, |
|
"loss": 1.3418, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.2287366390228271, |
|
"eval_runtime": 0.8, |
|
"eval_samples_per_second": 5.0, |
|
"eval_steps_per_second": 1.25, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.504000000000001e-08, |
|
"loss": 1.3051, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.2279709577560425, |
|
"eval_runtime": 0.5523, |
|
"eval_samples_per_second": 7.243, |
|
"eval_steps_per_second": 1.811, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.456e-08, |
|
"loss": 1.3453, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.227084755897522, |
|
"eval_runtime": 0.5304, |
|
"eval_samples_per_second": 7.542, |
|
"eval_steps_per_second": 1.885, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.407999999999999e-08, |
|
"loss": 1.3146, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.226120114326477, |
|
"eval_runtime": 0.536, |
|
"eval_samples_per_second": 7.462, |
|
"eval_steps_per_second": 1.866, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.359999999999999e-08, |
|
"loss": 1.2961, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.2254786491394043, |
|
"eval_runtime": 0.5464, |
|
"eval_samples_per_second": 7.32, |
|
"eval_steps_per_second": 1.83, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.312e-08, |
|
"loss": 1.2989, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.2248467206954956, |
|
"eval_runtime": 0.5328, |
|
"eval_samples_per_second": 7.507, |
|
"eval_steps_per_second": 1.877, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.264e-08, |
|
"loss": 1.314, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.2238779067993164, |
|
"eval_runtime": 0.7698, |
|
"eval_samples_per_second": 5.196, |
|
"eval_steps_per_second": 1.299, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.215999999999999e-08, |
|
"loss": 1.3137, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.2231634855270386, |
|
"eval_runtime": 0.7516, |
|
"eval_samples_per_second": 5.322, |
|
"eval_steps_per_second": 1.33, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.167999999999998e-08, |
|
"loss": 1.323, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.222124457359314, |
|
"eval_runtime": 0.8004, |
|
"eval_samples_per_second": 4.997, |
|
"eval_steps_per_second": 1.249, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.12e-08, |
|
"loss": 1.3194, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.2216134071350098, |
|
"eval_runtime": 0.5409, |
|
"eval_samples_per_second": 7.394, |
|
"eval_steps_per_second": 1.849, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.072e-08, |
|
"loss": 1.2857, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.220569372177124, |
|
"eval_runtime": 0.5422, |
|
"eval_samples_per_second": 7.377, |
|
"eval_steps_per_second": 1.844, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.024e-08, |
|
"loss": 1.3101, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.2198562622070312, |
|
"eval_runtime": 0.5528, |
|
"eval_samples_per_second": 7.236, |
|
"eval_steps_per_second": 1.809, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.976e-08, |
|
"loss": 1.2962, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.2193635702133179, |
|
"eval_runtime": 0.5512, |
|
"eval_samples_per_second": 7.257, |
|
"eval_steps_per_second": 1.814, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.927999999999999e-08, |
|
"loss": 1.2927, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.218583345413208, |
|
"eval_runtime": 0.5559, |
|
"eval_samples_per_second": 7.195, |
|
"eval_steps_per_second": 1.799, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.879999999999999e-08, |
|
"loss": 1.2728, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.2178785800933838, |
|
"eval_runtime": 0.7605, |
|
"eval_samples_per_second": 5.26, |
|
"eval_steps_per_second": 1.315, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.832e-08, |
|
"loss": 1.2903, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2169758081436157, |
|
"eval_runtime": 0.7602, |
|
"eval_samples_per_second": 5.262, |
|
"eval_steps_per_second": 1.315, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.784e-08, |
|
"loss": 1.3108, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2163543701171875, |
|
"eval_runtime": 0.7924, |
|
"eval_samples_per_second": 5.048, |
|
"eval_steps_per_second": 1.262, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.736e-08, |
|
"loss": 1.2899, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2156285047531128, |
|
"eval_runtime": 0.5511, |
|
"eval_samples_per_second": 7.259, |
|
"eval_steps_per_second": 1.815, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.688000000000001e-08, |
|
"loss": 1.2996, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2148629426956177, |
|
"eval_runtime": 0.5465, |
|
"eval_samples_per_second": 7.32, |
|
"eval_steps_per_second": 1.83, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.639999999999999e-08, |
|
"loss": 1.2865, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2141329050064087, |
|
"eval_runtime": 0.5496, |
|
"eval_samples_per_second": 7.278, |
|
"eval_steps_per_second": 1.819, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.591999999999999e-08, |
|
"loss": 1.3004, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.2135276794433594, |
|
"eval_runtime": 0.8113, |
|
"eval_samples_per_second": 4.93, |
|
"eval_steps_per_second": 1.233, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.543999999999999e-08, |
|
"loss": 1.2916, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2127509117126465, |
|
"eval_runtime": 1.0008, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.999, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.496e-08, |
|
"loss": 1.2975, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2119174003601074, |
|
"eval_runtime": 0.9106, |
|
"eval_samples_per_second": 4.393, |
|
"eval_steps_per_second": 1.098, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.448e-08, |
|
"loss": 1.3071, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2113782167434692, |
|
"eval_runtime": 0.8267, |
|
"eval_samples_per_second": 4.838, |
|
"eval_steps_per_second": 1.21, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.4e-08, |
|
"loss": 1.2793, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2105748653411865, |
|
"eval_runtime": 0.9729, |
|
"eval_samples_per_second": 4.111, |
|
"eval_steps_per_second": 1.028, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.352e-08, |
|
"loss": 1.2755, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2097927331924438, |
|
"eval_runtime": 1.0248, |
|
"eval_samples_per_second": 3.903, |
|
"eval_steps_per_second": 0.976, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.304e-08, |
|
"loss": 1.2968, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2092970609664917, |
|
"eval_runtime": 0.8732, |
|
"eval_samples_per_second": 4.581, |
|
"eval_steps_per_second": 1.145, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.255999999999999e-08, |
|
"loss": 1.3226, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.2085187435150146, |
|
"eval_runtime": 0.994, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 1.006, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.208e-08, |
|
"loss": 1.3117, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.2078773975372314, |
|
"eval_runtime": 0.9321, |
|
"eval_samples_per_second": 4.291, |
|
"eval_steps_per_second": 1.073, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.16e-08, |
|
"loss": 1.2957, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.207309603691101, |
|
"eval_runtime": 0.8433, |
|
"eval_samples_per_second": 4.743, |
|
"eval_steps_per_second": 1.186, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.111999999999999e-08, |
|
"loss": 1.2885, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.206638216972351, |
|
"eval_runtime": 1.0078, |
|
"eval_samples_per_second": 3.969, |
|
"eval_steps_per_second": 0.992, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.063999999999999e-08, |
|
"loss": 1.2731, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.2057451009750366, |
|
"eval_runtime": 0.9696, |
|
"eval_samples_per_second": 4.125, |
|
"eval_steps_per_second": 1.031, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.016e-08, |
|
"loss": 1.2821, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.2051252126693726, |
|
"eval_runtime": 0.8398, |
|
"eval_samples_per_second": 4.763, |
|
"eval_steps_per_second": 1.191, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.968e-08, |
|
"loss": 1.2944, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.2045457363128662, |
|
"eval_runtime": 0.8549, |
|
"eval_samples_per_second": 4.679, |
|
"eval_steps_per_second": 1.17, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.92e-08, |
|
"loss": 1.2768, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.2040053606033325, |
|
"eval_runtime": 0.9035, |
|
"eval_samples_per_second": 4.427, |
|
"eval_steps_per_second": 1.107, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.872e-08, |
|
"loss": 1.2917, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.2034111022949219, |
|
"eval_runtime": 0.8657, |
|
"eval_samples_per_second": 4.621, |
|
"eval_steps_per_second": 1.155, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.823999999999999e-08, |
|
"loss": 1.2935, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.2027428150177002, |
|
"eval_runtime": 0.8157, |
|
"eval_samples_per_second": 4.903, |
|
"eval_steps_per_second": 1.226, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.775999999999999e-08, |
|
"loss": 1.268, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.2022608518600464, |
|
"eval_runtime": 0.8551, |
|
"eval_samples_per_second": 4.678, |
|
"eval_steps_per_second": 1.169, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.728e-08, |
|
"loss": 1.2761, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.2016183137893677, |
|
"eval_runtime": 0.8348, |
|
"eval_samples_per_second": 4.791, |
|
"eval_steps_per_second": 1.198, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.68e-08, |
|
"loss": 1.3107, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.201023817062378, |
|
"eval_runtime": 0.8161, |
|
"eval_samples_per_second": 4.901, |
|
"eval_steps_per_second": 1.225, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.632e-08, |
|
"loss": 1.2752, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.2003718614578247, |
|
"eval_runtime": 1.0033, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.584000000000001e-08, |
|
"loss": 1.2661, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.1996574401855469, |
|
"eval_runtime": 0.8381, |
|
"eval_samples_per_second": 4.773, |
|
"eval_steps_per_second": 1.193, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.536e-08, |
|
"loss": 1.2985, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.199149250984192, |
|
"eval_runtime": 0.8342, |
|
"eval_samples_per_second": 4.795, |
|
"eval_steps_per_second": 1.199, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.487999999999999e-08, |
|
"loss": 1.2801, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.1985127925872803, |
|
"eval_runtime": 0.9225, |
|
"eval_samples_per_second": 4.336, |
|
"eval_steps_per_second": 1.084, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.439999999999999e-08, |
|
"loss": 1.2775, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.1979784965515137, |
|
"eval_runtime": 0.92, |
|
"eval_samples_per_second": 4.348, |
|
"eval_steps_per_second": 1.087, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.392e-08, |
|
"loss": 1.2741, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.1976195573806763, |
|
"eval_runtime": 0.8775, |
|
"eval_samples_per_second": 4.558, |
|
"eval_steps_per_second": 1.14, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.343999999999999e-08, |
|
"loss": 1.2747, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.197205662727356, |
|
"eval_runtime": 0.8885, |
|
"eval_samples_per_second": 4.502, |
|
"eval_steps_per_second": 1.125, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.296e-08, |
|
"loss": 1.2772, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1963942050933838, |
|
"eval_runtime": 0.9218, |
|
"eval_samples_per_second": 4.339, |
|
"eval_steps_per_second": 1.085, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.248e-08, |
|
"loss": 1.2953, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1961135864257812, |
|
"eval_runtime": 0.8111, |
|
"eval_samples_per_second": 4.932, |
|
"eval_steps_per_second": 1.233, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.2e-08, |
|
"loss": 1.3052, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1957508325576782, |
|
"eval_runtime": 0.9897, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 1.01, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.152e-08, |
|
"loss": 1.2505, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1951295137405396, |
|
"eval_runtime": 0.9069, |
|
"eval_samples_per_second": 4.41, |
|
"eval_steps_per_second": 1.103, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.104e-08, |
|
"loss": 1.3088, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1944581270217896, |
|
"eval_runtime": 0.7895, |
|
"eval_samples_per_second": 5.066, |
|
"eval_steps_per_second": 1.267, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.055999999999999e-08, |
|
"loss": 1.2705, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.1939201354980469, |
|
"eval_runtime": 0.912, |
|
"eval_samples_per_second": 4.386, |
|
"eval_steps_per_second": 1.097, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.008e-08, |
|
"loss": 1.2606, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1934112310409546, |
|
"eval_runtime": 0.948, |
|
"eval_samples_per_second": 4.219, |
|
"eval_steps_per_second": 1.055, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.96e-08, |
|
"loss": 1.2729, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1931556463241577, |
|
"eval_runtime": 0.8456, |
|
"eval_samples_per_second": 4.73, |
|
"eval_steps_per_second": 1.183, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.912e-08, |
|
"loss": 1.2642, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1926673650741577, |
|
"eval_runtime": 0.8112, |
|
"eval_samples_per_second": 4.931, |
|
"eval_steps_per_second": 1.233, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.864e-08, |
|
"loss": 1.2903, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1919676065444946, |
|
"eval_runtime": 0.865, |
|
"eval_samples_per_second": 4.624, |
|
"eval_steps_per_second": 1.156, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.816e-08, |
|
"loss": 1.2688, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.191756010055542, |
|
"eval_runtime": 0.8924, |
|
"eval_samples_per_second": 4.482, |
|
"eval_steps_per_second": 1.121, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.768e-08, |
|
"loss": 1.2677, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.1909488439559937, |
|
"eval_runtime": 0.8423, |
|
"eval_samples_per_second": 4.749, |
|
"eval_steps_per_second": 1.187, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.719999999999999e-08, |
|
"loss": 1.2747, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.1905359029769897, |
|
"eval_runtime": 0.8982, |
|
"eval_samples_per_second": 4.453, |
|
"eval_steps_per_second": 1.113, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.671999999999999e-08, |
|
"loss": 1.2512, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.19022798538208, |
|
"eval_runtime": 0.8246, |
|
"eval_samples_per_second": 4.851, |
|
"eval_steps_per_second": 1.213, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.623999999999999e-08, |
|
"loss": 1.2651, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.1898229122161865, |
|
"eval_runtime": 0.8116, |
|
"eval_samples_per_second": 4.928, |
|
"eval_steps_per_second": 1.232, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.576e-08, |
|
"loss": 1.2655, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.189262866973877, |
|
"eval_runtime": 1.0243, |
|
"eval_samples_per_second": 3.905, |
|
"eval_steps_per_second": 0.976, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.528e-08, |
|
"loss": 1.2617, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.1888002157211304, |
|
"eval_runtime": 0.8812, |
|
"eval_samples_per_second": 4.539, |
|
"eval_steps_per_second": 1.135, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.48e-08, |
|
"loss": 1.2764, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.1885006427764893, |
|
"eval_runtime": 0.8185, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 1.222, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.432e-08, |
|
"loss": 1.2531, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.188267469406128, |
|
"eval_runtime": 0.9375, |
|
"eval_samples_per_second": 4.267, |
|
"eval_steps_per_second": 1.067, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.384e-08, |
|
"loss": 1.2911, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.1874159574508667, |
|
"eval_runtime": 0.9562, |
|
"eval_samples_per_second": 4.183, |
|
"eval_steps_per_second": 1.046, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.335999999999999e-08, |
|
"loss": 1.2616, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.1871256828308105, |
|
"eval_runtime": 0.8052, |
|
"eval_samples_per_second": 4.968, |
|
"eval_steps_per_second": 1.242, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.288e-08, |
|
"loss": 1.2537, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.1868540048599243, |
|
"eval_runtime": 1.0118, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 0.988, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.239999999999999e-08, |
|
"loss": 1.2548, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.1864162683486938, |
|
"eval_runtime": 1.0123, |
|
"eval_samples_per_second": 3.951, |
|
"eval_steps_per_second": 0.988, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.192e-08, |
|
"loss": 1.2722, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.1861025094985962, |
|
"eval_runtime": 0.8118, |
|
"eval_samples_per_second": 4.927, |
|
"eval_steps_per_second": 1.232, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.144e-08, |
|
"loss": 1.2717, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1857701539993286, |
|
"eval_runtime": 0.9863, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 1.014, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.096e-08, |
|
"loss": 1.281, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1854195594787598, |
|
"eval_runtime": 0.8058, |
|
"eval_samples_per_second": 4.964, |
|
"eval_steps_per_second": 1.241, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.048e-08, |
|
"loss": 1.2766, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1849565505981445, |
|
"eval_runtime": 0.6952, |
|
"eval_samples_per_second": 5.754, |
|
"eval_steps_per_second": 1.438, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6e-08, |
|
"loss": 1.2962, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1846204996109009, |
|
"eval_runtime": 0.5591, |
|
"eval_samples_per_second": 7.154, |
|
"eval_steps_per_second": 1.789, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 4, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 2.5424176349184e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|