|
{ |
|
"best_metric": 5.664193153381348, |
|
"best_model_checkpoint": "output_nf_3/checkpoint-1737", |
|
"epoch": 200.0, |
|
"eval_steps": 500, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.95e-07, |
|
"loss": 12.1762, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.002932551319648094, |
|
"eval_loss": 11.818068504333496, |
|
"eval_runtime": 3.6297, |
|
"eval_samples_per_second": 0.276, |
|
"eval_steps_per_second": 0.276, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.9e-07, |
|
"loss": 11.6538, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.0009775171065493646, |
|
"eval_loss": 11.346792221069336, |
|
"eval_runtime": 3.7372, |
|
"eval_samples_per_second": 0.268, |
|
"eval_steps_per_second": 0.268, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.849999999999999e-07, |
|
"loss": 11.2876, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.0009775171065493646, |
|
"eval_loss": 10.912660598754883, |
|
"eval_runtime": 4.2343, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.8e-07, |
|
"loss": 10.8664, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.0009775171065493646, |
|
"eval_loss": 10.516217231750488, |
|
"eval_runtime": 3.4285, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.75e-07, |
|
"loss": 10.6184, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.0019550342130987292, |
|
"eval_loss": 10.181687355041504, |
|
"eval_runtime": 3.9045, |
|
"eval_samples_per_second": 0.256, |
|
"eval_steps_per_second": 0.256, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.7e-07, |
|
"loss": 10.3132, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.0009775171065493646, |
|
"eval_loss": 9.876521110534668, |
|
"eval_runtime": 3.4438, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.649999999999999e-07, |
|
"loss": 9.9087, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.0009775171065493646, |
|
"eval_loss": 9.607266426086426, |
|
"eval_runtime": 3.7078, |
|
"eval_samples_per_second": 0.27, |
|
"eval_steps_per_second": 0.27, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.6e-07, |
|
"loss": 9.6953, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.0, |
|
"eval_loss": 9.324377059936523, |
|
"eval_runtime": 3.5648, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 9.55e-07, |
|
"loss": 9.3741, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.0, |
|
"eval_loss": 9.031828880310059, |
|
"eval_runtime": 3.42, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.499999999999999e-07, |
|
"loss": 9.2045, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.0, |
|
"eval_loss": 8.746149063110352, |
|
"eval_runtime": 3.746, |
|
"eval_samples_per_second": 0.267, |
|
"eval_steps_per_second": 0.267, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.45e-07, |
|
"loss": 8.9079, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.0019550342130987292, |
|
"eval_loss": 8.47550106048584, |
|
"eval_runtime": 3.3518, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 9.399999999999999e-07, |
|
"loss": 8.7047, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.011730205278592375, |
|
"eval_loss": 8.222208976745605, |
|
"eval_runtime": 3.4648, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.289, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 9.35e-07, |
|
"loss": 8.4622, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.03128054740957967, |
|
"eval_loss": 7.995096206665039, |
|
"eval_runtime": 3.5365, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 9.3e-07, |
|
"loss": 8.2649, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.06549364613880743, |
|
"eval_loss": 7.805217266082764, |
|
"eval_runtime": 3.456, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.289, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 9.25e-07, |
|
"loss": 8.043, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.10654936461388075, |
|
"eval_loss": 7.649549961090088, |
|
"eval_runtime": 3.7554, |
|
"eval_samples_per_second": 0.266, |
|
"eval_steps_per_second": 0.266, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 9.2e-07, |
|
"loss": 7.9092, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.1378299120234604, |
|
"eval_loss": 7.516005516052246, |
|
"eval_runtime": 4.0508, |
|
"eval_samples_per_second": 0.247, |
|
"eval_steps_per_second": 0.247, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 9.15e-07, |
|
"loss": 7.7103, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.16911045943304007, |
|
"eval_loss": 7.388144016265869, |
|
"eval_runtime": 3.4529, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 9.1e-07, |
|
"loss": 7.5701, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.18181818181818182, |
|
"eval_loss": 7.269326686859131, |
|
"eval_runtime": 3.467, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.288, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 9.05e-07, |
|
"loss": 7.4483, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.19941348973607037, |
|
"eval_loss": 7.154385089874268, |
|
"eval_runtime": 4.1166, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.243, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 9e-07, |
|
"loss": 7.314, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.21603128054740958, |
|
"eval_loss": 7.046892166137695, |
|
"eval_runtime": 5.2069, |
|
"eval_samples_per_second": 0.192, |
|
"eval_steps_per_second": 0.192, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 8.95e-07, |
|
"loss": 7.2101, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.23264907135874877, |
|
"eval_loss": 6.9440107345581055, |
|
"eval_runtime": 3.9239, |
|
"eval_samples_per_second": 0.255, |
|
"eval_steps_per_second": 0.255, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 8.9e-07, |
|
"loss": 7.1026, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.23655913978494625, |
|
"eval_loss": 6.854238033294678, |
|
"eval_runtime": 3.4311, |
|
"eval_samples_per_second": 0.291, |
|
"eval_steps_per_second": 0.291, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 8.85e-07, |
|
"loss": 6.9954, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2482893450635386, |
|
"eval_loss": 6.768421173095703, |
|
"eval_runtime": 4.5026, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 8.799999999999999e-07, |
|
"loss": 6.9206, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2512218963831867, |
|
"eval_loss": 6.685108184814453, |
|
"eval_runtime": 9.9405, |
|
"eval_samples_per_second": 0.101, |
|
"eval_steps_per_second": 0.101, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 8.75e-07, |
|
"loss": 6.8588, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2561094819159335, |
|
"eval_loss": 6.621737957000732, |
|
"eval_runtime": 3.7996, |
|
"eval_samples_per_second": 0.263, |
|
"eval_steps_per_second": 0.263, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 8.699999999999999e-07, |
|
"loss": 6.7975, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.2590420332355816, |
|
"eval_loss": 6.562064170837402, |
|
"eval_runtime": 4.7111, |
|
"eval_samples_per_second": 0.212, |
|
"eval_steps_per_second": 0.212, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 8.65e-07, |
|
"loss": 6.7355, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.26099706744868034, |
|
"eval_loss": 6.5111308097839355, |
|
"eval_runtime": 3.4887, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.287, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 8.599999999999999e-07, |
|
"loss": 6.6928, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.26392961876832843, |
|
"eval_loss": 6.462972640991211, |
|
"eval_runtime": 10.4927, |
|
"eval_samples_per_second": 0.095, |
|
"eval_steps_per_second": 0.095, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 8.55e-07, |
|
"loss": 6.6483, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2697947214076246, |
|
"eval_loss": 6.413713455200195, |
|
"eval_runtime": 9.1985, |
|
"eval_samples_per_second": 0.109, |
|
"eval_steps_per_second": 0.109, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 8.499999999999999e-07, |
|
"loss": 6.6169, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.27174975562072334, |
|
"eval_loss": 6.367518424987793, |
|
"eval_runtime": 4.5985, |
|
"eval_samples_per_second": 0.217, |
|
"eval_steps_per_second": 0.217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 8.45e-07, |
|
"loss": 6.5498, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.27468230694037143, |
|
"eval_loss": 6.329223155975342, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 0.244, |
|
"eval_steps_per_second": 0.244, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 8.399999999999999e-07, |
|
"loss": 6.5385, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.27663734115347016, |
|
"eval_loss": 6.296669960021973, |
|
"eval_runtime": 3.6593, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.273, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 8.349999999999999e-07, |
|
"loss": 6.5111, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.2785923753665689, |
|
"eval_loss": 6.2592363357543945, |
|
"eval_runtime": 4.3919, |
|
"eval_samples_per_second": 0.228, |
|
"eval_steps_per_second": 0.228, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 8.299999999999999e-07, |
|
"loss": 6.4748, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.28152492668621704, |
|
"eval_loss": 6.231565475463867, |
|
"eval_runtime": 4.3688, |
|
"eval_samples_per_second": 0.229, |
|
"eval_steps_per_second": 0.229, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 8.249999999999999e-07, |
|
"loss": 6.4575, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.28347996089931576, |
|
"eval_loss": 6.209580421447754, |
|
"eval_runtime": 3.5499, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 8.199999999999999e-07, |
|
"loss": 6.4251, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.2844574780058651, |
|
"eval_loss": 6.184682369232178, |
|
"eval_runtime": 3.6926, |
|
"eval_samples_per_second": 0.271, |
|
"eval_steps_per_second": 0.271, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 8.149999999999999e-07, |
|
"loss": 6.4096, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2854349951124145, |
|
"eval_loss": 6.161267280578613, |
|
"eval_runtime": 3.9736, |
|
"eval_samples_per_second": 0.252, |
|
"eval_steps_per_second": 0.252, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 8.1e-07, |
|
"loss": 6.3741, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.2854349951124145, |
|
"eval_loss": 6.140196800231934, |
|
"eval_runtime": 3.9847, |
|
"eval_samples_per_second": 0.251, |
|
"eval_steps_per_second": 0.251, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 8.05e-07, |
|
"loss": 6.3645, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.2873900293255132, |
|
"eval_loss": 6.128504753112793, |
|
"eval_runtime": 3.6581, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.273, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 8e-07, |
|
"loss": 6.3511, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2873900293255132, |
|
"eval_loss": 6.11335563659668, |
|
"eval_runtime": 3.9362, |
|
"eval_samples_per_second": 0.254, |
|
"eval_steps_per_second": 0.254, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 7.95e-07, |
|
"loss": 6.3254, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2873900293255132, |
|
"eval_loss": 6.095658779144287, |
|
"eval_runtime": 3.8555, |
|
"eval_samples_per_second": 0.259, |
|
"eval_steps_per_second": 0.259, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 7.9e-07, |
|
"loss": 6.3077, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.2873900293255132, |
|
"eval_loss": 6.081845283508301, |
|
"eval_runtime": 3.4864, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.287, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 7.85e-07, |
|
"loss": 6.301, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2873900293255132, |
|
"eval_loss": 6.0687761306762695, |
|
"eval_runtime": 4.7364, |
|
"eval_samples_per_second": 0.211, |
|
"eval_steps_per_second": 0.211, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 7.799999999999999e-07, |
|
"loss": 6.2846, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2883675464320626, |
|
"eval_loss": 6.051051616668701, |
|
"eval_runtime": 3.4251, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 7.75e-07, |
|
"loss": 6.2739, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.28934506353861195, |
|
"eval_loss": 6.039752006530762, |
|
"eval_runtime": 4.3563, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.23, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 7.699999999999999e-07, |
|
"loss": 6.2569, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2913000977517107, |
|
"eval_loss": 6.030134677886963, |
|
"eval_runtime": 3.8175, |
|
"eval_samples_per_second": 0.262, |
|
"eval_steps_per_second": 0.262, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 7.65e-07, |
|
"loss": 6.258, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2913000977517107, |
|
"eval_loss": 6.01760721206665, |
|
"eval_runtime": 4.1737, |
|
"eval_samples_per_second": 0.24, |
|
"eval_steps_per_second": 0.24, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 7.599999999999999e-07, |
|
"loss": 6.2273, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.2932551319648094, |
|
"eval_loss": 6.003724575042725, |
|
"eval_runtime": 3.8811, |
|
"eval_samples_per_second": 0.258, |
|
"eval_steps_per_second": 0.258, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 7.55e-07, |
|
"loss": 6.2256, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2932551319648094, |
|
"eval_loss": 5.9961748123168945, |
|
"eval_runtime": 4.331, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.231, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 7.5e-07, |
|
"loss": 6.2065, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2932551319648094, |
|
"eval_loss": 5.986941337585449, |
|
"eval_runtime": 4.6185, |
|
"eval_samples_per_second": 0.217, |
|
"eval_steps_per_second": 0.217, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.45e-07, |
|
"loss": 6.1991, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2932551319648094, |
|
"eval_loss": 5.975553512573242, |
|
"eval_runtime": 3.9164, |
|
"eval_samples_per_second": 0.255, |
|
"eval_steps_per_second": 0.255, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 7.4e-07, |
|
"loss": 6.1895, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.968191623687744, |
|
"eval_runtime": 4.4356, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.225, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 7.35e-07, |
|
"loss": 6.1763, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.961073398590088, |
|
"eval_runtime": 4.4638, |
|
"eval_samples_per_second": 0.224, |
|
"eval_steps_per_second": 0.224, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 7.3e-07, |
|
"loss": 6.1734, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.953503608703613, |
|
"eval_runtime": 3.7888, |
|
"eval_samples_per_second": 0.264, |
|
"eval_steps_per_second": 0.264, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 7.249999999999999e-07, |
|
"loss": 6.1702, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.94571590423584, |
|
"eval_runtime": 4.1672, |
|
"eval_samples_per_second": 0.24, |
|
"eval_steps_per_second": 0.24, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 7.2e-07, |
|
"loss": 6.1556, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 5.937567710876465, |
|
"eval_runtime": 4.244, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 7.149999999999999e-07, |
|
"loss": 6.1481, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 5.930552005767822, |
|
"eval_runtime": 4.4958, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 7.1e-07, |
|
"loss": 6.1425, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.922237396240234, |
|
"eval_runtime": 4.1228, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.243, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 7.049999999999999e-07, |
|
"loss": 6.1416, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.91697883605957, |
|
"eval_runtime": 4.4905, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.223, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 7e-07, |
|
"loss": 6.1328, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.914181232452393, |
|
"eval_runtime": 4.5555, |
|
"eval_samples_per_second": 0.22, |
|
"eval_steps_per_second": 0.22, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 6.949999999999999e-07, |
|
"loss": 6.1176, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.906389236450195, |
|
"eval_runtime": 4.1516, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.241, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 6.9e-07, |
|
"loss": 6.1091, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.901010513305664, |
|
"eval_runtime": 4.2358, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 6.85e-07, |
|
"loss": 6.104, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.896124362945557, |
|
"eval_runtime": 4.4857, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.223, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 6.800000000000001e-07, |
|
"loss": 6.0986, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.891510486602783, |
|
"eval_runtime": 4.4644, |
|
"eval_samples_per_second": 0.224, |
|
"eval_steps_per_second": 0.224, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 6.75e-07, |
|
"loss": 6.089, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 5.885429382324219, |
|
"eval_runtime": 3.9204, |
|
"eval_samples_per_second": 0.255, |
|
"eval_steps_per_second": 0.255, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 6.7e-07, |
|
"loss": 6.0734, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.880984783172607, |
|
"eval_runtime": 4.2715, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.234, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 6.65e-07, |
|
"loss": 6.0905, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.29423264907135877, |
|
"eval_loss": 5.876211166381836, |
|
"eval_runtime": 4.4141, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 6.6e-07, |
|
"loss": 6.0701, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.873714447021484, |
|
"eval_runtime": 4.3879, |
|
"eval_samples_per_second": 0.228, |
|
"eval_steps_per_second": 0.228, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 6.55e-07, |
|
"loss": 6.0595, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.869368553161621, |
|
"eval_runtime": 4.2313, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 6.5e-07, |
|
"loss": 6.0616, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.8661274909973145, |
|
"eval_runtime": 4.1245, |
|
"eval_samples_per_second": 0.242, |
|
"eval_steps_per_second": 0.242, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 6.45e-07, |
|
"loss": 6.0512, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.863522052764893, |
|
"eval_runtime": 4.2876, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.233, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 6.4e-07, |
|
"loss": 6.0415, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.861262321472168, |
|
"eval_runtime": 4.932, |
|
"eval_samples_per_second": 0.203, |
|
"eval_steps_per_second": 0.203, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 6.35e-07, |
|
"loss": 6.0391, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.858279228210449, |
|
"eval_runtime": 4.4327, |
|
"eval_samples_per_second": 0.226, |
|
"eval_steps_per_second": 0.226, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 6.3e-07, |
|
"loss": 6.032, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.854796886444092, |
|
"eval_runtime": 4.4253, |
|
"eval_samples_per_second": 0.226, |
|
"eval_steps_per_second": 0.226, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 6.249999999999999e-07, |
|
"loss": 6.0317, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.2961876832844575, |
|
"eval_loss": 5.851076126098633, |
|
"eval_runtime": 4.5175, |
|
"eval_samples_per_second": 0.221, |
|
"eval_steps_per_second": 0.221, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 6.2e-07, |
|
"loss": 6.0343, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.29716520039100686, |
|
"eval_loss": 5.847884178161621, |
|
"eval_runtime": 4.5177, |
|
"eval_samples_per_second": 0.221, |
|
"eval_steps_per_second": 0.221, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 6.149999999999999e-07, |
|
"loss": 6.0156, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 5.843489646911621, |
|
"eval_runtime": 4.7261, |
|
"eval_samples_per_second": 0.212, |
|
"eval_steps_per_second": 0.212, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 6.1e-07, |
|
"loss": 6.0167, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 5.8403215408325195, |
|
"eval_runtime": 4.3157, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.232, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 6.049999999999999e-07, |
|
"loss": 6.0052, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 5.83635950088501, |
|
"eval_runtime": 4.3221, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.231, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 6e-07, |
|
"loss": 6.0057, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 5.8336262702941895, |
|
"eval_runtime": 4.8317, |
|
"eval_samples_per_second": 0.207, |
|
"eval_steps_per_second": 0.207, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 5.949999999999999e-07, |
|
"loss": 6.0001, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 5.8319411277771, |
|
"eval_runtime": 4.5299, |
|
"eval_samples_per_second": 0.221, |
|
"eval_steps_per_second": 0.221, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 5.9e-07, |
|
"loss": 6.001, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.2991202346041056, |
|
"eval_loss": 5.828815937042236, |
|
"eval_runtime": 4.4118, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 5.849999999999999e-07, |
|
"loss": 5.9905, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.3010752688172043, |
|
"eval_loss": 5.826550006866455, |
|
"eval_runtime": 4.3318, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.231, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 5.8e-07, |
|
"loss": 5.9906, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.3020527859237537, |
|
"eval_loss": 5.824170112609863, |
|
"eval_runtime": 4.4876, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.223, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 5.749999999999999e-07, |
|
"loss": 5.9862, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.3020527859237537, |
|
"eval_loss": 5.821638584136963, |
|
"eval_runtime": 4.7768, |
|
"eval_samples_per_second": 0.209, |
|
"eval_steps_per_second": 0.209, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 5.699999999999999e-07, |
|
"loss": 5.9829, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.30498533724340177, |
|
"eval_loss": 5.819102764129639, |
|
"eval_runtime": 4.2697, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.234, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 5.649999999999999e-07, |
|
"loss": 5.9725, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.817332744598389, |
|
"eval_runtime": 4.3341, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.231, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 5.6e-07, |
|
"loss": 5.9795, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.814181804656982, |
|
"eval_runtime": 4.759, |
|
"eval_samples_per_second": 0.21, |
|
"eval_steps_per_second": 0.21, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 5.55e-07, |
|
"loss": 5.9695, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.811527729034424, |
|
"eval_runtime": 4.3738, |
|
"eval_samples_per_second": 0.229, |
|
"eval_steps_per_second": 0.229, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 5.5e-07, |
|
"loss": 5.9607, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.808058261871338, |
|
"eval_runtime": 3.4007, |
|
"eval_samples_per_second": 0.294, |
|
"eval_steps_per_second": 0.294, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 5.45e-07, |
|
"loss": 5.9605, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.805708885192871, |
|
"eval_runtime": 3.8195, |
|
"eval_samples_per_second": 0.262, |
|
"eval_steps_per_second": 0.262, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 5.4e-07, |
|
"loss": 5.9591, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.80366849899292, |
|
"eval_runtime": 4.5002, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 5.35e-07, |
|
"loss": 5.9481, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.801074981689453, |
|
"eval_runtime": 4.3082, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.232, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 5.3e-07, |
|
"loss": 5.9501, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.798312664031982, |
|
"eval_runtime": 4.9824, |
|
"eval_samples_per_second": 0.201, |
|
"eval_steps_per_second": 0.201, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 5.25e-07, |
|
"loss": 5.948, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.794301509857178, |
|
"eval_runtime": 3.717, |
|
"eval_samples_per_second": 0.269, |
|
"eval_steps_per_second": 0.269, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 5.2e-07, |
|
"loss": 5.9488, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.790666580200195, |
|
"eval_runtime": 3.5092, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.285, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 5.149999999999999e-07, |
|
"loss": 5.9449, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.3118279569892473, |
|
"eval_loss": 5.788764476776123, |
|
"eval_runtime": 4.3467, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.23, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 5.1e-07, |
|
"loss": 5.9357, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.787247180938721, |
|
"eval_runtime": 4.2514, |
|
"eval_samples_per_second": 0.235, |
|
"eval_steps_per_second": 0.235, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 5.049999999999999e-07, |
|
"loss": 5.9363, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.783356666564941, |
|
"eval_runtime": 3.6762, |
|
"eval_samples_per_second": 0.272, |
|
"eval_steps_per_second": 0.272, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 5e-07, |
|
"loss": 5.9368, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.781695365905762, |
|
"eval_runtime": 4.5979, |
|
"eval_samples_per_second": 0.217, |
|
"eval_steps_per_second": 0.217, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 4.95e-07, |
|
"loss": 5.9215, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.779068946838379, |
|
"eval_runtime": 4.3432, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.23, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 4.9e-07, |
|
"loss": 5.9264, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.77487325668335, |
|
"eval_runtime": 3.8835, |
|
"eval_samples_per_second": 0.258, |
|
"eval_steps_per_second": 0.258, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 4.85e-07, |
|
"loss": 5.9126, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.772488594055176, |
|
"eval_runtime": 4.0354, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.248, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 4.8e-07, |
|
"loss": 5.9163, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.769767761230469, |
|
"eval_runtime": 3.6621, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.273, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"loss": 5.9183, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.767302989959717, |
|
"eval_runtime": 4.3085, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.232, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 4.6999999999999995e-07, |
|
"loss": 5.9154, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.763917446136475, |
|
"eval_runtime": 4.2316, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 4.65e-07, |
|
"loss": 5.9149, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.76121187210083, |
|
"eval_runtime": 4.5023, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 4.6e-07, |
|
"loss": 5.9082, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.757934093475342, |
|
"eval_runtime": 4.5081, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 4.55e-07, |
|
"loss": 5.9051, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.755875110626221, |
|
"eval_runtime": 4.207, |
|
"eval_samples_per_second": 0.238, |
|
"eval_steps_per_second": 0.238, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 4.5e-07, |
|
"loss": 5.908, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.753073692321777, |
|
"eval_runtime": 4.5603, |
|
"eval_samples_per_second": 0.219, |
|
"eval_steps_per_second": 0.219, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 4.45e-07, |
|
"loss": 5.901, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.750200271606445, |
|
"eval_runtime": 4.3073, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.232, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"loss": 5.9064, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.748438835144043, |
|
"eval_runtime": 4.0892, |
|
"eval_samples_per_second": 0.245, |
|
"eval_steps_per_second": 0.245, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 4.3499999999999996e-07, |
|
"loss": 5.895, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.745972156524658, |
|
"eval_runtime": 3.6273, |
|
"eval_samples_per_second": 0.276, |
|
"eval_steps_per_second": 0.276, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 4.2999999999999996e-07, |
|
"loss": 5.894, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.743812561035156, |
|
"eval_runtime": 3.373, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"loss": 5.8809, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.741939067840576, |
|
"eval_runtime": 3.9496, |
|
"eval_samples_per_second": 0.253, |
|
"eval_steps_per_second": 0.253, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"loss": 5.8893, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.74192476272583, |
|
"eval_runtime": 4.7772, |
|
"eval_samples_per_second": 0.209, |
|
"eval_steps_per_second": 0.209, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 4.1499999999999994e-07, |
|
"loss": 5.8874, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.3069403714565005, |
|
"eval_loss": 5.739171504974365, |
|
"eval_runtime": 4.0083, |
|
"eval_samples_per_second": 0.249, |
|
"eval_steps_per_second": 0.249, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 4.0999999999999994e-07, |
|
"loss": 5.8798, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.73491907119751, |
|
"eval_runtime": 4.1773, |
|
"eval_samples_per_second": 0.239, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 4.05e-07, |
|
"loss": 5.8826, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.732446193695068, |
|
"eval_runtime": 4.4141, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 4e-07, |
|
"loss": 5.8736, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.729852199554443, |
|
"eval_runtime": 4.4117, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 3.95e-07, |
|
"loss": 5.8751, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.726986408233643, |
|
"eval_runtime": 4.9388, |
|
"eval_samples_per_second": 0.202, |
|
"eval_steps_per_second": 0.202, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 3.8999999999999997e-07, |
|
"loss": 5.8699, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.72526216506958, |
|
"eval_runtime": 4.0541, |
|
"eval_samples_per_second": 0.247, |
|
"eval_steps_per_second": 0.247, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 3.8499999999999997e-07, |
|
"loss": 5.8802, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.723138809204102, |
|
"eval_runtime": 4.1908, |
|
"eval_samples_per_second": 0.239, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"loss": 5.8707, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.7239861488342285, |
|
"eval_runtime": 4.4694, |
|
"eval_samples_per_second": 0.224, |
|
"eval_steps_per_second": 0.224, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 3.75e-07, |
|
"loss": 5.8653, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.723176002502441, |
|
"eval_runtime": 4.2543, |
|
"eval_samples_per_second": 0.235, |
|
"eval_steps_per_second": 0.235, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 3.7e-07, |
|
"loss": 5.8693, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.7180495262146, |
|
"eval_runtime": 4.3031, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.232, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 3.65e-07, |
|
"loss": 5.8662, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.714703559875488, |
|
"eval_runtime": 4.1167, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.243, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 3.6e-07, |
|
"loss": 5.8539, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.713204383850098, |
|
"eval_runtime": 4.0648, |
|
"eval_samples_per_second": 0.246, |
|
"eval_steps_per_second": 0.246, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 3.55e-07, |
|
"loss": 5.8611, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.712745189666748, |
|
"eval_runtime": 4.0643, |
|
"eval_samples_per_second": 0.246, |
|
"eval_steps_per_second": 0.246, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 3.5e-07, |
|
"loss": 5.8495, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.713523864746094, |
|
"eval_runtime": 4.0466, |
|
"eval_samples_per_second": 0.247, |
|
"eval_steps_per_second": 0.247, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 3.45e-07, |
|
"loss": 5.8602, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.711104393005371, |
|
"eval_runtime": 3.9889, |
|
"eval_samples_per_second": 0.251, |
|
"eval_steps_per_second": 0.251, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"loss": 5.8512, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.7077484130859375, |
|
"eval_runtime": 3.6772, |
|
"eval_samples_per_second": 0.272, |
|
"eval_steps_per_second": 0.272, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 3.35e-07, |
|
"loss": 5.8493, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.704965591430664, |
|
"eval_runtime": 4.3621, |
|
"eval_samples_per_second": 0.229, |
|
"eval_steps_per_second": 0.229, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 3.3e-07, |
|
"loss": 5.8477, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.704052925109863, |
|
"eval_runtime": 4.4972, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 3.25e-07, |
|
"loss": 5.8464, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.704152584075928, |
|
"eval_runtime": 3.8106, |
|
"eval_samples_per_second": 0.262, |
|
"eval_steps_per_second": 0.262, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 3.2e-07, |
|
"loss": 5.8459, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.702281951904297, |
|
"eval_runtime": 4.7548, |
|
"eval_samples_per_second": 0.21, |
|
"eval_steps_per_second": 0.21, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 3.15e-07, |
|
"loss": 5.8475, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.7000274658203125, |
|
"eval_runtime": 4.1681, |
|
"eval_samples_per_second": 0.24, |
|
"eval_steps_per_second": 0.24, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 3.1e-07, |
|
"loss": 5.8384, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.698208332061768, |
|
"eval_runtime": 4.6023, |
|
"eval_samples_per_second": 0.217, |
|
"eval_steps_per_second": 0.217, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 3.05e-07, |
|
"loss": 5.8453, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.697631359100342, |
|
"eval_runtime": 3.8429, |
|
"eval_samples_per_second": 0.26, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 3e-07, |
|
"loss": 5.8441, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.697261810302734, |
|
"eval_runtime": 4.2789, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.234, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 2.95e-07, |
|
"loss": 5.838, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.697081565856934, |
|
"eval_runtime": 4.6381, |
|
"eval_samples_per_second": 0.216, |
|
"eval_steps_per_second": 0.216, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 2.9e-07, |
|
"loss": 5.8463, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.695047855377197, |
|
"eval_runtime": 4.2254, |
|
"eval_samples_per_second": 0.237, |
|
"eval_steps_per_second": 0.237, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 2.8499999999999997e-07, |
|
"loss": 5.8385, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.692078113555908, |
|
"eval_runtime": 3.9314, |
|
"eval_samples_per_second": 0.254, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 2.8e-07, |
|
"loss": 5.8354, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.69093656539917, |
|
"eval_runtime": 4.4756, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.223, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 2.75e-07, |
|
"loss": 5.8283, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.690831184387207, |
|
"eval_runtime": 4.2673, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.234, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 2.7e-07, |
|
"loss": 5.8363, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.6901655197143555, |
|
"eval_runtime": 4.2991, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.233, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 2.65e-07, |
|
"loss": 5.8433, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.689047813415527, |
|
"eval_runtime": 4.5899, |
|
"eval_samples_per_second": 0.218, |
|
"eval_steps_per_second": 0.218, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 2.6e-07, |
|
"loss": 5.8302, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.30791788856304986, |
|
"eval_loss": 5.689029216766357, |
|
"eval_runtime": 4.2306, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 2.55e-07, |
|
"loss": 5.8276, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.688091278076172, |
|
"eval_runtime": 4.1426, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.241, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 2.5e-07, |
|
"loss": 5.8366, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.686432838439941, |
|
"eval_runtime": 4.4478, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.225, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 2.45e-07, |
|
"loss": 5.826, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.685244083404541, |
|
"eval_runtime": 4.0347, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.248, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 2.4e-07, |
|
"loss": 5.8293, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.68437385559082, |
|
"eval_runtime": 3.7004, |
|
"eval_samples_per_second": 0.27, |
|
"eval_steps_per_second": 0.27, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 2.3499999999999997e-07, |
|
"loss": 5.8278, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.683380603790283, |
|
"eval_runtime": 4.0294, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.248, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 2.3e-07, |
|
"loss": 5.8239, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.683019638061523, |
|
"eval_runtime": 4.1012, |
|
"eval_samples_per_second": 0.244, |
|
"eval_steps_per_second": 0.244, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 2.25e-07, |
|
"loss": 5.8262, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 5.681789875030518, |
|
"eval_runtime": 4.3436, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.23, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 2.1999999999999998e-07, |
|
"loss": 5.8253, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.680798530578613, |
|
"eval_runtime": 3.5916, |
|
"eval_samples_per_second": 0.278, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 2.1499999999999998e-07, |
|
"loss": 5.8169, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.679258346557617, |
|
"eval_runtime": 5.377, |
|
"eval_samples_per_second": 0.186, |
|
"eval_steps_per_second": 0.186, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 2.0999999999999997e-07, |
|
"loss": 5.8201, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.679547309875488, |
|
"eval_runtime": 3.4728, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.288, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 2.0499999999999997e-07, |
|
"loss": 5.8077, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.6798624992370605, |
|
"eval_runtime": 4.4591, |
|
"eval_samples_per_second": 0.224, |
|
"eval_steps_per_second": 0.224, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2e-07, |
|
"loss": 5.8222, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.678928852081299, |
|
"eval_runtime": 3.9956, |
|
"eval_samples_per_second": 0.25, |
|
"eval_steps_per_second": 0.25, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 1.9499999999999999e-07, |
|
"loss": 5.8191, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.677757263183594, |
|
"eval_runtime": 3.5865, |
|
"eval_samples_per_second": 0.279, |
|
"eval_steps_per_second": 0.279, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 1.8999999999999998e-07, |
|
"loss": 5.83, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.676819801330566, |
|
"eval_runtime": 4.7244, |
|
"eval_samples_per_second": 0.212, |
|
"eval_steps_per_second": 0.212, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 1.85e-07, |
|
"loss": 5.8183, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.675673484802246, |
|
"eval_runtime": 4.423, |
|
"eval_samples_per_second": 0.226, |
|
"eval_steps_per_second": 0.226, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 1.8e-07, |
|
"loss": 5.8124, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.674676418304443, |
|
"eval_runtime": 4.507, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.222, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 1.75e-07, |
|
"loss": 5.8119, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.674499034881592, |
|
"eval_runtime": 4.5793, |
|
"eval_samples_per_second": 0.218, |
|
"eval_steps_per_second": 0.218, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 1.7000000000000001e-07, |
|
"loss": 5.821, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.674384117126465, |
|
"eval_runtime": 4.1895, |
|
"eval_samples_per_second": 0.239, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 1.65e-07, |
|
"loss": 5.807, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.6736602783203125, |
|
"eval_runtime": 3.6058, |
|
"eval_samples_per_second": 0.277, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 1.6e-07, |
|
"loss": 5.8177, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.672515392303467, |
|
"eval_runtime": 4.5742, |
|
"eval_samples_per_second": 0.219, |
|
"eval_steps_per_second": 0.219, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"learning_rate": 1.55e-07, |
|
"loss": 5.8046, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.671017169952393, |
|
"eval_runtime": 3.599, |
|
"eval_samples_per_second": 0.278, |
|
"eval_steps_per_second": 0.278, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 1.5e-07, |
|
"loss": 5.8093, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.670760154724121, |
|
"eval_runtime": 3.7495, |
|
"eval_samples_per_second": 0.267, |
|
"eval_steps_per_second": 0.267, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"learning_rate": 1.45e-07, |
|
"loss": 5.8145, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.6709699630737305, |
|
"eval_runtime": 4.4875, |
|
"eval_samples_per_second": 0.223, |
|
"eval_steps_per_second": 0.223, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 1.4e-07, |
|
"loss": 5.803, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.67042350769043, |
|
"eval_runtime": 4.2512, |
|
"eval_samples_per_second": 0.235, |
|
"eval_steps_per_second": 0.235, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"learning_rate": 1.35e-07, |
|
"loss": 5.8038, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.6697306632995605, |
|
"eval_runtime": 3.7575, |
|
"eval_samples_per_second": 0.266, |
|
"eval_steps_per_second": 0.266, |
|
"step": 1557 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"learning_rate": 1.3e-07, |
|
"loss": 5.807, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.668872356414795, |
|
"eval_runtime": 3.653, |
|
"eval_samples_per_second": 0.274, |
|
"eval_steps_per_second": 0.274, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 1.25e-07, |
|
"loss": 5.7974, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.668323993682861, |
|
"eval_runtime": 3.3471, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 1.2e-07, |
|
"loss": 5.8089, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.66883659362793, |
|
"eval_runtime": 3.7471, |
|
"eval_samples_per_second": 0.267, |
|
"eval_steps_per_second": 0.267, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 1.15e-07, |
|
"loss": 5.8067, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.669277667999268, |
|
"eval_runtime": 3.325, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.301, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"learning_rate": 1.0999999999999999e-07, |
|
"loss": 5.8092, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 5.669507026672363, |
|
"eval_runtime": 3.3536, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"learning_rate": 1.0499999999999999e-07, |
|
"loss": 5.8047, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.668713092803955, |
|
"eval_runtime": 4.0451, |
|
"eval_samples_per_second": 0.247, |
|
"eval_steps_per_second": 0.247, |
|
"step": 1611 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 1e-07, |
|
"loss": 5.8007, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.667922019958496, |
|
"eval_runtime": 4.4531, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.225, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"learning_rate": 9.499999999999999e-08, |
|
"loss": 5.8041, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.667240619659424, |
|
"eval_runtime": 4.9291, |
|
"eval_samples_per_second": 0.203, |
|
"eval_steps_per_second": 0.203, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"learning_rate": 9e-08, |
|
"loss": 5.8072, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.666664123535156, |
|
"eval_runtime": 3.1517, |
|
"eval_samples_per_second": 0.317, |
|
"eval_steps_per_second": 0.317, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"learning_rate": 8.500000000000001e-08, |
|
"loss": 5.8093, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.66619873046875, |
|
"eval_runtime": 3.528, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 1647 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 8e-08, |
|
"loss": 5.7948, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.665794372558594, |
|
"eval_runtime": 3.2036, |
|
"eval_samples_per_second": 0.312, |
|
"eval_steps_per_second": 0.312, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 7.5e-08, |
|
"loss": 5.7968, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.665581703186035, |
|
"eval_runtime": 4.064, |
|
"eval_samples_per_second": 0.246, |
|
"eval_steps_per_second": 0.246, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"learning_rate": 7e-08, |
|
"loss": 5.8033, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.665287494659424, |
|
"eval_runtime": 3.7071, |
|
"eval_samples_per_second": 0.27, |
|
"eval_steps_per_second": 0.27, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"learning_rate": 6.5e-08, |
|
"loss": 5.8031, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.665118217468262, |
|
"eval_runtime": 3.8502, |
|
"eval_samples_per_second": 0.26, |
|
"eval_steps_per_second": 0.26, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"learning_rate": 6e-08, |
|
"loss": 5.7953, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664963722229004, |
|
"eval_runtime": 3.5099, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.285, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"learning_rate": 5.4999999999999996e-08, |
|
"loss": 5.8085, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.6647257804870605, |
|
"eval_runtime": 3.1431, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 5e-08, |
|
"loss": 5.8021, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664588928222656, |
|
"eval_runtime": 3.9583, |
|
"eval_samples_per_second": 0.253, |
|
"eval_steps_per_second": 0.253, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"learning_rate": 4.5e-08, |
|
"loss": 5.7995, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664261817932129, |
|
"eval_runtime": 3.9361, |
|
"eval_samples_per_second": 0.254, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 4e-08, |
|
"loss": 5.8057, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664216041564941, |
|
"eval_runtime": 3.9765, |
|
"eval_samples_per_second": 0.251, |
|
"eval_steps_per_second": 0.251, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"learning_rate": 3.5e-08, |
|
"loss": 5.7989, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664193153381348, |
|
"eval_runtime": 3.0391, |
|
"eval_samples_per_second": 0.329, |
|
"eval_steps_per_second": 0.329, |
|
"step": 1737 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"learning_rate": 3e-08, |
|
"loss": 5.7977, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664215564727783, |
|
"eval_runtime": 3.3818, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 2.5e-08, |
|
"loss": 5.8009, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664393901824951, |
|
"eval_runtime": 3.3657, |
|
"eval_samples_per_second": 0.297, |
|
"eval_steps_per_second": 0.297, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"learning_rate": 2e-08, |
|
"loss": 5.7988, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.6645026206970215, |
|
"eval_runtime": 3.9312, |
|
"eval_samples_per_second": 0.254, |
|
"eval_steps_per_second": 0.254, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"learning_rate": 1.5e-08, |
|
"loss": 5.8016, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664463043212891, |
|
"eval_runtime": 3.7271, |
|
"eval_samples_per_second": 0.268, |
|
"eval_steps_per_second": 0.268, |
|
"step": 1773 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"learning_rate": 1e-08, |
|
"loss": 5.7929, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664458751678467, |
|
"eval_runtime": 3.5464, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"learning_rate": 5e-09, |
|
"loss": 5.7973, |
|
"step": 1791 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664454936981201, |
|
"eval_runtime": 3.6778, |
|
"eval_samples_per_second": 0.272, |
|
"eval_steps_per_second": 0.272, |
|
"step": 1791 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 0.0, |
|
"loss": 5.8022, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.3098729227761486, |
|
"eval_loss": 5.664452075958252, |
|
"eval_runtime": 3.5398, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"step": 1800, |
|
"total_flos": 7834231111680000.0, |
|
"train_loss": 6.335995424058702, |
|
"train_runtime": 30730.8013, |
|
"train_samples_per_second": 0.059, |
|
"train_steps_per_second": 0.059 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 200, |
|
"save_steps": 500, |
|
"total_flos": 7834231111680000.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|