|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 1.7297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 0.9422, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.7053, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.6722, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.6335, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.6203, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.999982661974106e-05, |
|
"loss": 0.6116, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9997876160873005e-05, |
|
"loss": 0.6105, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.9993758941929738e-05, |
|
"loss": 0.5888, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.9987475855203325e-05, |
|
"loss": 0.5663, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.997902826237712e-05, |
|
"loss": 0.5533, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.9968417994230628e-05, |
|
"loss": 0.5662, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.9955647350242765e-05, |
|
"loss": 0.5717, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.99407190980935e-05, |
|
"loss": 0.5193, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.9923636473064024e-05, |
|
"loss": 0.4847, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.990440317733561e-05, |
|
"loss": 0.4733, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.9883023379187267e-05, |
|
"loss": 0.4801, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.9859501712092374e-05, |
|
"loss": 0.4299, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.9833843273714506e-05, |
|
"loss": 0.3846, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.9806053624802667e-05, |
|
"loss": 0.3733, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.9776138787986137e-05, |
|
"loss": 0.3646, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.9744105246469264e-05, |
|
"loss": 0.4011, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.9709959942626366e-05, |
|
"loss": 0.3151, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1.9673710276497207e-05, |
|
"loss": 0.2784, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.9635364104183236e-05, |
|
"loss": 0.3248, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 1.9594929736144978e-05, |
|
"loss": 0.3083, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.955241593540098e-05, |
|
"loss": 0.2681, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.9507831915628706e-05, |
|
"loss": 0.2364, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.9461187339167673e-05, |
|
"loss": 0.2565, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.9412492314925453e-05, |
|
"loss": 0.2661, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.9361757396186834e-05, |
|
"loss": 0.2562, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.9308993578326688e-05, |
|
"loss": 0.2189, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.9254212296427043e-05, |
|
"loss": 0.1989, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 1.9197425422798832e-05, |
|
"loss": 0.2321, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 1.9138645264408917e-05, |
|
"loss": 0.229, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 1.9077884560212885e-05, |
|
"loss": 0.1959, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 1.901515647839425e-05, |
|
"loss": 0.1936, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 1.8950474613510605e-05, |
|
"loss": 0.1907, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 1.8883852983547383e-05, |
|
"loss": 0.1959, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.8815306026879854e-05, |
|
"loss": 0.1852, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 1.8744848599144027e-05, |
|
"loss": 0.1823, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 1.8672495970017067e-05, |
|
"loss": 0.154, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.859826381990806e-05, |
|
"loss": 0.1764, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.8522168236559693e-05, |
|
"loss": 0.1748, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 1.8444225711561713e-05, |
|
"loss": 0.139, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 1.83644531367768e-05, |
|
"loss": 0.1648, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 1.8282867800679774e-05, |
|
"loss": 0.1677, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 1.8199487384610758e-05, |
|
"loss": 0.1728, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 1.8114329958943272e-05, |
|
"loss": 0.1436, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 1.8027413979167968e-05, |
|
"loss": 0.1336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 1.7938758281892928e-05, |
|
"loss": 0.1426, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 1.7848382080761343e-05, |
|
"loss": 0.1725, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 1.775630496228752e-05, |
|
"loss": 0.1362, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 1.766254688161202e-05, |
|
"loss": 0.1287, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.7567128158176955e-05, |
|
"loss": 0.1219, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 1.7470069471322325e-05, |
|
"loss": 0.1456, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 1.737139185580434e-05, |
|
"loss": 0.1376, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 1.7271116697236717e-05, |
|
"loss": 0.1209, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 1.7169265727455964e-05, |
|
"loss": 0.1316, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 1.7065861019811598e-05, |
|
"loss": 0.1296, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 1.6960924984382373e-05, |
|
"loss": 0.1359, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 1.685448036311951e-05, |
|
"loss": 0.1123, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 1.6746550224918032e-05, |
|
"loss": 0.1227, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 1.663715796061722e-05, |
|
"loss": 0.1282, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 1.6526327277931278e-05, |
|
"loss": 0.1055, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.6414082196311402e-05, |
|
"loss": 0.1288, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 1.630044704174018e-05, |
|
"loss": 0.1145, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 1.6185446441459652e-05, |
|
"loss": 0.1158, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 1.6069105318634024e-05, |
|
"loss": 0.1119, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 1.5951448886948286e-05, |
|
"loss": 0.1119, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 1.5832502645143837e-05, |
|
"loss": 0.0981, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 1.571229237149235e-05, |
|
"loss": 0.1142, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 1.5590844118209055e-05, |
|
"loss": 0.1054, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 1.5468184205806646e-05, |
|
"loss": 0.1012, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 1.534433921739105e-05, |
|
"loss": 0.1047, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 1.5219335992900282e-05, |
|
"loss": 0.0992, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1.5093201623287631e-05, |
|
"loss": 0.1063, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"learning_rate": 1.4965963444650465e-05, |
|
"loss": 0.0863, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 1.4837649032305885e-05, |
|
"loss": 0.0949, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 1.4708286194814565e-05, |
|
"loss": 0.0829, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 1.4577902967953995e-05, |
|
"loss": 0.0892, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 1.4446527608642538e-05, |
|
"loss": 0.0972, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 1.4314188588815514e-05, |
|
"loss": 0.0848, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 1.4180914589254716e-05, |
|
"loss": 0.0849, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 1.4046734493372646e-05, |
|
"loss": 0.0877, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 1.3911677380952853e-05, |
|
"loss": 0.0877, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 19.77, |
|
"learning_rate": 1.3775772521847683e-05, |
|
"loss": 0.0754, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.3639049369634878e-05, |
|
"loss": 0.0901, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 20.23, |
|
"learning_rate": 1.3501537555234323e-05, |
|
"loss": 0.0826, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 1.3363266880486388e-05, |
|
"loss": 0.0858, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 20.68, |
|
"learning_rate": 1.3224267311693186e-05, |
|
"loss": 0.0791, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 1.308456897312425e-05, |
|
"loss": 0.0756, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 21.14, |
|
"learning_rate": 1.2944202140487905e-05, |
|
"loss": 0.0683, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 21.36, |
|
"learning_rate": 1.2803197234369878e-05, |
|
"loss": 0.0806, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 21.59, |
|
"learning_rate": 1.2661584813640485e-05, |
|
"loss": 0.0841, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"learning_rate": 1.2519395568831839e-05, |
|
"loss": 0.0917, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"learning_rate": 1.2376660315486535e-05, |
|
"loss": 0.0708, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 1.223340998747927e-05, |
|
"loss": 0.0768, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 1.2089675630312755e-05, |
|
"loss": 0.0834, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 1.1945488394389479e-05, |
|
"loss": 0.0769, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 1.1800879528260761e-05, |
|
"loss": 0.0751, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 23.18, |
|
"learning_rate": 1.1655880371854454e-05, |
|
"loss": 0.08, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 1.1510522349682922e-05, |
|
"loss": 0.0603, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 23.64, |
|
"learning_rate": 1.1364836964032658e-05, |
|
"loss": 0.0685, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 23.86, |
|
"learning_rate": 1.1218855788137016e-05, |
|
"loss": 0.0735, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 24.09, |
|
"learning_rate": 1.107261045933363e-05, |
|
"loss": 0.0615, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 1.092613267220788e-05, |
|
"loss": 0.0678, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 1.0779454171723994e-05, |
|
"loss": 0.0687, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 1.0632606746345203e-05, |
|
"loss": 0.0669, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.0485622221144485e-05, |
|
"loss": 0.071, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 1.0338532450907373e-05, |
|
"loss": 0.0708, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"learning_rate": 1.0191369313228319e-05, |
|
"loss": 0.0677, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 25.68, |
|
"learning_rate": 1.0044164701602111e-05, |
|
"loss": 0.0738, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 25.91, |
|
"learning_rate": 9.896950518511863e-06, |
|
"loss": 0.0753, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 26.14, |
|
"learning_rate": 9.749758668515027e-06, |
|
"loss": 0.0689, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"learning_rate": 9.602621051328998e-06, |
|
"loss": 0.0688, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 26.59, |
|
"learning_rate": 9.455569554917701e-06, |
|
"loss": 0.0781, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"learning_rate": 9.308636048580813e-06, |
|
"loss": 0.0635, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 27.05, |
|
"learning_rate": 9.161852376046953e-06, |
|
"loss": 0.068, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 9.015250348572452e-06, |
|
"loss": 0.0602, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 8.868861738047158e-06, |
|
"loss": 0.0502, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 8.72271827010876e-06, |
|
"loss": 0.0531, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"learning_rate": 8.576851617267151e-06, |
|
"loss": 0.0613, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 28.18, |
|
"learning_rate": 8.431293392040283e-06, |
|
"loss": 0.0636, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 28.41, |
|
"learning_rate": 8.286075140103058e-06, |
|
"loss": 0.0618, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 8.141228333450673e-06, |
|
"loss": 0.0652, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 7.99678436357794e-06, |
|
"loss": 0.0742, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"learning_rate": 7.852774534676073e-06, |
|
"loss": 0.0529, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 29.32, |
|
"learning_rate": 7.709230056848356e-06, |
|
"loss": 0.0441, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 7.5661820393462605e-06, |
|
"loss": 0.0575, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 7.423661483827357e-06, |
|
"loss": 0.0552, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 7.2816992776365714e-06, |
|
"loss": 0.0518, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 30.23, |
|
"learning_rate": 7.1403261871122466e-06, |
|
"loss": 0.0631, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 6.999572850918357e-06, |
|
"loss": 0.0496, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 30.68, |
|
"learning_rate": 6.859469773404471e-06, |
|
"loss": 0.0575, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 30.91, |
|
"learning_rate": 6.720047317994775e-06, |
|
"loss": 0.0547, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"learning_rate": 6.581335700607632e-06, |
|
"loss": 0.0537, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 6.443364983107156e-06, |
|
"loss": 0.0531, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 31.59, |
|
"learning_rate": 6.306165066788121e-06, |
|
"loss": 0.0525, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 6.169765685895703e-06, |
|
"loss": 0.0512, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 6.034196401181414e-06, |
|
"loss": 0.0506, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 32.27, |
|
"learning_rate": 5.899486593496625e-06, |
|
"loss": 0.0472, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 5.765665457425102e-06, |
|
"loss": 0.0538, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 32.73, |
|
"learning_rate": 5.6327619949558806e-06, |
|
"loss": 0.0565, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"learning_rate": 5.500805009197916e-06, |
|
"loss": 0.042, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"learning_rate": 5.369823098137803e-06, |
|
"loss": 0.04, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 33.41, |
|
"learning_rate": 5.23984464844195e-06, |
|
"loss": 0.0512, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 33.64, |
|
"learning_rate": 5.1108978293045915e-06, |
|
"loss": 0.0399, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"learning_rate": 4.983010586342876e-06, |
|
"loss": 0.0493, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"learning_rate": 4.856210635540452e-06, |
|
"loss": 0.0454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 34.32, |
|
"learning_rate": 4.730525457240796e-06, |
|
"loss": 0.0414, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"learning_rate": 4.605982290191623e-06, |
|
"loss": 0.045, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"learning_rate": 4.482608125641633e-06, |
|
"loss": 0.0505, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 4.360429701490935e-06, |
|
"loss": 0.0483, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 35.23, |
|
"learning_rate": 4.239473496496345e-06, |
|
"loss": 0.0458, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 35.45, |
|
"learning_rate": 4.119765724532843e-06, |
|
"loss": 0.0391, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 35.68, |
|
"learning_rate": 4.001332328912475e-06, |
|
"loss": 0.0434, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 35.91, |
|
"learning_rate": 3.884198976761846e-06, |
|
"loss": 0.0516, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 36.14, |
|
"learning_rate": 3.7683910534594957e-06, |
|
"loss": 0.0438, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 3.6539336571343177e-06, |
|
"loss": 0.0424, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 36.59, |
|
"learning_rate": 3.540851593226261e-06, |
|
"loss": 0.0468, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 36.82, |
|
"learning_rate": 3.429169369110422e-06, |
|
"loss": 0.0413, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 3.3189111887857773e-06, |
|
"loss": 0.0479, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 37.27, |
|
"learning_rate": 3.2101009476296306e-06, |
|
"loss": 0.0402, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 3.1027622272189572e-06, |
|
"loss": 0.0428, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 37.73, |
|
"learning_rate": 2.996918290219769e-06, |
|
"loss": 0.0394, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"learning_rate": 2.8925920753455625e-06, |
|
"loss": 0.0359, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 38.18, |
|
"learning_rate": 2.7898061923860153e-06, |
|
"loss": 0.0404, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 38.41, |
|
"learning_rate": 2.688582917306938e-06, |
|
"loss": 0.0439, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 2.5889441874225676e-06, |
|
"loss": 0.0396, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 38.86, |
|
"learning_rate": 2.4909115966412957e-06, |
|
"loss": 0.0462, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 39.09, |
|
"learning_rate": 2.3945063907857647e-06, |
|
"loss": 0.0466, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 39.32, |
|
"learning_rate": 2.299749462988451e-06, |
|
"loss": 0.031, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 39.55, |
|
"learning_rate": 2.2066613491636568e-06, |
|
"loss": 0.0409, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 39.77, |
|
"learning_rate": 2.115262223556912e-06, |
|
"loss": 0.0449, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 2.025571894372794e-06, |
|
"loss": 0.0373, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 40.23, |
|
"learning_rate": 1.9376097994820286e-06, |
|
"loss": 0.0422, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 40.45, |
|
"learning_rate": 1.8513950022088966e-06, |
|
"loss": 0.0508, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 1.7669461871997817e-06, |
|
"loss": 0.0424, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 1.6842816563737875e-06, |
|
"loss": 0.0403, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 41.14, |
|
"learning_rate": 1.603419324956328e-06, |
|
"loss": 0.0419, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 41.36, |
|
"learning_rate": 1.5243767175964818e-06, |
|
"loss": 0.0334, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 1.4471709645690336e-06, |
|
"loss": 0.0304, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 41.82, |
|
"learning_rate": 1.3718187980619557e-06, |
|
"loss": 0.0413, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 42.05, |
|
"learning_rate": 1.298336548550172e-06, |
|
"loss": 0.0423, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 42.27, |
|
"learning_rate": 1.226740141256395e-06, |
|
"loss": 0.0378, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 1.1570450926997657e-06, |
|
"loss": 0.0316, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 42.73, |
|
"learning_rate": 1.0892665073330932e-06, |
|
"loss": 0.0366, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"learning_rate": 1.023419074269384e-06, |
|
"loss": 0.0335, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 43.18, |
|
"learning_rate": 9.595170640983786e-07, |
|
"loss": 0.0352, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 43.41, |
|
"learning_rate": 8.975743257938186e-07, |
|
"loss": 0.0399, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 43.64, |
|
"learning_rate": 8.37604283712048e-07, |
|
"loss": 0.0344, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 43.86, |
|
"learning_rate": 7.796199346826727e-07, |
|
"loss": 0.04, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 44.09, |
|
"learning_rate": 7.236338451918634e-07, |
|
"loss": 0.0355, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"learning_rate": 6.696581486589071e-07, |
|
"loss": 0.0363, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 44.55, |
|
"learning_rate": 6.177045428066397e-07, |
|
"loss": 0.0266, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 44.77, |
|
"learning_rate": 5.677842871262895e-07, |
|
"loss": 0.0303, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 5.199082004372958e-07, |
|
"loss": 0.0318, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 45.23, |
|
"learning_rate": 4.7408665854263067e-07, |
|
"loss": 0.0297, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 4.3032959198013646e-07, |
|
"loss": 0.0346, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 45.68, |
|
"learning_rate": 3.8864648387036074e-07, |
|
"loss": 0.0359, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 45.91, |
|
"learning_rate": 3.490463678613487e-07, |
|
"loss": 0.0348, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 46.14, |
|
"learning_rate": 3.1153782617086126e-07, |
|
"loss": 0.0387, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 46.36, |
|
"learning_rate": 2.761289877264139e-07, |
|
"loss": 0.0304, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 46.59, |
|
"learning_rate": 2.4282752640355846e-07, |
|
"loss": 0.0337, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 2.1164065936278732e-07, |
|
"loss": 0.0432, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 47.05, |
|
"learning_rate": 1.8257514548541632e-07, |
|
"loss": 0.0298, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 47.27, |
|
"learning_rate": 1.5563728390878496e-07, |
|
"loss": 0.0322, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"learning_rate": 1.30832912661093e-07, |
|
"loss": 0.0305, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 1.0816740739617471e-07, |
|
"loss": 0.0333, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 47.95, |
|
"learning_rate": 8.764568022847198e-08, |
|
"loss": 0.0293, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 48.18, |
|
"learning_rate": 6.92721786684769e-08, |
|
"loss": 0.0428, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 48.41, |
|
"learning_rate": 5.305088465885067e-08, |
|
"loss": 0.0363, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 48.64, |
|
"learning_rate": 3.898531371145597e-08, |
|
"loss": 0.0329, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 48.86, |
|
"learning_rate": 2.7078514145459924e-08, |
|
"loss": 0.0308, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 49.09, |
|
"learning_rate": 1.7333066426706845e-08, |
|
"loss": 0.0318, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"learning_rate": 9.75108260846569e-09, |
|
"loss": 0.03, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 49.55, |
|
"learning_rate": 4.334205873705033e-09, |
|
"loss": 0.0302, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 49.77, |
|
"learning_rate": 1.0836101789768462e-09, |
|
"loss": 0.0264, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0382, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 2200, |
|
"total_flos": 3.745171436910674e+17, |
|
"train_loss": 0.13681490471417254, |
|
"train_runtime": 4969.2412, |
|
"train_samples_per_second": 13.865, |
|
"train_steps_per_second": 0.443 |
|
} |
|
], |
|
"max_steps": 2200, |
|
"num_train_epochs": 50, |
|
"total_flos": 3.745171436910674e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|