RxnIM / rxn /model /trainer_state.json
CYF200127's picture
Upload 12 files
092aa0d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 2200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"learning_rate": 3.0303030303030305e-06,
"loss": 1.7297,
"step": 10
},
{
"epoch": 0.45,
"learning_rate": 6.060606060606061e-06,
"loss": 0.9422,
"step": 20
},
{
"epoch": 0.68,
"learning_rate": 9.090909090909091e-06,
"loss": 0.7053,
"step": 30
},
{
"epoch": 0.91,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.6722,
"step": 40
},
{
"epoch": 1.14,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.6335,
"step": 50
},
{
"epoch": 1.36,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.6203,
"step": 60
},
{
"epoch": 1.59,
"learning_rate": 1.999982661974106e-05,
"loss": 0.6116,
"step": 70
},
{
"epoch": 1.82,
"learning_rate": 1.9997876160873005e-05,
"loss": 0.6105,
"step": 80
},
{
"epoch": 2.05,
"learning_rate": 1.9993758941929738e-05,
"loss": 0.5888,
"step": 90
},
{
"epoch": 2.27,
"learning_rate": 1.9987475855203325e-05,
"loss": 0.5663,
"step": 100
},
{
"epoch": 2.5,
"learning_rate": 1.997902826237712e-05,
"loss": 0.5533,
"step": 110
},
{
"epoch": 2.73,
"learning_rate": 1.9968417994230628e-05,
"loss": 0.5662,
"step": 120
},
{
"epoch": 2.95,
"learning_rate": 1.9955647350242765e-05,
"loss": 0.5717,
"step": 130
},
{
"epoch": 3.18,
"learning_rate": 1.99407190980935e-05,
"loss": 0.5193,
"step": 140
},
{
"epoch": 3.41,
"learning_rate": 1.9923636473064024e-05,
"loss": 0.4847,
"step": 150
},
{
"epoch": 3.64,
"learning_rate": 1.990440317733561e-05,
"loss": 0.4733,
"step": 160
},
{
"epoch": 3.86,
"learning_rate": 1.9883023379187267e-05,
"loss": 0.4801,
"step": 170
},
{
"epoch": 4.09,
"learning_rate": 1.9859501712092374e-05,
"loss": 0.4299,
"step": 180
},
{
"epoch": 4.32,
"learning_rate": 1.9833843273714506e-05,
"loss": 0.3846,
"step": 190
},
{
"epoch": 4.55,
"learning_rate": 1.9806053624802667e-05,
"loss": 0.3733,
"step": 200
},
{
"epoch": 4.77,
"learning_rate": 1.9776138787986137e-05,
"loss": 0.3646,
"step": 210
},
{
"epoch": 5.0,
"learning_rate": 1.9744105246469264e-05,
"loss": 0.4011,
"step": 220
},
{
"epoch": 5.23,
"learning_rate": 1.9709959942626366e-05,
"loss": 0.3151,
"step": 230
},
{
"epoch": 5.45,
"learning_rate": 1.9673710276497207e-05,
"loss": 0.2784,
"step": 240
},
{
"epoch": 5.68,
"learning_rate": 1.9635364104183236e-05,
"loss": 0.3248,
"step": 250
},
{
"epoch": 5.91,
"learning_rate": 1.9594929736144978e-05,
"loss": 0.3083,
"step": 260
},
{
"epoch": 6.14,
"learning_rate": 1.955241593540098e-05,
"loss": 0.2681,
"step": 270
},
{
"epoch": 6.36,
"learning_rate": 1.9507831915628706e-05,
"loss": 0.2364,
"step": 280
},
{
"epoch": 6.59,
"learning_rate": 1.9461187339167673e-05,
"loss": 0.2565,
"step": 290
},
{
"epoch": 6.82,
"learning_rate": 1.9412492314925453e-05,
"loss": 0.2661,
"step": 300
},
{
"epoch": 7.05,
"learning_rate": 1.9361757396186834e-05,
"loss": 0.2562,
"step": 310
},
{
"epoch": 7.27,
"learning_rate": 1.9308993578326688e-05,
"loss": 0.2189,
"step": 320
},
{
"epoch": 7.5,
"learning_rate": 1.9254212296427043e-05,
"loss": 0.1989,
"step": 330
},
{
"epoch": 7.73,
"learning_rate": 1.9197425422798832e-05,
"loss": 0.2321,
"step": 340
},
{
"epoch": 7.95,
"learning_rate": 1.9138645264408917e-05,
"loss": 0.229,
"step": 350
},
{
"epoch": 8.18,
"learning_rate": 1.9077884560212885e-05,
"loss": 0.1959,
"step": 360
},
{
"epoch": 8.41,
"learning_rate": 1.901515647839425e-05,
"loss": 0.1936,
"step": 370
},
{
"epoch": 8.64,
"learning_rate": 1.8950474613510605e-05,
"loss": 0.1907,
"step": 380
},
{
"epoch": 8.86,
"learning_rate": 1.8883852983547383e-05,
"loss": 0.1959,
"step": 390
},
{
"epoch": 9.09,
"learning_rate": 1.8815306026879854e-05,
"loss": 0.1852,
"step": 400
},
{
"epoch": 9.32,
"learning_rate": 1.8744848599144027e-05,
"loss": 0.1823,
"step": 410
},
{
"epoch": 9.55,
"learning_rate": 1.8672495970017067e-05,
"loss": 0.154,
"step": 420
},
{
"epoch": 9.77,
"learning_rate": 1.859826381990806e-05,
"loss": 0.1764,
"step": 430
},
{
"epoch": 10.0,
"learning_rate": 1.8522168236559693e-05,
"loss": 0.1748,
"step": 440
},
{
"epoch": 10.23,
"learning_rate": 1.8444225711561713e-05,
"loss": 0.139,
"step": 450
},
{
"epoch": 10.45,
"learning_rate": 1.83644531367768e-05,
"loss": 0.1648,
"step": 460
},
{
"epoch": 10.68,
"learning_rate": 1.8282867800679774e-05,
"loss": 0.1677,
"step": 470
},
{
"epoch": 10.91,
"learning_rate": 1.8199487384610758e-05,
"loss": 0.1728,
"step": 480
},
{
"epoch": 11.14,
"learning_rate": 1.8114329958943272e-05,
"loss": 0.1436,
"step": 490
},
{
"epoch": 11.36,
"learning_rate": 1.8027413979167968e-05,
"loss": 0.1336,
"step": 500
},
{
"epoch": 11.59,
"learning_rate": 1.7938758281892928e-05,
"loss": 0.1426,
"step": 510
},
{
"epoch": 11.82,
"learning_rate": 1.7848382080761343e-05,
"loss": 0.1725,
"step": 520
},
{
"epoch": 12.05,
"learning_rate": 1.775630496228752e-05,
"loss": 0.1362,
"step": 530
},
{
"epoch": 12.27,
"learning_rate": 1.766254688161202e-05,
"loss": 0.1287,
"step": 540
},
{
"epoch": 12.5,
"learning_rate": 1.7567128158176955e-05,
"loss": 0.1219,
"step": 550
},
{
"epoch": 12.73,
"learning_rate": 1.7470069471322325e-05,
"loss": 0.1456,
"step": 560
},
{
"epoch": 12.95,
"learning_rate": 1.737139185580434e-05,
"loss": 0.1376,
"step": 570
},
{
"epoch": 13.18,
"learning_rate": 1.7271116697236717e-05,
"loss": 0.1209,
"step": 580
},
{
"epoch": 13.41,
"learning_rate": 1.7169265727455964e-05,
"loss": 0.1316,
"step": 590
},
{
"epoch": 13.64,
"learning_rate": 1.7065861019811598e-05,
"loss": 0.1296,
"step": 600
},
{
"epoch": 13.86,
"learning_rate": 1.6960924984382373e-05,
"loss": 0.1359,
"step": 610
},
{
"epoch": 14.09,
"learning_rate": 1.685448036311951e-05,
"loss": 0.1123,
"step": 620
},
{
"epoch": 14.32,
"learning_rate": 1.6746550224918032e-05,
"loss": 0.1227,
"step": 630
},
{
"epoch": 14.55,
"learning_rate": 1.663715796061722e-05,
"loss": 0.1282,
"step": 640
},
{
"epoch": 14.77,
"learning_rate": 1.6526327277931278e-05,
"loss": 0.1055,
"step": 650
},
{
"epoch": 15.0,
"learning_rate": 1.6414082196311402e-05,
"loss": 0.1288,
"step": 660
},
{
"epoch": 15.23,
"learning_rate": 1.630044704174018e-05,
"loss": 0.1145,
"step": 670
},
{
"epoch": 15.45,
"learning_rate": 1.6185446441459652e-05,
"loss": 0.1158,
"step": 680
},
{
"epoch": 15.68,
"learning_rate": 1.6069105318634024e-05,
"loss": 0.1119,
"step": 690
},
{
"epoch": 15.91,
"learning_rate": 1.5951448886948286e-05,
"loss": 0.1119,
"step": 700
},
{
"epoch": 16.14,
"learning_rate": 1.5832502645143837e-05,
"loss": 0.0981,
"step": 710
},
{
"epoch": 16.36,
"learning_rate": 1.571229237149235e-05,
"loss": 0.1142,
"step": 720
},
{
"epoch": 16.59,
"learning_rate": 1.5590844118209055e-05,
"loss": 0.1054,
"step": 730
},
{
"epoch": 16.82,
"learning_rate": 1.5468184205806646e-05,
"loss": 0.1012,
"step": 740
},
{
"epoch": 17.05,
"learning_rate": 1.534433921739105e-05,
"loss": 0.1047,
"step": 750
},
{
"epoch": 17.27,
"learning_rate": 1.5219335992900282e-05,
"loss": 0.0992,
"step": 760
},
{
"epoch": 17.5,
"learning_rate": 1.5093201623287631e-05,
"loss": 0.1063,
"step": 770
},
{
"epoch": 17.73,
"learning_rate": 1.4965963444650465e-05,
"loss": 0.0863,
"step": 780
},
{
"epoch": 17.95,
"learning_rate": 1.4837649032305885e-05,
"loss": 0.0949,
"step": 790
},
{
"epoch": 18.18,
"learning_rate": 1.4708286194814565e-05,
"loss": 0.0829,
"step": 800
},
{
"epoch": 18.41,
"learning_rate": 1.4577902967953995e-05,
"loss": 0.0892,
"step": 810
},
{
"epoch": 18.64,
"learning_rate": 1.4446527608642538e-05,
"loss": 0.0972,
"step": 820
},
{
"epoch": 18.86,
"learning_rate": 1.4314188588815514e-05,
"loss": 0.0848,
"step": 830
},
{
"epoch": 19.09,
"learning_rate": 1.4180914589254716e-05,
"loss": 0.0849,
"step": 840
},
{
"epoch": 19.32,
"learning_rate": 1.4046734493372646e-05,
"loss": 0.0877,
"step": 850
},
{
"epoch": 19.55,
"learning_rate": 1.3911677380952853e-05,
"loss": 0.0877,
"step": 860
},
{
"epoch": 19.77,
"learning_rate": 1.3775772521847683e-05,
"loss": 0.0754,
"step": 870
},
{
"epoch": 20.0,
"learning_rate": 1.3639049369634878e-05,
"loss": 0.0901,
"step": 880
},
{
"epoch": 20.23,
"learning_rate": 1.3501537555234323e-05,
"loss": 0.0826,
"step": 890
},
{
"epoch": 20.45,
"learning_rate": 1.3363266880486388e-05,
"loss": 0.0858,
"step": 900
},
{
"epoch": 20.68,
"learning_rate": 1.3224267311693186e-05,
"loss": 0.0791,
"step": 910
},
{
"epoch": 20.91,
"learning_rate": 1.308456897312425e-05,
"loss": 0.0756,
"step": 920
},
{
"epoch": 21.14,
"learning_rate": 1.2944202140487905e-05,
"loss": 0.0683,
"step": 930
},
{
"epoch": 21.36,
"learning_rate": 1.2803197234369878e-05,
"loss": 0.0806,
"step": 940
},
{
"epoch": 21.59,
"learning_rate": 1.2661584813640485e-05,
"loss": 0.0841,
"step": 950
},
{
"epoch": 21.82,
"learning_rate": 1.2519395568831839e-05,
"loss": 0.0917,
"step": 960
},
{
"epoch": 22.05,
"learning_rate": 1.2376660315486535e-05,
"loss": 0.0708,
"step": 970
},
{
"epoch": 22.27,
"learning_rate": 1.223340998747927e-05,
"loss": 0.0768,
"step": 980
},
{
"epoch": 22.5,
"learning_rate": 1.2089675630312755e-05,
"loss": 0.0834,
"step": 990
},
{
"epoch": 22.73,
"learning_rate": 1.1945488394389479e-05,
"loss": 0.0769,
"step": 1000
},
{
"epoch": 22.95,
"learning_rate": 1.1800879528260761e-05,
"loss": 0.0751,
"step": 1010
},
{
"epoch": 23.18,
"learning_rate": 1.1655880371854454e-05,
"loss": 0.08,
"step": 1020
},
{
"epoch": 23.41,
"learning_rate": 1.1510522349682922e-05,
"loss": 0.0603,
"step": 1030
},
{
"epoch": 23.64,
"learning_rate": 1.1364836964032658e-05,
"loss": 0.0685,
"step": 1040
},
{
"epoch": 23.86,
"learning_rate": 1.1218855788137016e-05,
"loss": 0.0735,
"step": 1050
},
{
"epoch": 24.09,
"learning_rate": 1.107261045933363e-05,
"loss": 0.0615,
"step": 1060
},
{
"epoch": 24.32,
"learning_rate": 1.092613267220788e-05,
"loss": 0.0678,
"step": 1070
},
{
"epoch": 24.55,
"learning_rate": 1.0779454171723994e-05,
"loss": 0.0687,
"step": 1080
},
{
"epoch": 24.77,
"learning_rate": 1.0632606746345203e-05,
"loss": 0.0669,
"step": 1090
},
{
"epoch": 25.0,
"learning_rate": 1.0485622221144485e-05,
"loss": 0.071,
"step": 1100
},
{
"epoch": 25.23,
"learning_rate": 1.0338532450907373e-05,
"loss": 0.0708,
"step": 1110
},
{
"epoch": 25.45,
"learning_rate": 1.0191369313228319e-05,
"loss": 0.0677,
"step": 1120
},
{
"epoch": 25.68,
"learning_rate": 1.0044164701602111e-05,
"loss": 0.0738,
"step": 1130
},
{
"epoch": 25.91,
"learning_rate": 9.896950518511863e-06,
"loss": 0.0753,
"step": 1140
},
{
"epoch": 26.14,
"learning_rate": 9.749758668515027e-06,
"loss": 0.0689,
"step": 1150
},
{
"epoch": 26.36,
"learning_rate": 9.602621051328998e-06,
"loss": 0.0688,
"step": 1160
},
{
"epoch": 26.59,
"learning_rate": 9.455569554917701e-06,
"loss": 0.0781,
"step": 1170
},
{
"epoch": 26.82,
"learning_rate": 9.308636048580813e-06,
"loss": 0.0635,
"step": 1180
},
{
"epoch": 27.05,
"learning_rate": 9.161852376046953e-06,
"loss": 0.068,
"step": 1190
},
{
"epoch": 27.27,
"learning_rate": 9.015250348572452e-06,
"loss": 0.0602,
"step": 1200
},
{
"epoch": 27.5,
"learning_rate": 8.868861738047158e-06,
"loss": 0.0502,
"step": 1210
},
{
"epoch": 27.73,
"learning_rate": 8.72271827010876e-06,
"loss": 0.0531,
"step": 1220
},
{
"epoch": 27.95,
"learning_rate": 8.576851617267151e-06,
"loss": 0.0613,
"step": 1230
},
{
"epoch": 28.18,
"learning_rate": 8.431293392040283e-06,
"loss": 0.0636,
"step": 1240
},
{
"epoch": 28.41,
"learning_rate": 8.286075140103058e-06,
"loss": 0.0618,
"step": 1250
},
{
"epoch": 28.64,
"learning_rate": 8.141228333450673e-06,
"loss": 0.0652,
"step": 1260
},
{
"epoch": 28.86,
"learning_rate": 7.99678436357794e-06,
"loss": 0.0742,
"step": 1270
},
{
"epoch": 29.09,
"learning_rate": 7.852774534676073e-06,
"loss": 0.0529,
"step": 1280
},
{
"epoch": 29.32,
"learning_rate": 7.709230056848356e-06,
"loss": 0.0441,
"step": 1290
},
{
"epoch": 29.55,
"learning_rate": 7.5661820393462605e-06,
"loss": 0.0575,
"step": 1300
},
{
"epoch": 29.77,
"learning_rate": 7.423661483827357e-06,
"loss": 0.0552,
"step": 1310
},
{
"epoch": 30.0,
"learning_rate": 7.2816992776365714e-06,
"loss": 0.0518,
"step": 1320
},
{
"epoch": 30.23,
"learning_rate": 7.1403261871122466e-06,
"loss": 0.0631,
"step": 1330
},
{
"epoch": 30.45,
"learning_rate": 6.999572850918357e-06,
"loss": 0.0496,
"step": 1340
},
{
"epoch": 30.68,
"learning_rate": 6.859469773404471e-06,
"loss": 0.0575,
"step": 1350
},
{
"epoch": 30.91,
"learning_rate": 6.720047317994775e-06,
"loss": 0.0547,
"step": 1360
},
{
"epoch": 31.14,
"learning_rate": 6.581335700607632e-06,
"loss": 0.0537,
"step": 1370
},
{
"epoch": 31.36,
"learning_rate": 6.443364983107156e-06,
"loss": 0.0531,
"step": 1380
},
{
"epoch": 31.59,
"learning_rate": 6.306165066788121e-06,
"loss": 0.0525,
"step": 1390
},
{
"epoch": 31.82,
"learning_rate": 6.169765685895703e-06,
"loss": 0.0512,
"step": 1400
},
{
"epoch": 32.05,
"learning_rate": 6.034196401181414e-06,
"loss": 0.0506,
"step": 1410
},
{
"epoch": 32.27,
"learning_rate": 5.899486593496625e-06,
"loss": 0.0472,
"step": 1420
},
{
"epoch": 32.5,
"learning_rate": 5.765665457425102e-06,
"loss": 0.0538,
"step": 1430
},
{
"epoch": 32.73,
"learning_rate": 5.6327619949558806e-06,
"loss": 0.0565,
"step": 1440
},
{
"epoch": 32.95,
"learning_rate": 5.500805009197916e-06,
"loss": 0.042,
"step": 1450
},
{
"epoch": 33.18,
"learning_rate": 5.369823098137803e-06,
"loss": 0.04,
"step": 1460
},
{
"epoch": 33.41,
"learning_rate": 5.23984464844195e-06,
"loss": 0.0512,
"step": 1470
},
{
"epoch": 33.64,
"learning_rate": 5.1108978293045915e-06,
"loss": 0.0399,
"step": 1480
},
{
"epoch": 33.86,
"learning_rate": 4.983010586342876e-06,
"loss": 0.0493,
"step": 1490
},
{
"epoch": 34.09,
"learning_rate": 4.856210635540452e-06,
"loss": 0.0454,
"step": 1500
},
{
"epoch": 34.32,
"learning_rate": 4.730525457240796e-06,
"loss": 0.0414,
"step": 1510
},
{
"epoch": 34.55,
"learning_rate": 4.605982290191623e-06,
"loss": 0.045,
"step": 1520
},
{
"epoch": 34.77,
"learning_rate": 4.482608125641633e-06,
"loss": 0.0505,
"step": 1530
},
{
"epoch": 35.0,
"learning_rate": 4.360429701490935e-06,
"loss": 0.0483,
"step": 1540
},
{
"epoch": 35.23,
"learning_rate": 4.239473496496345e-06,
"loss": 0.0458,
"step": 1550
},
{
"epoch": 35.45,
"learning_rate": 4.119765724532843e-06,
"loss": 0.0391,
"step": 1560
},
{
"epoch": 35.68,
"learning_rate": 4.001332328912475e-06,
"loss": 0.0434,
"step": 1570
},
{
"epoch": 35.91,
"learning_rate": 3.884198976761846e-06,
"loss": 0.0516,
"step": 1580
},
{
"epoch": 36.14,
"learning_rate": 3.7683910534594957e-06,
"loss": 0.0438,
"step": 1590
},
{
"epoch": 36.36,
"learning_rate": 3.6539336571343177e-06,
"loss": 0.0424,
"step": 1600
},
{
"epoch": 36.59,
"learning_rate": 3.540851593226261e-06,
"loss": 0.0468,
"step": 1610
},
{
"epoch": 36.82,
"learning_rate": 3.429169369110422e-06,
"loss": 0.0413,
"step": 1620
},
{
"epoch": 37.05,
"learning_rate": 3.3189111887857773e-06,
"loss": 0.0479,
"step": 1630
},
{
"epoch": 37.27,
"learning_rate": 3.2101009476296306e-06,
"loss": 0.0402,
"step": 1640
},
{
"epoch": 37.5,
"learning_rate": 3.1027622272189572e-06,
"loss": 0.0428,
"step": 1650
},
{
"epoch": 37.73,
"learning_rate": 2.996918290219769e-06,
"loss": 0.0394,
"step": 1660
},
{
"epoch": 37.95,
"learning_rate": 2.8925920753455625e-06,
"loss": 0.0359,
"step": 1670
},
{
"epoch": 38.18,
"learning_rate": 2.7898061923860153e-06,
"loss": 0.0404,
"step": 1680
},
{
"epoch": 38.41,
"learning_rate": 2.688582917306938e-06,
"loss": 0.0439,
"step": 1690
},
{
"epoch": 38.64,
"learning_rate": 2.5889441874225676e-06,
"loss": 0.0396,
"step": 1700
},
{
"epoch": 38.86,
"learning_rate": 2.4909115966412957e-06,
"loss": 0.0462,
"step": 1710
},
{
"epoch": 39.09,
"learning_rate": 2.3945063907857647e-06,
"loss": 0.0466,
"step": 1720
},
{
"epoch": 39.32,
"learning_rate": 2.299749462988451e-06,
"loss": 0.031,
"step": 1730
},
{
"epoch": 39.55,
"learning_rate": 2.2066613491636568e-06,
"loss": 0.0409,
"step": 1740
},
{
"epoch": 39.77,
"learning_rate": 2.115262223556912e-06,
"loss": 0.0449,
"step": 1750
},
{
"epoch": 40.0,
"learning_rate": 2.025571894372794e-06,
"loss": 0.0373,
"step": 1760
},
{
"epoch": 40.23,
"learning_rate": 1.9376097994820286e-06,
"loss": 0.0422,
"step": 1770
},
{
"epoch": 40.45,
"learning_rate": 1.8513950022088966e-06,
"loss": 0.0508,
"step": 1780
},
{
"epoch": 40.68,
"learning_rate": 1.7669461871997817e-06,
"loss": 0.0424,
"step": 1790
},
{
"epoch": 40.91,
"learning_rate": 1.6842816563737875e-06,
"loss": 0.0403,
"step": 1800
},
{
"epoch": 41.14,
"learning_rate": 1.603419324956328e-06,
"loss": 0.0419,
"step": 1810
},
{
"epoch": 41.36,
"learning_rate": 1.5243767175964818e-06,
"loss": 0.0334,
"step": 1820
},
{
"epoch": 41.59,
"learning_rate": 1.4471709645690336e-06,
"loss": 0.0304,
"step": 1830
},
{
"epoch": 41.82,
"learning_rate": 1.3718187980619557e-06,
"loss": 0.0413,
"step": 1840
},
{
"epoch": 42.05,
"learning_rate": 1.298336548550172e-06,
"loss": 0.0423,
"step": 1850
},
{
"epoch": 42.27,
"learning_rate": 1.226740141256395e-06,
"loss": 0.0378,
"step": 1860
},
{
"epoch": 42.5,
"learning_rate": 1.1570450926997657e-06,
"loss": 0.0316,
"step": 1870
},
{
"epoch": 42.73,
"learning_rate": 1.0892665073330932e-06,
"loss": 0.0366,
"step": 1880
},
{
"epoch": 42.95,
"learning_rate": 1.023419074269384e-06,
"loss": 0.0335,
"step": 1890
},
{
"epoch": 43.18,
"learning_rate": 9.595170640983786e-07,
"loss": 0.0352,
"step": 1900
},
{
"epoch": 43.41,
"learning_rate": 8.975743257938186e-07,
"loss": 0.0399,
"step": 1910
},
{
"epoch": 43.64,
"learning_rate": 8.37604283712048e-07,
"loss": 0.0344,
"step": 1920
},
{
"epoch": 43.86,
"learning_rate": 7.796199346826727e-07,
"loss": 0.04,
"step": 1930
},
{
"epoch": 44.09,
"learning_rate": 7.236338451918634e-07,
"loss": 0.0355,
"step": 1940
},
{
"epoch": 44.32,
"learning_rate": 6.696581486589071e-07,
"loss": 0.0363,
"step": 1950
},
{
"epoch": 44.55,
"learning_rate": 6.177045428066397e-07,
"loss": 0.0266,
"step": 1960
},
{
"epoch": 44.77,
"learning_rate": 5.677842871262895e-07,
"loss": 0.0303,
"step": 1970
},
{
"epoch": 45.0,
"learning_rate": 5.199082004372958e-07,
"loss": 0.0318,
"step": 1980
},
{
"epoch": 45.23,
"learning_rate": 4.7408665854263067e-07,
"loss": 0.0297,
"step": 1990
},
{
"epoch": 45.45,
"learning_rate": 4.3032959198013646e-07,
"loss": 0.0346,
"step": 2000
},
{
"epoch": 45.68,
"learning_rate": 3.8864648387036074e-07,
"loss": 0.0359,
"step": 2010
},
{
"epoch": 45.91,
"learning_rate": 3.490463678613487e-07,
"loss": 0.0348,
"step": 2020
},
{
"epoch": 46.14,
"learning_rate": 3.1153782617086126e-07,
"loss": 0.0387,
"step": 2030
},
{
"epoch": 46.36,
"learning_rate": 2.761289877264139e-07,
"loss": 0.0304,
"step": 2040
},
{
"epoch": 46.59,
"learning_rate": 2.4282752640355846e-07,
"loss": 0.0337,
"step": 2050
},
{
"epoch": 46.82,
"learning_rate": 2.1164065936278732e-07,
"loss": 0.0432,
"step": 2060
},
{
"epoch": 47.05,
"learning_rate": 1.8257514548541632e-07,
"loss": 0.0298,
"step": 2070
},
{
"epoch": 47.27,
"learning_rate": 1.5563728390878496e-07,
"loss": 0.0322,
"step": 2080
},
{
"epoch": 47.5,
"learning_rate": 1.30832912661093e-07,
"loss": 0.0305,
"step": 2090
},
{
"epoch": 47.73,
"learning_rate": 1.0816740739617471e-07,
"loss": 0.0333,
"step": 2100
},
{
"epoch": 47.95,
"learning_rate": 8.764568022847198e-08,
"loss": 0.0293,
"step": 2110
},
{
"epoch": 48.18,
"learning_rate": 6.92721786684769e-08,
"loss": 0.0428,
"step": 2120
},
{
"epoch": 48.41,
"learning_rate": 5.305088465885067e-08,
"loss": 0.0363,
"step": 2130
},
{
"epoch": 48.64,
"learning_rate": 3.898531371145597e-08,
"loss": 0.0329,
"step": 2140
},
{
"epoch": 48.86,
"learning_rate": 2.7078514145459924e-08,
"loss": 0.0308,
"step": 2150
},
{
"epoch": 49.09,
"learning_rate": 1.7333066426706845e-08,
"loss": 0.0318,
"step": 2160
},
{
"epoch": 49.32,
"learning_rate": 9.75108260846569e-09,
"loss": 0.03,
"step": 2170
},
{
"epoch": 49.55,
"learning_rate": 4.334205873705033e-09,
"loss": 0.0302,
"step": 2180
},
{
"epoch": 49.77,
"learning_rate": 1.0836101789768462e-09,
"loss": 0.0264,
"step": 2190
},
{
"epoch": 50.0,
"learning_rate": 0.0,
"loss": 0.0382,
"step": 2200
},
{
"epoch": 50.0,
"step": 2200,
"total_flos": 3.745171436910674e+17,
"train_loss": 0.13681490471417254,
"train_runtime": 4969.2412,
"train_samples_per_second": 13.865,
"train_steps_per_second": 0.443
}
],
"max_steps": 2200,
"num_train_epochs": 50,
"total_flos": 3.745171436910674e+17,
"trial_name": null,
"trial_params": null
}