toucan-1.2B / trainer_state.json
elmadany
toucan-1.2B
cb366c3
{
"best_metric": 2710.1728,
"best_model_checkpoint": "/jasmine/s-elmadany/ds_UBC_AfroT5_large_1M_2e-5/checkpoint-122140",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 122140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9983625347961356e-05,
"loss": 8.4624,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 1.9967250695922714e-05,
"loss": 5.9771,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.995087604388407e-05,
"loss": 5.7693,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.9934501391845424e-05,
"loss": 5.0828,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.991812673980678e-05,
"loss": 4.8058,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 1.9901752087768136e-05,
"loss": 4.5844,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 1.988537743572949e-05,
"loss": 4.4461,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 1.986900278369085e-05,
"loss": 4.3994,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 1.9852628131652203e-05,
"loss": 4.3327,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 1.983625347961356e-05,
"loss": 4.2567,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 1.9819878827574916e-05,
"loss": 4.2223,
"step": 1100
},
{
"epoch": 0.1,
"learning_rate": 1.980350417553627e-05,
"loss": 4.1905,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 1.978712952349763e-05,
"loss": 4.1563,
"step": 1300
},
{
"epoch": 0.11,
"learning_rate": 1.9770754871458983e-05,
"loss": 4.1126,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 1.9754380219420338e-05,
"loss": 4.0979,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 1.9738005567381696e-05,
"loss": 4.0696,
"step": 1600
},
{
"epoch": 0.14,
"learning_rate": 1.972163091534305e-05,
"loss": 4.0446,
"step": 1700
},
{
"epoch": 0.15,
"learning_rate": 1.9705256263304405e-05,
"loss": 4.0218,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 1.9688881611265763e-05,
"loss": 3.9976,
"step": 1900
},
{
"epoch": 0.16,
"learning_rate": 1.9672506959227118e-05,
"loss": 3.9459,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 1.9656132307188473e-05,
"loss": 3.9501,
"step": 2100
},
{
"epoch": 0.18,
"learning_rate": 1.963975765514983e-05,
"loss": 3.9454,
"step": 2200
},
{
"epoch": 0.19,
"learning_rate": 1.9623383003111185e-05,
"loss": 3.9145,
"step": 2300
},
{
"epoch": 0.2,
"learning_rate": 1.960700835107254e-05,
"loss": 3.9358,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 1.9590633699033898e-05,
"loss": 3.8542,
"step": 2500
},
{
"epoch": 0.21,
"learning_rate": 1.9574259046995253e-05,
"loss": 3.8823,
"step": 2600
},
{
"epoch": 0.22,
"learning_rate": 1.9557884394956607e-05,
"loss": 3.8611,
"step": 2700
},
{
"epoch": 0.23,
"learning_rate": 1.9541509742917965e-05,
"loss": 3.8609,
"step": 2800
},
{
"epoch": 0.24,
"learning_rate": 1.952513509087932e-05,
"loss": 3.8498,
"step": 2900
},
{
"epoch": 0.25,
"learning_rate": 1.9508760438840678e-05,
"loss": 3.8349,
"step": 3000
},
{
"epoch": 0.25,
"learning_rate": 1.9492385786802032e-05,
"loss": 3.8498,
"step": 3100
},
{
"epoch": 0.26,
"learning_rate": 1.9476011134763387e-05,
"loss": 3.7941,
"step": 3200
},
{
"epoch": 0.27,
"learning_rate": 1.9459636482724745e-05,
"loss": 3.8308,
"step": 3300
},
{
"epoch": 0.28,
"learning_rate": 1.94432618306861e-05,
"loss": 3.7989,
"step": 3400
},
{
"epoch": 0.29,
"learning_rate": 1.9426887178647454e-05,
"loss": 3.8041,
"step": 3500
},
{
"epoch": 0.29,
"learning_rate": 1.9410512526608812e-05,
"loss": 3.7968,
"step": 3600
},
{
"epoch": 0.3,
"learning_rate": 1.9394137874570167e-05,
"loss": 3.7609,
"step": 3700
},
{
"epoch": 0.31,
"learning_rate": 1.937776322253152e-05,
"loss": 3.7525,
"step": 3800
},
{
"epoch": 0.32,
"learning_rate": 1.936138857049288e-05,
"loss": 3.7561,
"step": 3900
},
{
"epoch": 0.33,
"learning_rate": 1.9345013918454234e-05,
"loss": 3.7288,
"step": 4000
},
{
"epoch": 0.34,
"learning_rate": 1.932863926641559e-05,
"loss": 3.748,
"step": 4100
},
{
"epoch": 0.34,
"learning_rate": 1.9312264614376947e-05,
"loss": 3.7518,
"step": 4200
},
{
"epoch": 0.35,
"learning_rate": 1.92958899623383e-05,
"loss": 3.718,
"step": 4300
},
{
"epoch": 0.36,
"learning_rate": 1.9279515310299656e-05,
"loss": 3.7124,
"step": 4400
},
{
"epoch": 0.37,
"learning_rate": 1.9263140658261014e-05,
"loss": 3.7118,
"step": 4500
},
{
"epoch": 0.38,
"learning_rate": 1.924676600622237e-05,
"loss": 3.6885,
"step": 4600
},
{
"epoch": 0.38,
"learning_rate": 1.9230391354183727e-05,
"loss": 3.7057,
"step": 4700
},
{
"epoch": 0.39,
"learning_rate": 1.921401670214508e-05,
"loss": 3.6812,
"step": 4800
},
{
"epoch": 0.4,
"learning_rate": 1.9197642050106436e-05,
"loss": 3.6831,
"step": 4900
},
{
"epoch": 0.41,
"learning_rate": 1.9181267398067794e-05,
"loss": 3.6705,
"step": 5000
},
{
"epoch": 0.42,
"learning_rate": 1.916489274602915e-05,
"loss": 3.687,
"step": 5100
},
{
"epoch": 0.43,
"learning_rate": 1.9148518093990503e-05,
"loss": 3.677,
"step": 5200
},
{
"epoch": 0.43,
"learning_rate": 1.913214344195186e-05,
"loss": 3.6632,
"step": 5300
},
{
"epoch": 0.44,
"learning_rate": 1.9115768789913216e-05,
"loss": 3.6626,
"step": 5400
},
{
"epoch": 0.45,
"learning_rate": 1.909939413787457e-05,
"loss": 3.6451,
"step": 5500
},
{
"epoch": 0.46,
"learning_rate": 1.908301948583593e-05,
"loss": 3.6283,
"step": 5600
},
{
"epoch": 0.47,
"learning_rate": 1.9066644833797283e-05,
"loss": 3.6321,
"step": 5700
},
{
"epoch": 0.47,
"learning_rate": 1.9050270181758638e-05,
"loss": 3.6007,
"step": 5800
},
{
"epoch": 0.48,
"learning_rate": 1.9033895529719996e-05,
"loss": 3.6293,
"step": 5900
},
{
"epoch": 0.49,
"learning_rate": 1.901752087768135e-05,
"loss": 3.5916,
"step": 6000
},
{
"epoch": 0.5,
"learning_rate": 1.9001146225642705e-05,
"loss": 3.6187,
"step": 6100
},
{
"epoch": 0.51,
"learning_rate": 1.8984771573604063e-05,
"loss": 3.6006,
"step": 6200
},
{
"epoch": 0.52,
"learning_rate": 1.8968396921565418e-05,
"loss": 3.5998,
"step": 6300
},
{
"epoch": 0.52,
"learning_rate": 1.8952022269526773e-05,
"loss": 3.5517,
"step": 6400
},
{
"epoch": 0.53,
"learning_rate": 1.893564761748813e-05,
"loss": 3.5713,
"step": 6500
},
{
"epoch": 0.54,
"learning_rate": 1.8919272965449485e-05,
"loss": 3.5718,
"step": 6600
},
{
"epoch": 0.55,
"learning_rate": 1.8902898313410843e-05,
"loss": 3.5697,
"step": 6700
},
{
"epoch": 0.56,
"learning_rate": 1.8886523661372198e-05,
"loss": 3.5477,
"step": 6800
},
{
"epoch": 0.56,
"learning_rate": 1.8870149009333552e-05,
"loss": 3.5405,
"step": 6900
},
{
"epoch": 0.57,
"learning_rate": 1.885377435729491e-05,
"loss": 3.5724,
"step": 7000
},
{
"epoch": 0.58,
"learning_rate": 1.8837399705256265e-05,
"loss": 3.544,
"step": 7100
},
{
"epoch": 0.59,
"learning_rate": 1.882102505321762e-05,
"loss": 3.5689,
"step": 7200
},
{
"epoch": 0.6,
"learning_rate": 1.8804650401178978e-05,
"loss": 3.5258,
"step": 7300
},
{
"epoch": 0.61,
"learning_rate": 1.8788275749140332e-05,
"loss": 3.5395,
"step": 7400
},
{
"epoch": 0.61,
"learning_rate": 1.8771901097101687e-05,
"loss": 3.5342,
"step": 7500
},
{
"epoch": 0.62,
"learning_rate": 1.8755526445063045e-05,
"loss": 3.5367,
"step": 7600
},
{
"epoch": 0.63,
"learning_rate": 1.87391517930244e-05,
"loss": 3.5328,
"step": 7700
},
{
"epoch": 0.64,
"learning_rate": 1.8722777140985754e-05,
"loss": 3.5402,
"step": 7800
},
{
"epoch": 0.65,
"learning_rate": 1.8706402488947112e-05,
"loss": 3.5269,
"step": 7900
},
{
"epoch": 0.65,
"learning_rate": 1.8690027836908467e-05,
"loss": 3.5107,
"step": 8000
},
{
"epoch": 0.66,
"learning_rate": 1.867365318486982e-05,
"loss": 3.5119,
"step": 8100
},
{
"epoch": 0.67,
"learning_rate": 1.8657278532831176e-05,
"loss": 3.5255,
"step": 8200
},
{
"epoch": 0.68,
"learning_rate": 1.8640903880792534e-05,
"loss": 3.506,
"step": 8300
},
{
"epoch": 0.69,
"learning_rate": 1.8624529228753892e-05,
"loss": 3.4903,
"step": 8400
},
{
"epoch": 0.7,
"learning_rate": 1.8608154576715247e-05,
"loss": 3.531,
"step": 8500
},
{
"epoch": 0.7,
"learning_rate": 1.85917799246766e-05,
"loss": 3.5041,
"step": 8600
},
{
"epoch": 0.71,
"learning_rate": 1.857540527263796e-05,
"loss": 3.4605,
"step": 8700
},
{
"epoch": 0.72,
"learning_rate": 1.8559030620599314e-05,
"loss": 3.4925,
"step": 8800
},
{
"epoch": 0.73,
"learning_rate": 1.854265596856067e-05,
"loss": 3.4748,
"step": 8900
},
{
"epoch": 0.74,
"learning_rate": 1.8526281316522027e-05,
"loss": 3.4565,
"step": 9000
},
{
"epoch": 0.75,
"learning_rate": 1.850990666448338e-05,
"loss": 3.4581,
"step": 9100
},
{
"epoch": 0.75,
"learning_rate": 1.8493532012444736e-05,
"loss": 3.4933,
"step": 9200
},
{
"epoch": 0.76,
"learning_rate": 1.8477157360406094e-05,
"loss": 3.4837,
"step": 9300
},
{
"epoch": 0.77,
"learning_rate": 1.846078270836745e-05,
"loss": 3.4716,
"step": 9400
},
{
"epoch": 0.78,
"learning_rate": 1.8444408056328803e-05,
"loss": 3.4574,
"step": 9500
},
{
"epoch": 0.79,
"learning_rate": 1.842803340429016e-05,
"loss": 3.4469,
"step": 9600
},
{
"epoch": 0.79,
"learning_rate": 1.8411658752251516e-05,
"loss": 3.4244,
"step": 9700
},
{
"epoch": 0.8,
"learning_rate": 1.839528410021287e-05,
"loss": 3.3977,
"step": 9800
},
{
"epoch": 0.81,
"learning_rate": 1.837890944817423e-05,
"loss": 3.4462,
"step": 9900
},
{
"epoch": 0.82,
"learning_rate": 1.8362534796135583e-05,
"loss": 3.4458,
"step": 10000
},
{
"epoch": 0.83,
"learning_rate": 1.8346160144096938e-05,
"loss": 3.4315,
"step": 10100
},
{
"epoch": 0.84,
"learning_rate": 1.8329785492058296e-05,
"loss": 3.4124,
"step": 10200
},
{
"epoch": 0.84,
"learning_rate": 1.831341084001965e-05,
"loss": 3.409,
"step": 10300
},
{
"epoch": 0.85,
"learning_rate": 1.829703618798101e-05,
"loss": 3.4229,
"step": 10400
},
{
"epoch": 0.86,
"learning_rate": 1.8280661535942363e-05,
"loss": 3.4055,
"step": 10500
},
{
"epoch": 0.87,
"learning_rate": 1.8264286883903718e-05,
"loss": 3.3975,
"step": 10600
},
{
"epoch": 0.88,
"learning_rate": 1.8247912231865076e-05,
"loss": 3.4197,
"step": 10700
},
{
"epoch": 0.88,
"learning_rate": 1.823153757982643e-05,
"loss": 3.395,
"step": 10800
},
{
"epoch": 0.89,
"learning_rate": 1.8215162927787785e-05,
"loss": 3.4371,
"step": 10900
},
{
"epoch": 0.9,
"learning_rate": 1.8198788275749143e-05,
"loss": 3.3775,
"step": 11000
},
{
"epoch": 0.91,
"learning_rate": 1.8182413623710498e-05,
"loss": 3.3925,
"step": 11100
},
{
"epoch": 0.92,
"learning_rate": 1.8166038971671852e-05,
"loss": 3.4044,
"step": 11200
},
{
"epoch": 0.93,
"learning_rate": 1.814966431963321e-05,
"loss": 3.4044,
"step": 11300
},
{
"epoch": 0.93,
"learning_rate": 1.8133289667594565e-05,
"loss": 3.3841,
"step": 11400
},
{
"epoch": 0.94,
"learning_rate": 1.811691501555592e-05,
"loss": 3.4051,
"step": 11500
},
{
"epoch": 0.95,
"learning_rate": 1.8100540363517278e-05,
"loss": 3.3812,
"step": 11600
},
{
"epoch": 0.96,
"learning_rate": 1.8084165711478632e-05,
"loss": 3.3966,
"step": 11700
},
{
"epoch": 0.97,
"learning_rate": 1.8067791059439987e-05,
"loss": 3.4188,
"step": 11800
},
{
"epoch": 0.97,
"learning_rate": 1.805141640740134e-05,
"loss": 3.3683,
"step": 11900
},
{
"epoch": 0.98,
"learning_rate": 1.80350417553627e-05,
"loss": 3.3872,
"step": 12000
},
{
"epoch": 0.99,
"learning_rate": 1.8018667103324058e-05,
"loss": 3.3407,
"step": 12100
},
{
"epoch": 1.0,
"learning_rate": 1.8002292451285412e-05,
"loss": 3.3987,
"step": 12200
},
{
"epoch": 1.0,
"eval_bleu": 732.9294,
"eval_chrf": 2404.1983,
"eval_chrf_plus": 2251.1143,
"eval_gen_len": 14.592,
"eval_loss": 2.8766348361968994,
"eval_runtime": 38.5936,
"eval_samples_per_second": 25.911,
"eval_steps_per_second": 1.088,
"step": 12214
},
{
"epoch": 1.01,
"learning_rate": 1.7985917799246767e-05,
"loss": 3.3044,
"step": 12300
},
{
"epoch": 1.02,
"learning_rate": 1.7969543147208125e-05,
"loss": 3.2769,
"step": 12400
},
{
"epoch": 1.02,
"learning_rate": 1.795316849516948e-05,
"loss": 3.3158,
"step": 12500
},
{
"epoch": 1.03,
"learning_rate": 1.7936793843130834e-05,
"loss": 3.3013,
"step": 12600
},
{
"epoch": 1.04,
"learning_rate": 1.7920419191092192e-05,
"loss": 3.3322,
"step": 12700
},
{
"epoch": 1.05,
"learning_rate": 1.7904044539053547e-05,
"loss": 3.2939,
"step": 12800
},
{
"epoch": 1.06,
"learning_rate": 1.78876698870149e-05,
"loss": 3.2753,
"step": 12900
},
{
"epoch": 1.06,
"learning_rate": 1.787129523497626e-05,
"loss": 3.3128,
"step": 13000
},
{
"epoch": 1.07,
"learning_rate": 1.7854920582937614e-05,
"loss": 3.2881,
"step": 13100
},
{
"epoch": 1.08,
"learning_rate": 1.783854593089897e-05,
"loss": 3.2776,
"step": 13200
},
{
"epoch": 1.09,
"learning_rate": 1.7822171278860327e-05,
"loss": 3.2845,
"step": 13300
},
{
"epoch": 1.1,
"learning_rate": 1.780579662682168e-05,
"loss": 3.2722,
"step": 13400
},
{
"epoch": 1.11,
"learning_rate": 1.7789421974783036e-05,
"loss": 3.2907,
"step": 13500
},
{
"epoch": 1.11,
"learning_rate": 1.777304732274439e-05,
"loss": 3.29,
"step": 13600
},
{
"epoch": 1.12,
"learning_rate": 1.775667267070575e-05,
"loss": 3.2576,
"step": 13700
},
{
"epoch": 1.13,
"learning_rate": 1.7740298018667107e-05,
"loss": 3.2402,
"step": 13800
},
{
"epoch": 1.14,
"learning_rate": 1.772392336662846e-05,
"loss": 3.2807,
"step": 13900
},
{
"epoch": 1.15,
"learning_rate": 1.7707548714589816e-05,
"loss": 3.268,
"step": 14000
},
{
"epoch": 1.15,
"learning_rate": 1.7691174062551174e-05,
"loss": 3.2347,
"step": 14100
},
{
"epoch": 1.16,
"learning_rate": 1.767479941051253e-05,
"loss": 3.2847,
"step": 14200
},
{
"epoch": 1.17,
"learning_rate": 1.7658424758473883e-05,
"loss": 3.2423,
"step": 14300
},
{
"epoch": 1.18,
"learning_rate": 1.764205010643524e-05,
"loss": 3.2641,
"step": 14400
},
{
"epoch": 1.19,
"learning_rate": 1.7625675454396596e-05,
"loss": 3.245,
"step": 14500
},
{
"epoch": 1.2,
"learning_rate": 1.760930080235795e-05,
"loss": 3.2686,
"step": 14600
},
{
"epoch": 1.2,
"learning_rate": 1.759292615031931e-05,
"loss": 3.1976,
"step": 14700
},
{
"epoch": 1.21,
"learning_rate": 1.7576551498280663e-05,
"loss": 3.2724,
"step": 14800
},
{
"epoch": 1.22,
"learning_rate": 1.7560176846242018e-05,
"loss": 3.2294,
"step": 14900
},
{
"epoch": 1.23,
"learning_rate": 1.7543802194203376e-05,
"loss": 3.2484,
"step": 15000
},
{
"epoch": 1.24,
"learning_rate": 1.752742754216473e-05,
"loss": 3.2628,
"step": 15100
},
{
"epoch": 1.24,
"learning_rate": 1.7511052890126085e-05,
"loss": 3.2275,
"step": 15200
},
{
"epoch": 1.25,
"learning_rate": 1.7494678238087443e-05,
"loss": 3.2636,
"step": 15300
},
{
"epoch": 1.26,
"learning_rate": 1.7478303586048798e-05,
"loss": 3.2635,
"step": 15400
},
{
"epoch": 1.27,
"learning_rate": 1.7461928934010152e-05,
"loss": 3.2374,
"step": 15500
},
{
"epoch": 1.28,
"learning_rate": 1.7445554281971507e-05,
"loss": 3.224,
"step": 15600
},
{
"epoch": 1.29,
"learning_rate": 1.7429179629932865e-05,
"loss": 3.2542,
"step": 15700
},
{
"epoch": 1.29,
"learning_rate": 1.7412804977894223e-05,
"loss": 3.2073,
"step": 15800
},
{
"epoch": 1.3,
"learning_rate": 1.7396430325855578e-05,
"loss": 3.2291,
"step": 15900
},
{
"epoch": 1.31,
"learning_rate": 1.7380055673816932e-05,
"loss": 3.2193,
"step": 16000
},
{
"epoch": 1.32,
"learning_rate": 1.736368102177829e-05,
"loss": 3.2367,
"step": 16100
},
{
"epoch": 1.33,
"learning_rate": 1.7347306369739645e-05,
"loss": 3.1989,
"step": 16200
},
{
"epoch": 1.33,
"learning_rate": 1.7330931717701e-05,
"loss": 3.2228,
"step": 16300
},
{
"epoch": 1.34,
"learning_rate": 1.7314557065662358e-05,
"loss": 3.1936,
"step": 16400
},
{
"epoch": 1.35,
"learning_rate": 1.7298182413623712e-05,
"loss": 3.2073,
"step": 16500
},
{
"epoch": 1.36,
"learning_rate": 1.7281807761585067e-05,
"loss": 3.22,
"step": 16600
},
{
"epoch": 1.37,
"learning_rate": 1.7265433109546425e-05,
"loss": 3.2036,
"step": 16700
},
{
"epoch": 1.38,
"learning_rate": 1.724905845750778e-05,
"loss": 3.2283,
"step": 16800
},
{
"epoch": 1.38,
"learning_rate": 1.7232683805469134e-05,
"loss": 3.253,
"step": 16900
},
{
"epoch": 1.39,
"learning_rate": 1.7216309153430492e-05,
"loss": 3.1681,
"step": 17000
},
{
"epoch": 1.4,
"learning_rate": 1.7199934501391847e-05,
"loss": 3.2035,
"step": 17100
},
{
"epoch": 1.41,
"learning_rate": 1.71835598493532e-05,
"loss": 3.2251,
"step": 17200
},
{
"epoch": 1.42,
"learning_rate": 1.7167185197314556e-05,
"loss": 3.1773,
"step": 17300
},
{
"epoch": 1.42,
"learning_rate": 1.7150810545275914e-05,
"loss": 3.1996,
"step": 17400
},
{
"epoch": 1.43,
"learning_rate": 1.7134435893237272e-05,
"loss": 3.178,
"step": 17500
},
{
"epoch": 1.44,
"learning_rate": 1.7118061241198627e-05,
"loss": 3.2248,
"step": 17600
},
{
"epoch": 1.45,
"learning_rate": 1.710168658915998e-05,
"loss": 3.2019,
"step": 17700
},
{
"epoch": 1.46,
"learning_rate": 1.708531193712134e-05,
"loss": 3.2058,
"step": 17800
},
{
"epoch": 1.47,
"learning_rate": 1.7068937285082694e-05,
"loss": 3.1742,
"step": 17900
},
{
"epoch": 1.47,
"learning_rate": 1.705256263304405e-05,
"loss": 3.1653,
"step": 18000
},
{
"epoch": 1.48,
"learning_rate": 1.7036187981005407e-05,
"loss": 3.2102,
"step": 18100
},
{
"epoch": 1.49,
"learning_rate": 1.701981332896676e-05,
"loss": 3.2096,
"step": 18200
},
{
"epoch": 1.5,
"learning_rate": 1.7003438676928116e-05,
"loss": 3.2129,
"step": 18300
},
{
"epoch": 1.51,
"learning_rate": 1.6987064024889474e-05,
"loss": 3.1804,
"step": 18400
},
{
"epoch": 1.51,
"learning_rate": 1.697068937285083e-05,
"loss": 3.1715,
"step": 18500
},
{
"epoch": 1.52,
"learning_rate": 1.6954314720812183e-05,
"loss": 3.1591,
"step": 18600
},
{
"epoch": 1.53,
"learning_rate": 1.693794006877354e-05,
"loss": 3.1861,
"step": 18700
},
{
"epoch": 1.54,
"learning_rate": 1.6921565416734896e-05,
"loss": 3.1293,
"step": 18800
},
{
"epoch": 1.55,
"learning_rate": 1.690519076469625e-05,
"loss": 3.1695,
"step": 18900
},
{
"epoch": 1.56,
"learning_rate": 1.6888816112657605e-05,
"loss": 3.2078,
"step": 19000
},
{
"epoch": 1.56,
"learning_rate": 1.6872441460618963e-05,
"loss": 3.1875,
"step": 19100
},
{
"epoch": 1.57,
"learning_rate": 1.6856066808580318e-05,
"loss": 3.141,
"step": 19200
},
{
"epoch": 1.58,
"learning_rate": 1.6839692156541672e-05,
"loss": 3.1861,
"step": 19300
},
{
"epoch": 1.59,
"learning_rate": 1.682331750450303e-05,
"loss": 3.1515,
"step": 19400
},
{
"epoch": 1.6,
"learning_rate": 1.680694285246439e-05,
"loss": 3.1628,
"step": 19500
},
{
"epoch": 1.6,
"learning_rate": 1.6790568200425743e-05,
"loss": 3.1899,
"step": 19600
},
{
"epoch": 1.61,
"learning_rate": 1.6774193548387098e-05,
"loss": 3.1687,
"step": 19700
},
{
"epoch": 1.62,
"learning_rate": 1.6757818896348456e-05,
"loss": 3.1699,
"step": 19800
},
{
"epoch": 1.63,
"learning_rate": 1.674144424430981e-05,
"loss": 3.1611,
"step": 19900
},
{
"epoch": 1.64,
"learning_rate": 1.6725069592271165e-05,
"loss": 3.1627,
"step": 20000
},
{
"epoch": 1.65,
"learning_rate": 1.6708694940232523e-05,
"loss": 3.1446,
"step": 20100
},
{
"epoch": 1.65,
"learning_rate": 1.6692320288193878e-05,
"loss": 3.1411,
"step": 20200
},
{
"epoch": 1.66,
"learning_rate": 1.6675945636155232e-05,
"loss": 3.1532,
"step": 20300
},
{
"epoch": 1.67,
"learning_rate": 1.665957098411659e-05,
"loss": 3.1618,
"step": 20400
},
{
"epoch": 1.68,
"learning_rate": 1.6643196332077945e-05,
"loss": 3.1464,
"step": 20500
},
{
"epoch": 1.69,
"learning_rate": 1.66268216800393e-05,
"loss": 3.1684,
"step": 20600
},
{
"epoch": 1.69,
"learning_rate": 1.6610447028000657e-05,
"loss": 3.1111,
"step": 20700
},
{
"epoch": 1.7,
"learning_rate": 1.6594072375962012e-05,
"loss": 3.1576,
"step": 20800
},
{
"epoch": 1.71,
"learning_rate": 1.6577697723923367e-05,
"loss": 3.1661,
"step": 20900
},
{
"epoch": 1.72,
"learning_rate": 1.656132307188472e-05,
"loss": 3.1532,
"step": 21000
},
{
"epoch": 1.73,
"learning_rate": 1.654494841984608e-05,
"loss": 3.1055,
"step": 21100
},
{
"epoch": 1.74,
"learning_rate": 1.6528573767807437e-05,
"loss": 3.1367,
"step": 21200
},
{
"epoch": 1.74,
"learning_rate": 1.6512199115768792e-05,
"loss": 3.1055,
"step": 21300
},
{
"epoch": 1.75,
"learning_rate": 1.6495824463730147e-05,
"loss": 3.1471,
"step": 21400
},
{
"epoch": 1.76,
"learning_rate": 1.6479449811691505e-05,
"loss": 3.1165,
"step": 21500
},
{
"epoch": 1.77,
"learning_rate": 1.646307515965286e-05,
"loss": 3.1307,
"step": 21600
},
{
"epoch": 1.78,
"learning_rate": 1.6446700507614214e-05,
"loss": 3.163,
"step": 21700
},
{
"epoch": 1.78,
"learning_rate": 1.6430325855575572e-05,
"loss": 3.1192,
"step": 21800
},
{
"epoch": 1.79,
"learning_rate": 1.6413951203536927e-05,
"loss": 3.0997,
"step": 21900
},
{
"epoch": 1.8,
"learning_rate": 1.639757655149828e-05,
"loss": 3.125,
"step": 22000
},
{
"epoch": 1.81,
"learning_rate": 1.638120189945964e-05,
"loss": 3.1122,
"step": 22100
},
{
"epoch": 1.82,
"learning_rate": 1.6364827247420994e-05,
"loss": 3.0875,
"step": 22200
},
{
"epoch": 1.83,
"learning_rate": 1.634845259538235e-05,
"loss": 3.0969,
"step": 22300
},
{
"epoch": 1.83,
"learning_rate": 1.6332077943343707e-05,
"loss": 3.1037,
"step": 22400
},
{
"epoch": 1.84,
"learning_rate": 1.631570329130506e-05,
"loss": 3.1123,
"step": 22500
},
{
"epoch": 1.85,
"learning_rate": 1.6299328639266416e-05,
"loss": 3.1142,
"step": 22600
},
{
"epoch": 1.86,
"learning_rate": 1.628295398722777e-05,
"loss": 3.1189,
"step": 22700
},
{
"epoch": 1.87,
"learning_rate": 1.626657933518913e-05,
"loss": 3.1587,
"step": 22800
},
{
"epoch": 1.87,
"learning_rate": 1.6250204683150483e-05,
"loss": 3.1438,
"step": 22900
},
{
"epoch": 1.88,
"learning_rate": 1.623383003111184e-05,
"loss": 3.109,
"step": 23000
},
{
"epoch": 1.89,
"learning_rate": 1.6217455379073196e-05,
"loss": 3.1217,
"step": 23100
},
{
"epoch": 1.9,
"learning_rate": 1.6201080727034554e-05,
"loss": 3.0996,
"step": 23200
},
{
"epoch": 1.91,
"learning_rate": 1.618470607499591e-05,
"loss": 3.0898,
"step": 23300
},
{
"epoch": 1.92,
"learning_rate": 1.6168331422957263e-05,
"loss": 3.0998,
"step": 23400
},
{
"epoch": 1.92,
"learning_rate": 1.615195677091862e-05,
"loss": 3.0892,
"step": 23500
},
{
"epoch": 1.93,
"learning_rate": 1.6135582118879976e-05,
"loss": 3.0579,
"step": 23600
},
{
"epoch": 1.94,
"learning_rate": 1.611920746684133e-05,
"loss": 3.0764,
"step": 23700
},
{
"epoch": 1.95,
"learning_rate": 1.6102832814802688e-05,
"loss": 3.0782,
"step": 23800
},
{
"epoch": 1.96,
"learning_rate": 1.6086458162764043e-05,
"loss": 3.1002,
"step": 23900
},
{
"epoch": 1.96,
"learning_rate": 1.6070083510725398e-05,
"loss": 3.0959,
"step": 24000
},
{
"epoch": 1.97,
"learning_rate": 1.6053708858686756e-05,
"loss": 3.1025,
"step": 24100
},
{
"epoch": 1.98,
"learning_rate": 1.603733420664811e-05,
"loss": 3.0993,
"step": 24200
},
{
"epoch": 1.99,
"learning_rate": 1.6020959554609465e-05,
"loss": 3.096,
"step": 24300
},
{
"epoch": 2.0,
"learning_rate": 1.600458490257082e-05,
"loss": 3.087,
"step": 24400
},
{
"epoch": 2.0,
"eval_bleu": 811.1414,
"eval_chrf": 2564.1192,
"eval_chrf_plus": 2406.0925,
"eval_gen_len": 14.665,
"eval_loss": 2.703340530395508,
"eval_runtime": 38.3779,
"eval_samples_per_second": 26.057,
"eval_steps_per_second": 1.094,
"step": 24428
},
{
"epoch": 2.01,
"learning_rate": 1.5988210250532177e-05,
"loss": 3.0467,
"step": 24500
},
{
"epoch": 2.01,
"learning_rate": 1.5971835598493532e-05,
"loss": 3.0037,
"step": 24600
},
{
"epoch": 2.02,
"learning_rate": 1.5955460946454887e-05,
"loss": 3.0283,
"step": 24700
},
{
"epoch": 2.03,
"learning_rate": 1.5939086294416245e-05,
"loss": 3.0406,
"step": 24800
},
{
"epoch": 2.04,
"learning_rate": 1.5922711642377603e-05,
"loss": 3.0001,
"step": 24900
},
{
"epoch": 2.05,
"learning_rate": 1.5906336990338957e-05,
"loss": 3.026,
"step": 25000
},
{
"epoch": 2.06,
"learning_rate": 1.5889962338300312e-05,
"loss": 3.0017,
"step": 25100
},
{
"epoch": 2.06,
"learning_rate": 1.587358768626167e-05,
"loss": 3.0257,
"step": 25200
},
{
"epoch": 2.07,
"learning_rate": 1.5857213034223025e-05,
"loss": 2.999,
"step": 25300
},
{
"epoch": 2.08,
"learning_rate": 1.584083838218438e-05,
"loss": 3.024,
"step": 25400
},
{
"epoch": 2.09,
"learning_rate": 1.5824463730145737e-05,
"loss": 3.0346,
"step": 25500
},
{
"epoch": 2.1,
"learning_rate": 1.5808089078107092e-05,
"loss": 2.9922,
"step": 25600
},
{
"epoch": 2.1,
"learning_rate": 1.5791714426068447e-05,
"loss": 3.0021,
"step": 25700
},
{
"epoch": 2.11,
"learning_rate": 1.5775339774029805e-05,
"loss": 3.0432,
"step": 25800
},
{
"epoch": 2.12,
"learning_rate": 1.575896512199116e-05,
"loss": 2.9909,
"step": 25900
},
{
"epoch": 2.13,
"learning_rate": 1.5742590469952514e-05,
"loss": 3.039,
"step": 26000
},
{
"epoch": 2.14,
"learning_rate": 1.5726215817913872e-05,
"loss": 2.9914,
"step": 26100
},
{
"epoch": 2.15,
"learning_rate": 1.5709841165875227e-05,
"loss": 3.0141,
"step": 26200
},
{
"epoch": 2.15,
"learning_rate": 1.569346651383658e-05,
"loss": 3.0008,
"step": 26300
},
{
"epoch": 2.16,
"learning_rate": 1.5677091861797936e-05,
"loss": 3.0262,
"step": 26400
},
{
"epoch": 2.17,
"learning_rate": 1.5660717209759294e-05,
"loss": 3.0113,
"step": 26500
},
{
"epoch": 2.18,
"learning_rate": 1.564434255772065e-05,
"loss": 3.038,
"step": 26600
},
{
"epoch": 2.19,
"learning_rate": 1.5627967905682006e-05,
"loss": 3.0078,
"step": 26700
},
{
"epoch": 2.19,
"learning_rate": 1.561159325364336e-05,
"loss": 3.0023,
"step": 26800
},
{
"epoch": 2.2,
"learning_rate": 1.559521860160472e-05,
"loss": 3.0095,
"step": 26900
},
{
"epoch": 2.21,
"learning_rate": 1.5578843949566074e-05,
"loss": 2.9869,
"step": 27000
},
{
"epoch": 2.22,
"learning_rate": 1.556246929752743e-05,
"loss": 3.0389,
"step": 27100
},
{
"epoch": 2.23,
"learning_rate": 1.5546094645488786e-05,
"loss": 3.0175,
"step": 27200
},
{
"epoch": 2.24,
"learning_rate": 1.552971999345014e-05,
"loss": 3.0092,
"step": 27300
},
{
"epoch": 2.24,
"learning_rate": 1.5513345341411496e-05,
"loss": 2.9981,
"step": 27400
},
{
"epoch": 2.25,
"learning_rate": 1.5496970689372854e-05,
"loss": 2.999,
"step": 27500
},
{
"epoch": 2.26,
"learning_rate": 1.5480596037334208e-05,
"loss": 2.9858,
"step": 27600
},
{
"epoch": 2.27,
"learning_rate": 1.5464221385295563e-05,
"loss": 2.9776,
"step": 27700
},
{
"epoch": 2.28,
"learning_rate": 1.544784673325692e-05,
"loss": 2.9879,
"step": 27800
},
{
"epoch": 2.28,
"learning_rate": 1.5431472081218276e-05,
"loss": 3.0294,
"step": 27900
},
{
"epoch": 2.29,
"learning_rate": 1.541509742917963e-05,
"loss": 3.048,
"step": 28000
},
{
"epoch": 2.3,
"learning_rate": 1.5398722777140985e-05,
"loss": 3.0125,
"step": 28100
},
{
"epoch": 2.31,
"learning_rate": 1.5382348125102343e-05,
"loss": 3.0201,
"step": 28200
},
{
"epoch": 2.32,
"learning_rate": 1.5365973473063697e-05,
"loss": 2.9874,
"step": 28300
},
{
"epoch": 2.33,
"learning_rate": 1.5349598821025052e-05,
"loss": 2.9942,
"step": 28400
},
{
"epoch": 2.33,
"learning_rate": 1.533322416898641e-05,
"loss": 3.0,
"step": 28500
},
{
"epoch": 2.34,
"learning_rate": 1.5316849516947768e-05,
"loss": 2.9812,
"step": 28600
},
{
"epoch": 2.35,
"learning_rate": 1.5300474864909123e-05,
"loss": 2.9315,
"step": 28700
},
{
"epoch": 2.36,
"learning_rate": 1.5284100212870477e-05,
"loss": 3.0019,
"step": 28800
},
{
"epoch": 2.37,
"learning_rate": 1.5267725560831835e-05,
"loss": 2.9821,
"step": 28900
},
{
"epoch": 2.37,
"learning_rate": 1.525135090879319e-05,
"loss": 3.0036,
"step": 29000
},
{
"epoch": 2.38,
"learning_rate": 1.5234976256754546e-05,
"loss": 2.9855,
"step": 29100
},
{
"epoch": 2.39,
"learning_rate": 1.5218601604715901e-05,
"loss": 2.9627,
"step": 29200
},
{
"epoch": 2.4,
"learning_rate": 1.5202226952677257e-05,
"loss": 2.9755,
"step": 29300
},
{
"epoch": 2.41,
"learning_rate": 1.5185852300638612e-05,
"loss": 2.9636,
"step": 29400
},
{
"epoch": 2.42,
"learning_rate": 1.5169477648599968e-05,
"loss": 2.9918,
"step": 29500
},
{
"epoch": 2.42,
"learning_rate": 1.5153102996561325e-05,
"loss": 2.9975,
"step": 29600
},
{
"epoch": 2.43,
"learning_rate": 1.513672834452268e-05,
"loss": 2.9579,
"step": 29700
},
{
"epoch": 2.44,
"learning_rate": 1.5120353692484036e-05,
"loss": 2.9867,
"step": 29800
},
{
"epoch": 2.45,
"learning_rate": 1.5103979040445392e-05,
"loss": 2.9265,
"step": 29900
},
{
"epoch": 2.46,
"learning_rate": 1.5087604388406747e-05,
"loss": 2.9504,
"step": 30000
},
{
"epoch": 2.46,
"learning_rate": 1.5071229736368103e-05,
"loss": 3.0254,
"step": 30100
},
{
"epoch": 2.47,
"learning_rate": 1.5054855084329459e-05,
"loss": 2.9785,
"step": 30200
},
{
"epoch": 2.48,
"learning_rate": 1.5038480432290814e-05,
"loss": 2.9722,
"step": 30300
},
{
"epoch": 2.49,
"learning_rate": 1.5022105780252172e-05,
"loss": 2.9798,
"step": 30400
},
{
"epoch": 2.5,
"learning_rate": 1.5005731128213528e-05,
"loss": 2.9653,
"step": 30500
},
{
"epoch": 2.51,
"learning_rate": 1.4989356476174883e-05,
"loss": 2.9449,
"step": 30600
},
{
"epoch": 2.51,
"learning_rate": 1.4972981824136239e-05,
"loss": 2.9874,
"step": 30700
},
{
"epoch": 2.52,
"learning_rate": 1.4956607172097595e-05,
"loss": 2.9705,
"step": 30800
},
{
"epoch": 2.53,
"learning_rate": 1.494023252005895e-05,
"loss": 2.9521,
"step": 30900
},
{
"epoch": 2.54,
"learning_rate": 1.4923857868020306e-05,
"loss": 2.953,
"step": 31000
},
{
"epoch": 2.55,
"learning_rate": 1.4907483215981661e-05,
"loss": 2.9573,
"step": 31100
},
{
"epoch": 2.55,
"learning_rate": 1.4891108563943017e-05,
"loss": 2.9484,
"step": 31200
},
{
"epoch": 2.56,
"learning_rate": 1.4874733911904374e-05,
"loss": 2.974,
"step": 31300
},
{
"epoch": 2.57,
"learning_rate": 1.4858359259865728e-05,
"loss": 2.9488,
"step": 31400
},
{
"epoch": 2.58,
"learning_rate": 1.4841984607827085e-05,
"loss": 2.9693,
"step": 31500
},
{
"epoch": 2.59,
"learning_rate": 1.4825609955788441e-05,
"loss": 2.9617,
"step": 31600
},
{
"epoch": 2.6,
"learning_rate": 1.4809235303749796e-05,
"loss": 2.9411,
"step": 31700
},
{
"epoch": 2.6,
"learning_rate": 1.4792860651711152e-05,
"loss": 2.9701,
"step": 31800
},
{
"epoch": 2.61,
"learning_rate": 1.4776485999672508e-05,
"loss": 2.9615,
"step": 31900
},
{
"epoch": 2.62,
"learning_rate": 1.4760111347633863e-05,
"loss": 2.9625,
"step": 32000
},
{
"epoch": 2.63,
"learning_rate": 1.4743736695595219e-05,
"loss": 2.9556,
"step": 32100
},
{
"epoch": 2.64,
"learning_rate": 1.4727362043556577e-05,
"loss": 2.9596,
"step": 32200
},
{
"epoch": 2.64,
"learning_rate": 1.4710987391517932e-05,
"loss": 2.9666,
"step": 32300
},
{
"epoch": 2.65,
"learning_rate": 1.4694612739479288e-05,
"loss": 2.9693,
"step": 32400
},
{
"epoch": 2.66,
"learning_rate": 1.4678238087440644e-05,
"loss": 2.9688,
"step": 32500
},
{
"epoch": 2.67,
"learning_rate": 1.4661863435401999e-05,
"loss": 2.9515,
"step": 32600
},
{
"epoch": 2.68,
"learning_rate": 1.4645488783363355e-05,
"loss": 2.968,
"step": 32700
},
{
"epoch": 2.69,
"learning_rate": 1.4629114131324712e-05,
"loss": 2.9521,
"step": 32800
},
{
"epoch": 2.69,
"learning_rate": 1.4612739479286066e-05,
"loss": 2.9489,
"step": 32900
},
{
"epoch": 2.7,
"learning_rate": 1.4596364827247423e-05,
"loss": 2.9333,
"step": 33000
},
{
"epoch": 2.71,
"learning_rate": 1.4579990175208777e-05,
"loss": 2.9158,
"step": 33100
},
{
"epoch": 2.72,
"learning_rate": 1.4563615523170134e-05,
"loss": 2.9574,
"step": 33200
},
{
"epoch": 2.73,
"learning_rate": 1.454724087113149e-05,
"loss": 2.947,
"step": 33300
},
{
"epoch": 2.73,
"learning_rate": 1.4530866219092845e-05,
"loss": 2.9294,
"step": 33400
},
{
"epoch": 2.74,
"learning_rate": 1.4514491567054201e-05,
"loss": 2.924,
"step": 33500
},
{
"epoch": 2.75,
"learning_rate": 1.4498116915015557e-05,
"loss": 2.9552,
"step": 33600
},
{
"epoch": 2.76,
"learning_rate": 1.4481742262976912e-05,
"loss": 2.9372,
"step": 33700
},
{
"epoch": 2.77,
"learning_rate": 1.4465367610938268e-05,
"loss": 2.9317,
"step": 33800
},
{
"epoch": 2.78,
"learning_rate": 1.4448992958899623e-05,
"loss": 2.9588,
"step": 33900
},
{
"epoch": 2.78,
"learning_rate": 1.4432618306860979e-05,
"loss": 2.9627,
"step": 34000
},
{
"epoch": 2.79,
"learning_rate": 1.4416243654822337e-05,
"loss": 2.9301,
"step": 34100
},
{
"epoch": 2.8,
"learning_rate": 1.4399869002783693e-05,
"loss": 2.9504,
"step": 34200
},
{
"epoch": 2.81,
"learning_rate": 1.4383494350745048e-05,
"loss": 2.9052,
"step": 34300
},
{
"epoch": 2.82,
"learning_rate": 1.4367119698706404e-05,
"loss": 2.9013,
"step": 34400
},
{
"epoch": 2.82,
"learning_rate": 1.435074504666776e-05,
"loss": 2.9394,
"step": 34500
},
{
"epoch": 2.83,
"learning_rate": 1.4334370394629115e-05,
"loss": 2.9642,
"step": 34600
},
{
"epoch": 2.84,
"learning_rate": 1.4317995742590472e-05,
"loss": 2.9105,
"step": 34700
},
{
"epoch": 2.85,
"learning_rate": 1.4301621090551826e-05,
"loss": 2.9039,
"step": 34800
},
{
"epoch": 2.86,
"learning_rate": 1.4285246438513183e-05,
"loss": 2.9139,
"step": 34900
},
{
"epoch": 2.87,
"learning_rate": 1.4268871786474539e-05,
"loss": 2.9577,
"step": 35000
},
{
"epoch": 2.87,
"learning_rate": 1.4252497134435894e-05,
"loss": 2.9013,
"step": 35100
},
{
"epoch": 2.88,
"learning_rate": 1.423612248239725e-05,
"loss": 2.9357,
"step": 35200
},
{
"epoch": 2.89,
"learning_rate": 1.4219747830358606e-05,
"loss": 2.9391,
"step": 35300
},
{
"epoch": 2.9,
"learning_rate": 1.4203373178319961e-05,
"loss": 2.9265,
"step": 35400
},
{
"epoch": 2.91,
"learning_rate": 1.4186998526281317e-05,
"loss": 2.935,
"step": 35500
},
{
"epoch": 2.91,
"learning_rate": 1.4170623874242674e-05,
"loss": 2.9592,
"step": 35600
},
{
"epoch": 2.92,
"learning_rate": 1.4154249222204028e-05,
"loss": 2.9216,
"step": 35700
},
{
"epoch": 2.93,
"learning_rate": 1.4137874570165385e-05,
"loss": 2.924,
"step": 35800
},
{
"epoch": 2.94,
"learning_rate": 1.4121499918126743e-05,
"loss": 2.9216,
"step": 35900
},
{
"epoch": 2.95,
"learning_rate": 1.4105125266088097e-05,
"loss": 2.9226,
"step": 36000
},
{
"epoch": 2.96,
"learning_rate": 1.4088750614049453e-05,
"loss": 2.9056,
"step": 36100
},
{
"epoch": 2.96,
"learning_rate": 1.407237596201081e-05,
"loss": 2.9386,
"step": 36200
},
{
"epoch": 2.97,
"learning_rate": 1.4056001309972164e-05,
"loss": 2.9027,
"step": 36300
},
{
"epoch": 2.98,
"learning_rate": 1.403962665793352e-05,
"loss": 2.9573,
"step": 36400
},
{
"epoch": 2.99,
"learning_rate": 1.4023252005894875e-05,
"loss": 2.9368,
"step": 36500
},
{
"epoch": 3.0,
"learning_rate": 1.4006877353856232e-05,
"loss": 2.905,
"step": 36600
},
{
"epoch": 3.0,
"eval_bleu": 871.025,
"eval_chrf": 2652.0809,
"eval_chrf_plus": 2492.3287,
"eval_gen_len": 14.666,
"eval_loss": 2.5985164642333984,
"eval_runtime": 38.3521,
"eval_samples_per_second": 26.074,
"eval_steps_per_second": 1.095,
"step": 36642
},
{
"epoch": 3.0,
"learning_rate": 1.3990502701817588e-05,
"loss": 2.8775,
"step": 36700
},
{
"epoch": 3.01,
"learning_rate": 1.3974128049778943e-05,
"loss": 2.8513,
"step": 36800
},
{
"epoch": 3.02,
"learning_rate": 1.3957753397740299e-05,
"loss": 2.8564,
"step": 36900
},
{
"epoch": 3.03,
"learning_rate": 1.3941378745701655e-05,
"loss": 2.8596,
"step": 37000
},
{
"epoch": 3.04,
"learning_rate": 1.392500409366301e-05,
"loss": 2.8591,
"step": 37100
},
{
"epoch": 3.05,
"learning_rate": 1.3908629441624366e-05,
"loss": 2.8619,
"step": 37200
},
{
"epoch": 3.05,
"learning_rate": 1.3892254789585723e-05,
"loss": 2.8753,
"step": 37300
},
{
"epoch": 3.06,
"learning_rate": 1.3875880137547077e-05,
"loss": 2.8761,
"step": 37400
},
{
"epoch": 3.07,
"learning_rate": 1.3859505485508434e-05,
"loss": 2.8231,
"step": 37500
},
{
"epoch": 3.08,
"learning_rate": 1.3843130833469788e-05,
"loss": 2.8332,
"step": 37600
},
{
"epoch": 3.09,
"learning_rate": 1.3826756181431145e-05,
"loss": 2.8417,
"step": 37700
},
{
"epoch": 3.09,
"learning_rate": 1.3810381529392503e-05,
"loss": 2.8426,
"step": 37800
},
{
"epoch": 3.1,
"learning_rate": 1.3794006877353859e-05,
"loss": 2.8603,
"step": 37900
},
{
"epoch": 3.11,
"learning_rate": 1.3777632225315213e-05,
"loss": 2.8538,
"step": 38000
},
{
"epoch": 3.12,
"learning_rate": 1.376125757327657e-05,
"loss": 2.8442,
"step": 38100
},
{
"epoch": 3.13,
"learning_rate": 1.3744882921237926e-05,
"loss": 2.8499,
"step": 38200
},
{
"epoch": 3.14,
"learning_rate": 1.372850826919928e-05,
"loss": 2.8347,
"step": 38300
},
{
"epoch": 3.14,
"learning_rate": 1.3712133617160637e-05,
"loss": 2.8528,
"step": 38400
},
{
"epoch": 3.15,
"learning_rate": 1.3695758965121992e-05,
"loss": 2.8532,
"step": 38500
},
{
"epoch": 3.16,
"learning_rate": 1.3679384313083348e-05,
"loss": 2.8538,
"step": 38600
},
{
"epoch": 3.17,
"learning_rate": 1.3663009661044704e-05,
"loss": 2.8473,
"step": 38700
},
{
"epoch": 3.18,
"learning_rate": 1.3646635009006059e-05,
"loss": 2.8365,
"step": 38800
},
{
"epoch": 3.18,
"learning_rate": 1.3630260356967415e-05,
"loss": 2.8565,
"step": 38900
},
{
"epoch": 3.19,
"learning_rate": 1.3613885704928772e-05,
"loss": 2.8504,
"step": 39000
},
{
"epoch": 3.2,
"learning_rate": 1.3597511052890126e-05,
"loss": 2.8732,
"step": 39100
},
{
"epoch": 3.21,
"learning_rate": 1.3581136400851483e-05,
"loss": 2.8724,
"step": 39200
},
{
"epoch": 3.22,
"learning_rate": 1.3564761748812837e-05,
"loss": 2.8346,
"step": 39300
},
{
"epoch": 3.23,
"learning_rate": 1.3548387096774194e-05,
"loss": 2.8302,
"step": 39400
},
{
"epoch": 3.23,
"learning_rate": 1.353201244473555e-05,
"loss": 2.8644,
"step": 39500
},
{
"epoch": 3.24,
"learning_rate": 1.3515637792696908e-05,
"loss": 2.8376,
"step": 39600
},
{
"epoch": 3.25,
"learning_rate": 1.3499263140658263e-05,
"loss": 2.8783,
"step": 39700
},
{
"epoch": 3.26,
"learning_rate": 1.3482888488619619e-05,
"loss": 2.8492,
"step": 39800
},
{
"epoch": 3.27,
"learning_rate": 1.3466513836580975e-05,
"loss": 2.8266,
"step": 39900
},
{
"epoch": 3.27,
"learning_rate": 1.345013918454233e-05,
"loss": 2.8566,
"step": 40000
},
{
"epoch": 3.28,
"learning_rate": 1.3433764532503686e-05,
"loss": 2.8507,
"step": 40100
},
{
"epoch": 3.29,
"learning_rate": 1.341738988046504e-05,
"loss": 2.872,
"step": 40200
},
{
"epoch": 3.3,
"learning_rate": 1.3401015228426397e-05,
"loss": 2.8386,
"step": 40300
},
{
"epoch": 3.31,
"learning_rate": 1.3384640576387753e-05,
"loss": 2.8702,
"step": 40400
},
{
"epoch": 3.32,
"learning_rate": 1.3368265924349108e-05,
"loss": 2.8243,
"step": 40500
},
{
"epoch": 3.32,
"learning_rate": 1.3351891272310464e-05,
"loss": 2.8271,
"step": 40600
},
{
"epoch": 3.33,
"learning_rate": 1.333551662027182e-05,
"loss": 2.8504,
"step": 40700
},
{
"epoch": 3.34,
"learning_rate": 1.3319141968233175e-05,
"loss": 2.8346,
"step": 40800
},
{
"epoch": 3.35,
"learning_rate": 1.3302767316194532e-05,
"loss": 2.8516,
"step": 40900
},
{
"epoch": 3.36,
"learning_rate": 1.3286392664155888e-05,
"loss": 2.8289,
"step": 41000
},
{
"epoch": 3.36,
"learning_rate": 1.3270018012117243e-05,
"loss": 2.8258,
"step": 41100
},
{
"epoch": 3.37,
"learning_rate": 1.3253643360078599e-05,
"loss": 2.8654,
"step": 41200
},
{
"epoch": 3.38,
"learning_rate": 1.3237268708039954e-05,
"loss": 2.84,
"step": 41300
},
{
"epoch": 3.39,
"learning_rate": 1.3220894056001312e-05,
"loss": 2.8269,
"step": 41400
},
{
"epoch": 3.4,
"learning_rate": 1.3204519403962668e-05,
"loss": 2.8432,
"step": 41500
},
{
"epoch": 3.41,
"learning_rate": 1.3188144751924024e-05,
"loss": 2.8462,
"step": 41600
},
{
"epoch": 3.41,
"learning_rate": 1.3171770099885379e-05,
"loss": 2.8131,
"step": 41700
},
{
"epoch": 3.42,
"learning_rate": 1.3155395447846735e-05,
"loss": 2.826,
"step": 41800
},
{
"epoch": 3.43,
"learning_rate": 1.313902079580809e-05,
"loss": 2.8396,
"step": 41900
},
{
"epoch": 3.44,
"learning_rate": 1.3122646143769446e-05,
"loss": 2.8241,
"step": 42000
},
{
"epoch": 3.45,
"learning_rate": 1.3106271491730802e-05,
"loss": 2.7975,
"step": 42100
},
{
"epoch": 3.46,
"learning_rate": 1.3089896839692157e-05,
"loss": 2.8249,
"step": 42200
},
{
"epoch": 3.46,
"learning_rate": 1.3073522187653513e-05,
"loss": 2.8189,
"step": 42300
},
{
"epoch": 3.47,
"learning_rate": 1.305714753561487e-05,
"loss": 2.8253,
"step": 42400
},
{
"epoch": 3.48,
"learning_rate": 1.3040772883576224e-05,
"loss": 2.8061,
"step": 42500
},
{
"epoch": 3.49,
"learning_rate": 1.302439823153758e-05,
"loss": 2.8074,
"step": 42600
},
{
"epoch": 3.5,
"learning_rate": 1.3008023579498937e-05,
"loss": 2.8325,
"step": 42700
},
{
"epoch": 3.5,
"learning_rate": 1.2991648927460292e-05,
"loss": 2.8204,
"step": 42800
},
{
"epoch": 3.51,
"learning_rate": 1.2975274275421648e-05,
"loss": 2.8388,
"step": 42900
},
{
"epoch": 3.52,
"learning_rate": 1.2958899623383003e-05,
"loss": 2.8278,
"step": 43000
},
{
"epoch": 3.53,
"learning_rate": 1.2942524971344359e-05,
"loss": 2.8339,
"step": 43100
},
{
"epoch": 3.54,
"learning_rate": 1.2926150319305715e-05,
"loss": 2.804,
"step": 43200
},
{
"epoch": 3.55,
"learning_rate": 1.2909775667267073e-05,
"loss": 2.8434,
"step": 43300
},
{
"epoch": 3.55,
"learning_rate": 1.2893401015228428e-05,
"loss": 2.8266,
"step": 43400
},
{
"epoch": 3.56,
"learning_rate": 1.2877026363189784e-05,
"loss": 2.8084,
"step": 43500
},
{
"epoch": 3.57,
"learning_rate": 1.286065171115114e-05,
"loss": 2.808,
"step": 43600
},
{
"epoch": 3.58,
"learning_rate": 1.2844277059112495e-05,
"loss": 2.8342,
"step": 43700
},
{
"epoch": 3.59,
"learning_rate": 1.2827902407073852e-05,
"loss": 2.8459,
"step": 43800
},
{
"epoch": 3.59,
"learning_rate": 1.2811527755035206e-05,
"loss": 2.8219,
"step": 43900
},
{
"epoch": 3.6,
"learning_rate": 1.2795153102996562e-05,
"loss": 2.8075,
"step": 44000
},
{
"epoch": 3.61,
"learning_rate": 1.2778778450957919e-05,
"loss": 2.8214,
"step": 44100
},
{
"epoch": 3.62,
"learning_rate": 1.2762403798919273e-05,
"loss": 2.8223,
"step": 44200
},
{
"epoch": 3.63,
"learning_rate": 1.274602914688063e-05,
"loss": 2.8085,
"step": 44300
},
{
"epoch": 3.64,
"learning_rate": 1.2729654494841986e-05,
"loss": 2.8086,
"step": 44400
},
{
"epoch": 3.64,
"learning_rate": 1.271327984280334e-05,
"loss": 2.8039,
"step": 44500
},
{
"epoch": 3.65,
"learning_rate": 1.2696905190764697e-05,
"loss": 2.8223,
"step": 44600
},
{
"epoch": 3.66,
"learning_rate": 1.2680530538726052e-05,
"loss": 2.8165,
"step": 44700
},
{
"epoch": 3.67,
"learning_rate": 1.2664155886687408e-05,
"loss": 2.8012,
"step": 44800
},
{
"epoch": 3.68,
"learning_rate": 1.2647781234648764e-05,
"loss": 2.8065,
"step": 44900
},
{
"epoch": 3.68,
"learning_rate": 1.2631406582610119e-05,
"loss": 2.8251,
"step": 45000
},
{
"epoch": 3.69,
"learning_rate": 1.2615031930571477e-05,
"loss": 2.8402,
"step": 45100
},
{
"epoch": 3.7,
"learning_rate": 1.2598657278532833e-05,
"loss": 2.7951,
"step": 45200
},
{
"epoch": 3.71,
"learning_rate": 1.258228262649419e-05,
"loss": 2.8053,
"step": 45300
},
{
"epoch": 3.72,
"learning_rate": 1.2565907974455544e-05,
"loss": 2.8495,
"step": 45400
},
{
"epoch": 3.73,
"learning_rate": 1.25495333224169e-05,
"loss": 2.8131,
"step": 45500
},
{
"epoch": 3.73,
"learning_rate": 1.2533158670378255e-05,
"loss": 2.7888,
"step": 45600
},
{
"epoch": 3.74,
"learning_rate": 1.2516784018339612e-05,
"loss": 2.8165,
"step": 45700
},
{
"epoch": 3.75,
"learning_rate": 1.2500409366300968e-05,
"loss": 2.814,
"step": 45800
},
{
"epoch": 3.76,
"learning_rate": 1.2484034714262322e-05,
"loss": 2.8309,
"step": 45900
},
{
"epoch": 3.77,
"learning_rate": 1.2467660062223679e-05,
"loss": 2.7945,
"step": 46000
},
{
"epoch": 3.77,
"learning_rate": 1.2451285410185035e-05,
"loss": 2.7825,
"step": 46100
},
{
"epoch": 3.78,
"learning_rate": 1.243491075814639e-05,
"loss": 2.7801,
"step": 46200
},
{
"epoch": 3.79,
"learning_rate": 1.2418536106107746e-05,
"loss": 2.8058,
"step": 46300
},
{
"epoch": 3.8,
"learning_rate": 1.2402161454069102e-05,
"loss": 2.8132,
"step": 46400
},
{
"epoch": 3.81,
"learning_rate": 1.2385786802030457e-05,
"loss": 2.807,
"step": 46500
},
{
"epoch": 3.82,
"learning_rate": 1.2369412149991813e-05,
"loss": 2.7962,
"step": 46600
},
{
"epoch": 3.82,
"learning_rate": 1.2353037497953168e-05,
"loss": 2.8217,
"step": 46700
},
{
"epoch": 3.83,
"learning_rate": 1.2336662845914524e-05,
"loss": 2.7893,
"step": 46800
},
{
"epoch": 3.84,
"learning_rate": 1.232028819387588e-05,
"loss": 2.7977,
"step": 46900
},
{
"epoch": 3.85,
"learning_rate": 1.2303913541837239e-05,
"loss": 2.8111,
"step": 47000
},
{
"epoch": 3.86,
"learning_rate": 1.2287538889798593e-05,
"loss": 2.7867,
"step": 47100
},
{
"epoch": 3.86,
"learning_rate": 1.227116423775995e-05,
"loss": 2.7771,
"step": 47200
},
{
"epoch": 3.87,
"learning_rate": 1.2254789585721304e-05,
"loss": 2.7968,
"step": 47300
},
{
"epoch": 3.88,
"learning_rate": 1.223841493368266e-05,
"loss": 2.8271,
"step": 47400
},
{
"epoch": 3.89,
"learning_rate": 1.2222040281644017e-05,
"loss": 2.8087,
"step": 47500
},
{
"epoch": 3.9,
"learning_rate": 1.2205665629605372e-05,
"loss": 2.8017,
"step": 47600
},
{
"epoch": 3.91,
"learning_rate": 1.2189290977566728e-05,
"loss": 2.8084,
"step": 47700
},
{
"epoch": 3.91,
"learning_rate": 1.2172916325528084e-05,
"loss": 2.8168,
"step": 47800
},
{
"epoch": 3.92,
"learning_rate": 1.2156541673489439e-05,
"loss": 2.8394,
"step": 47900
},
{
"epoch": 3.93,
"learning_rate": 1.2140167021450795e-05,
"loss": 2.7815,
"step": 48000
},
{
"epoch": 3.94,
"learning_rate": 1.2123792369412151e-05,
"loss": 2.8058,
"step": 48100
},
{
"epoch": 3.95,
"learning_rate": 1.2107417717373506e-05,
"loss": 2.771,
"step": 48200
},
{
"epoch": 3.95,
"learning_rate": 1.2091043065334862e-05,
"loss": 2.8163,
"step": 48300
},
{
"epoch": 3.96,
"learning_rate": 1.2074668413296217e-05,
"loss": 2.8269,
"step": 48400
},
{
"epoch": 3.97,
"learning_rate": 1.2058293761257573e-05,
"loss": 2.823,
"step": 48500
},
{
"epoch": 3.98,
"learning_rate": 1.204191910921893e-05,
"loss": 2.7733,
"step": 48600
},
{
"epoch": 3.99,
"learning_rate": 1.2025544457180284e-05,
"loss": 2.7565,
"step": 48700
},
{
"epoch": 4.0,
"learning_rate": 1.2009169805141642e-05,
"loss": 2.7801,
"step": 48800
},
{
"epoch": 4.0,
"eval_bleu": 901.064,
"eval_chrf": 2735.6707,
"eval_chrf_plus": 2574.683,
"eval_gen_len": 14.662,
"eval_loss": 2.527679920196533,
"eval_runtime": 38.6477,
"eval_samples_per_second": 25.875,
"eval_steps_per_second": 1.087,
"step": 48856
},
{
"epoch": 4.0,
"learning_rate": 1.1992795153102999e-05,
"loss": 2.7625,
"step": 48900
},
{
"epoch": 4.01,
"learning_rate": 1.1976420501064355e-05,
"loss": 2.7016,
"step": 49000
},
{
"epoch": 4.02,
"learning_rate": 1.196004584902571e-05,
"loss": 2.74,
"step": 49100
},
{
"epoch": 4.03,
"learning_rate": 1.1943671196987066e-05,
"loss": 2.7466,
"step": 49200
},
{
"epoch": 4.04,
"learning_rate": 1.192729654494842e-05,
"loss": 2.7237,
"step": 49300
},
{
"epoch": 4.04,
"learning_rate": 1.1910921892909777e-05,
"loss": 2.768,
"step": 49400
},
{
"epoch": 4.05,
"learning_rate": 1.1894547240871133e-05,
"loss": 2.7265,
"step": 49500
},
{
"epoch": 4.06,
"learning_rate": 1.1878172588832488e-05,
"loss": 2.7179,
"step": 49600
},
{
"epoch": 4.07,
"learning_rate": 1.1861797936793844e-05,
"loss": 2.6952,
"step": 49700
},
{
"epoch": 4.08,
"learning_rate": 1.18454232847552e-05,
"loss": 2.7131,
"step": 49800
},
{
"epoch": 4.09,
"learning_rate": 1.1829048632716555e-05,
"loss": 2.7492,
"step": 49900
},
{
"epoch": 4.09,
"learning_rate": 1.1812673980677911e-05,
"loss": 2.7349,
"step": 50000
},
{
"epoch": 4.1,
"learning_rate": 1.1796299328639266e-05,
"loss": 2.7603,
"step": 50100
},
{
"epoch": 4.11,
"learning_rate": 1.1779924676600622e-05,
"loss": 2.7132,
"step": 50200
},
{
"epoch": 4.12,
"learning_rate": 1.1763550024561979e-05,
"loss": 2.7195,
"step": 50300
},
{
"epoch": 4.13,
"learning_rate": 1.1747175372523333e-05,
"loss": 2.7586,
"step": 50400
},
{
"epoch": 4.13,
"learning_rate": 1.173080072048469e-05,
"loss": 2.6895,
"step": 50500
},
{
"epoch": 4.14,
"learning_rate": 1.1714426068446048e-05,
"loss": 2.7115,
"step": 50600
},
{
"epoch": 4.15,
"learning_rate": 1.1698051416407404e-05,
"loss": 2.7099,
"step": 50700
},
{
"epoch": 4.16,
"learning_rate": 1.1681676764368759e-05,
"loss": 2.7313,
"step": 50800
},
{
"epoch": 4.17,
"learning_rate": 1.1665302112330115e-05,
"loss": 2.7493,
"step": 50900
},
{
"epoch": 4.18,
"learning_rate": 1.164892746029147e-05,
"loss": 2.7305,
"step": 51000
},
{
"epoch": 4.18,
"learning_rate": 1.1632552808252826e-05,
"loss": 2.7458,
"step": 51100
},
{
"epoch": 4.19,
"learning_rate": 1.1616178156214182e-05,
"loss": 2.6953,
"step": 51200
},
{
"epoch": 4.2,
"learning_rate": 1.1599803504175537e-05,
"loss": 2.7367,
"step": 51300
},
{
"epoch": 4.21,
"learning_rate": 1.1583428852136893e-05,
"loss": 2.7498,
"step": 51400
},
{
"epoch": 4.22,
"learning_rate": 1.156705420009825e-05,
"loss": 2.727,
"step": 51500
},
{
"epoch": 4.22,
"learning_rate": 1.1550679548059604e-05,
"loss": 2.7481,
"step": 51600
},
{
"epoch": 4.23,
"learning_rate": 1.153430489602096e-05,
"loss": 2.7545,
"step": 51700
},
{
"epoch": 4.24,
"learning_rate": 1.1517930243982317e-05,
"loss": 2.7321,
"step": 51800
},
{
"epoch": 4.25,
"learning_rate": 1.1501555591943671e-05,
"loss": 2.7258,
"step": 51900
},
{
"epoch": 4.26,
"learning_rate": 1.1485180939905028e-05,
"loss": 2.7123,
"step": 52000
},
{
"epoch": 4.27,
"learning_rate": 1.1468806287866382e-05,
"loss": 2.7354,
"step": 52100
},
{
"epoch": 4.27,
"learning_rate": 1.1452431635827739e-05,
"loss": 2.7387,
"step": 52200
},
{
"epoch": 4.28,
"learning_rate": 1.1436056983789095e-05,
"loss": 2.7635,
"step": 52300
},
{
"epoch": 4.29,
"learning_rate": 1.141968233175045e-05,
"loss": 2.7104,
"step": 52400
},
{
"epoch": 4.3,
"learning_rate": 1.1403307679711808e-05,
"loss": 2.7499,
"step": 52500
},
{
"epoch": 4.31,
"learning_rate": 1.1386933027673164e-05,
"loss": 2.7054,
"step": 52600
},
{
"epoch": 4.31,
"learning_rate": 1.1370558375634519e-05,
"loss": 2.7084,
"step": 52700
},
{
"epoch": 4.32,
"learning_rate": 1.1354183723595875e-05,
"loss": 2.7467,
"step": 52800
},
{
"epoch": 4.33,
"learning_rate": 1.1337809071557231e-05,
"loss": 2.7327,
"step": 52900
},
{
"epoch": 4.34,
"learning_rate": 1.1321434419518586e-05,
"loss": 2.6927,
"step": 53000
},
{
"epoch": 4.35,
"learning_rate": 1.1305059767479942e-05,
"loss": 2.705,
"step": 53100
},
{
"epoch": 4.36,
"learning_rate": 1.1288685115441299e-05,
"loss": 2.7123,
"step": 53200
},
{
"epoch": 4.36,
"learning_rate": 1.1272310463402653e-05,
"loss": 2.7319,
"step": 53300
},
{
"epoch": 4.37,
"learning_rate": 1.125593581136401e-05,
"loss": 2.7112,
"step": 53400
},
{
"epoch": 4.38,
"learning_rate": 1.1239561159325366e-05,
"loss": 2.7509,
"step": 53500
},
{
"epoch": 4.39,
"learning_rate": 1.122318650728672e-05,
"loss": 2.7256,
"step": 53600
},
{
"epoch": 4.4,
"learning_rate": 1.1206811855248077e-05,
"loss": 2.726,
"step": 53700
},
{
"epoch": 4.4,
"learning_rate": 1.1190437203209431e-05,
"loss": 2.6912,
"step": 53800
},
{
"epoch": 4.41,
"learning_rate": 1.1174062551170788e-05,
"loss": 2.7434,
"step": 53900
},
{
"epoch": 4.42,
"learning_rate": 1.1157687899132144e-05,
"loss": 2.7085,
"step": 54000
},
{
"epoch": 4.43,
"learning_rate": 1.1141313247093499e-05,
"loss": 2.6887,
"step": 54100
},
{
"epoch": 4.44,
"learning_rate": 1.1124938595054855e-05,
"loss": 2.7507,
"step": 54200
},
{
"epoch": 4.45,
"learning_rate": 1.1108563943016213e-05,
"loss": 2.7167,
"step": 54300
},
{
"epoch": 4.45,
"learning_rate": 1.109218929097757e-05,
"loss": 2.7093,
"step": 54400
},
{
"epoch": 4.46,
"learning_rate": 1.1075814638938924e-05,
"loss": 2.7261,
"step": 54500
},
{
"epoch": 4.47,
"learning_rate": 1.105943998690028e-05,
"loss": 2.7224,
"step": 54600
},
{
"epoch": 4.48,
"learning_rate": 1.1043065334861635e-05,
"loss": 2.7195,
"step": 54700
},
{
"epoch": 4.49,
"learning_rate": 1.1026690682822991e-05,
"loss": 2.7271,
"step": 54800
},
{
"epoch": 4.49,
"learning_rate": 1.1010316030784348e-05,
"loss": 2.6997,
"step": 54900
},
{
"epoch": 4.5,
"learning_rate": 1.0993941378745702e-05,
"loss": 2.7087,
"step": 55000
},
{
"epoch": 4.51,
"learning_rate": 1.0977566726707059e-05,
"loss": 2.71,
"step": 55100
},
{
"epoch": 4.52,
"learning_rate": 1.0961192074668415e-05,
"loss": 2.748,
"step": 55200
},
{
"epoch": 4.53,
"learning_rate": 1.094481742262977e-05,
"loss": 2.7256,
"step": 55300
},
{
"epoch": 4.54,
"learning_rate": 1.0928442770591126e-05,
"loss": 2.7217,
"step": 55400
},
{
"epoch": 4.54,
"learning_rate": 1.091206811855248e-05,
"loss": 2.7287,
"step": 55500
},
{
"epoch": 4.55,
"learning_rate": 1.0895693466513837e-05,
"loss": 2.7147,
"step": 55600
},
{
"epoch": 4.56,
"learning_rate": 1.0879318814475193e-05,
"loss": 2.6992,
"step": 55700
},
{
"epoch": 4.57,
"learning_rate": 1.0862944162436548e-05,
"loss": 2.7216,
"step": 55800
},
{
"epoch": 4.58,
"learning_rate": 1.0846569510397904e-05,
"loss": 2.7001,
"step": 55900
},
{
"epoch": 4.58,
"learning_rate": 1.083019485835926e-05,
"loss": 2.7217,
"step": 56000
},
{
"epoch": 4.59,
"learning_rate": 1.0813820206320615e-05,
"loss": 2.7392,
"step": 56100
},
{
"epoch": 4.6,
"learning_rate": 1.0797445554281973e-05,
"loss": 2.7131,
"step": 56200
},
{
"epoch": 4.61,
"learning_rate": 1.078107090224333e-05,
"loss": 2.6884,
"step": 56300
},
{
"epoch": 4.62,
"learning_rate": 1.0764696250204684e-05,
"loss": 2.7238,
"step": 56400
},
{
"epoch": 4.63,
"learning_rate": 1.074832159816604e-05,
"loss": 2.7126,
"step": 56500
},
{
"epoch": 4.63,
"learning_rate": 1.0731946946127397e-05,
"loss": 2.712,
"step": 56600
},
{
"epoch": 4.64,
"learning_rate": 1.0715572294088751e-05,
"loss": 2.7054,
"step": 56700
},
{
"epoch": 4.65,
"learning_rate": 1.0699197642050108e-05,
"loss": 2.7433,
"step": 56800
},
{
"epoch": 4.66,
"learning_rate": 1.0682822990011464e-05,
"loss": 2.6941,
"step": 56900
},
{
"epoch": 4.67,
"learning_rate": 1.0666448337972819e-05,
"loss": 2.724,
"step": 57000
},
{
"epoch": 4.67,
"learning_rate": 1.0650073685934175e-05,
"loss": 2.6911,
"step": 57100
},
{
"epoch": 4.68,
"learning_rate": 1.0633699033895531e-05,
"loss": 2.7282,
"step": 57200
},
{
"epoch": 4.69,
"learning_rate": 1.0617324381856886e-05,
"loss": 2.7163,
"step": 57300
},
{
"epoch": 4.7,
"learning_rate": 1.0600949729818242e-05,
"loss": 2.7058,
"step": 57400
},
{
"epoch": 4.71,
"learning_rate": 1.0584575077779597e-05,
"loss": 2.7049,
"step": 57500
},
{
"epoch": 4.72,
"learning_rate": 1.0568200425740953e-05,
"loss": 2.7375,
"step": 57600
},
{
"epoch": 4.72,
"learning_rate": 1.055182577370231e-05,
"loss": 2.7072,
"step": 57700
},
{
"epoch": 4.73,
"learning_rate": 1.0535451121663664e-05,
"loss": 2.7157,
"step": 57800
},
{
"epoch": 4.74,
"learning_rate": 1.051907646962502e-05,
"loss": 2.7098,
"step": 57900
},
{
"epoch": 4.75,
"learning_rate": 1.0502701817586378e-05,
"loss": 2.7254,
"step": 58000
},
{
"epoch": 4.76,
"learning_rate": 1.0486327165547733e-05,
"loss": 2.6918,
"step": 58100
},
{
"epoch": 4.77,
"learning_rate": 1.046995251350909e-05,
"loss": 2.7007,
"step": 58200
},
{
"epoch": 4.77,
"learning_rate": 1.0453577861470446e-05,
"loss": 2.7673,
"step": 58300
},
{
"epoch": 4.78,
"learning_rate": 1.04372032094318e-05,
"loss": 2.687,
"step": 58400
},
{
"epoch": 4.79,
"learning_rate": 1.0420828557393157e-05,
"loss": 2.6976,
"step": 58500
},
{
"epoch": 4.8,
"learning_rate": 1.0404453905354513e-05,
"loss": 2.7166,
"step": 58600
},
{
"epoch": 4.81,
"learning_rate": 1.0388079253315868e-05,
"loss": 2.7329,
"step": 58700
},
{
"epoch": 4.81,
"learning_rate": 1.0371704601277224e-05,
"loss": 2.7008,
"step": 58800
},
{
"epoch": 4.82,
"learning_rate": 1.035532994923858e-05,
"loss": 2.7011,
"step": 58900
},
{
"epoch": 4.83,
"learning_rate": 1.0338955297199935e-05,
"loss": 2.7459,
"step": 59000
},
{
"epoch": 4.84,
"learning_rate": 1.0322580645161291e-05,
"loss": 2.6775,
"step": 59100
},
{
"epoch": 4.85,
"learning_rate": 1.0306205993122646e-05,
"loss": 2.7026,
"step": 59200
},
{
"epoch": 4.86,
"learning_rate": 1.0289831341084002e-05,
"loss": 2.7412,
"step": 59300
},
{
"epoch": 4.86,
"learning_rate": 1.0273456689045359e-05,
"loss": 2.7302,
"step": 59400
},
{
"epoch": 4.87,
"learning_rate": 1.0257082037006713e-05,
"loss": 2.7087,
"step": 59500
},
{
"epoch": 4.88,
"learning_rate": 1.024070738496807e-05,
"loss": 2.7151,
"step": 59600
},
{
"epoch": 4.89,
"learning_rate": 1.0224332732929426e-05,
"loss": 2.6936,
"step": 59700
},
{
"epoch": 4.9,
"learning_rate": 1.0207958080890784e-05,
"loss": 2.705,
"step": 59800
},
{
"epoch": 4.9,
"learning_rate": 1.0191583428852138e-05,
"loss": 2.7276,
"step": 59900
},
{
"epoch": 4.91,
"learning_rate": 1.0175208776813495e-05,
"loss": 2.7304,
"step": 60000
},
{
"epoch": 4.92,
"learning_rate": 1.015883412477485e-05,
"loss": 2.7073,
"step": 60100
},
{
"epoch": 4.93,
"learning_rate": 1.0142459472736206e-05,
"loss": 2.6806,
"step": 60200
},
{
"epoch": 4.94,
"learning_rate": 1.0126084820697562e-05,
"loss": 2.7106,
"step": 60300
},
{
"epoch": 4.95,
"learning_rate": 1.0109710168658917e-05,
"loss": 2.6818,
"step": 60400
},
{
"epoch": 4.95,
"learning_rate": 1.0093335516620273e-05,
"loss": 2.7072,
"step": 60500
},
{
"epoch": 4.96,
"learning_rate": 1.007696086458163e-05,
"loss": 2.6981,
"step": 60600
},
{
"epoch": 4.97,
"learning_rate": 1.0060586212542984e-05,
"loss": 2.6884,
"step": 60700
},
{
"epoch": 4.98,
"learning_rate": 1.004421156050434e-05,
"loss": 2.6921,
"step": 60800
},
{
"epoch": 4.99,
"learning_rate": 1.0027836908465695e-05,
"loss": 2.6885,
"step": 60900
},
{
"epoch": 4.99,
"learning_rate": 1.0011462256427051e-05,
"loss": 2.7105,
"step": 61000
},
{
"epoch": 5.0,
"eval_bleu": 941.391,
"eval_chrf": 2793.3653,
"eval_chrf_plus": 2625.3519,
"eval_gen_len": 14.666,
"eval_loss": 2.477811813354492,
"eval_runtime": 38.3865,
"eval_samples_per_second": 26.051,
"eval_steps_per_second": 1.094,
"step": 61070
},
{
"epoch": 5.0,
"learning_rate": 9.995087604388408e-06,
"loss": 2.7037,
"step": 61100
},
{
"epoch": 5.01,
"learning_rate": 9.978712952349764e-06,
"loss": 2.6766,
"step": 61200
},
{
"epoch": 5.02,
"learning_rate": 9.96233830031112e-06,
"loss": 2.6567,
"step": 61300
},
{
"epoch": 5.03,
"learning_rate": 9.945963648272475e-06,
"loss": 2.6702,
"step": 61400
},
{
"epoch": 5.04,
"learning_rate": 9.929588996233831e-06,
"loss": 2.6706,
"step": 61500
},
{
"epoch": 5.04,
"learning_rate": 9.913214344195187e-06,
"loss": 2.6327,
"step": 61600
},
{
"epoch": 5.05,
"learning_rate": 9.896839692156542e-06,
"loss": 2.6481,
"step": 61700
},
{
"epoch": 5.06,
"learning_rate": 9.880465040117898e-06,
"loss": 2.644,
"step": 61800
},
{
"epoch": 5.07,
"learning_rate": 9.864090388079253e-06,
"loss": 2.6387,
"step": 61900
},
{
"epoch": 5.08,
"learning_rate": 9.84771573604061e-06,
"loss": 2.6096,
"step": 62000
},
{
"epoch": 5.08,
"learning_rate": 9.831341084001966e-06,
"loss": 2.6466,
"step": 62100
},
{
"epoch": 5.09,
"learning_rate": 9.814966431963322e-06,
"loss": 2.639,
"step": 62200
},
{
"epoch": 5.1,
"learning_rate": 9.798591779924678e-06,
"loss": 2.6389,
"step": 62300
},
{
"epoch": 5.11,
"learning_rate": 9.782217127886033e-06,
"loss": 2.615,
"step": 62400
},
{
"epoch": 5.12,
"learning_rate": 9.76584247584739e-06,
"loss": 2.6486,
"step": 62500
},
{
"epoch": 5.13,
"learning_rate": 9.749467823808746e-06,
"loss": 2.6294,
"step": 62600
},
{
"epoch": 5.13,
"learning_rate": 9.7330931717701e-06,
"loss": 2.6415,
"step": 62700
},
{
"epoch": 5.14,
"learning_rate": 9.716718519731457e-06,
"loss": 2.6744,
"step": 62800
},
{
"epoch": 5.15,
"learning_rate": 9.700343867692811e-06,
"loss": 2.6236,
"step": 62900
},
{
"epoch": 5.16,
"learning_rate": 9.683969215654168e-06,
"loss": 2.6074,
"step": 63000
},
{
"epoch": 5.17,
"learning_rate": 9.667594563615524e-06,
"loss": 2.6673,
"step": 63100
},
{
"epoch": 5.17,
"learning_rate": 9.65121991157688e-06,
"loss": 2.6182,
"step": 63200
},
{
"epoch": 5.18,
"learning_rate": 9.634845259538237e-06,
"loss": 2.6363,
"step": 63300
},
{
"epoch": 5.19,
"learning_rate": 9.618470607499591e-06,
"loss": 2.648,
"step": 63400
},
{
"epoch": 5.2,
"learning_rate": 9.602095955460947e-06,
"loss": 2.6377,
"step": 63500
},
{
"epoch": 5.21,
"learning_rate": 9.585721303422302e-06,
"loss": 2.6505,
"step": 63600
},
{
"epoch": 5.22,
"learning_rate": 9.569346651383658e-06,
"loss": 2.6499,
"step": 63700
},
{
"epoch": 5.22,
"learning_rate": 9.552971999345015e-06,
"loss": 2.6385,
"step": 63800
},
{
"epoch": 5.23,
"learning_rate": 9.53659734730637e-06,
"loss": 2.662,
"step": 63900
},
{
"epoch": 5.24,
"learning_rate": 9.520222695267727e-06,
"loss": 2.6544,
"step": 64000
},
{
"epoch": 5.25,
"learning_rate": 9.503848043229082e-06,
"loss": 2.6315,
"step": 64100
},
{
"epoch": 5.26,
"learning_rate": 9.487473391190438e-06,
"loss": 2.6606,
"step": 64200
},
{
"epoch": 5.26,
"learning_rate": 9.471098739151795e-06,
"loss": 2.6502,
"step": 64300
},
{
"epoch": 5.27,
"learning_rate": 9.45472408711315e-06,
"loss": 2.6328,
"step": 64400
},
{
"epoch": 5.28,
"learning_rate": 9.438349435074506e-06,
"loss": 2.6469,
"step": 64500
},
{
"epoch": 5.29,
"learning_rate": 9.42197478303586e-06,
"loss": 2.6143,
"step": 64600
},
{
"epoch": 5.3,
"learning_rate": 9.405600130997217e-06,
"loss": 2.6565,
"step": 64700
},
{
"epoch": 5.31,
"learning_rate": 9.389225478958573e-06,
"loss": 2.6213,
"step": 64800
},
{
"epoch": 5.31,
"learning_rate": 9.37285082691993e-06,
"loss": 2.6449,
"step": 64900
},
{
"epoch": 5.32,
"learning_rate": 9.356476174881286e-06,
"loss": 2.6446,
"step": 65000
},
{
"epoch": 5.33,
"learning_rate": 9.34010152284264e-06,
"loss": 2.6314,
"step": 65100
},
{
"epoch": 5.34,
"learning_rate": 9.323726870803997e-06,
"loss": 2.6453,
"step": 65200
},
{
"epoch": 5.35,
"learning_rate": 9.307352218765353e-06,
"loss": 2.6418,
"step": 65300
},
{
"epoch": 5.35,
"learning_rate": 9.290977566726707e-06,
"loss": 2.6389,
"step": 65400
},
{
"epoch": 5.36,
"learning_rate": 9.274602914688064e-06,
"loss": 2.671,
"step": 65500
},
{
"epoch": 5.37,
"learning_rate": 9.258228262649418e-06,
"loss": 2.6571,
"step": 65600
},
{
"epoch": 5.38,
"learning_rate": 9.241853610610775e-06,
"loss": 2.6426,
"step": 65700
},
{
"epoch": 5.39,
"learning_rate": 9.225478958572131e-06,
"loss": 2.665,
"step": 65800
},
{
"epoch": 5.4,
"learning_rate": 9.209104306533487e-06,
"loss": 2.634,
"step": 65900
},
{
"epoch": 5.4,
"learning_rate": 9.192729654494844e-06,
"loss": 2.6479,
"step": 66000
},
{
"epoch": 5.41,
"learning_rate": 9.176355002456198e-06,
"loss": 2.6317,
"step": 66100
},
{
"epoch": 5.42,
"learning_rate": 9.159980350417555e-06,
"loss": 2.6429,
"step": 66200
},
{
"epoch": 5.43,
"learning_rate": 9.14360569837891e-06,
"loss": 2.638,
"step": 66300
},
{
"epoch": 5.44,
"learning_rate": 9.127231046340266e-06,
"loss": 2.6344,
"step": 66400
},
{
"epoch": 5.44,
"learning_rate": 9.110856394301622e-06,
"loss": 2.6152,
"step": 66500
},
{
"epoch": 5.45,
"learning_rate": 9.094481742262977e-06,
"loss": 2.6416,
"step": 66600
},
{
"epoch": 5.46,
"learning_rate": 9.078107090224335e-06,
"loss": 2.6115,
"step": 66700
},
{
"epoch": 5.47,
"learning_rate": 9.06173243818569e-06,
"loss": 2.6862,
"step": 66800
},
{
"epoch": 5.48,
"learning_rate": 9.045357786147046e-06,
"loss": 2.6273,
"step": 66900
},
{
"epoch": 5.49,
"learning_rate": 9.028983134108402e-06,
"loss": 2.6354,
"step": 67000
},
{
"epoch": 5.49,
"learning_rate": 9.012608482069757e-06,
"loss": 2.635,
"step": 67100
},
{
"epoch": 5.5,
"learning_rate": 8.996233830031113e-06,
"loss": 2.6303,
"step": 67200
},
{
"epoch": 5.51,
"learning_rate": 8.979859177992467e-06,
"loss": 2.6431,
"step": 67300
},
{
"epoch": 5.52,
"learning_rate": 8.963484525953824e-06,
"loss": 2.6356,
"step": 67400
},
{
"epoch": 5.53,
"learning_rate": 8.94710987391518e-06,
"loss": 2.6505,
"step": 67500
},
{
"epoch": 5.53,
"learning_rate": 8.930735221876535e-06,
"loss": 2.6433,
"step": 67600
},
{
"epoch": 5.54,
"learning_rate": 8.914360569837893e-06,
"loss": 2.6275,
"step": 67700
},
{
"epoch": 5.55,
"learning_rate": 8.897985917799247e-06,
"loss": 2.6287,
"step": 67800
},
{
"epoch": 5.56,
"learning_rate": 8.881611265760604e-06,
"loss": 2.6528,
"step": 67900
},
{
"epoch": 5.57,
"learning_rate": 8.86523661372196e-06,
"loss": 2.6361,
"step": 68000
},
{
"epoch": 5.58,
"learning_rate": 8.848861961683315e-06,
"loss": 2.6482,
"step": 68100
},
{
"epoch": 5.58,
"learning_rate": 8.832487309644671e-06,
"loss": 2.6487,
"step": 68200
},
{
"epoch": 5.59,
"learning_rate": 8.816112657606026e-06,
"loss": 2.6511,
"step": 68300
},
{
"epoch": 5.6,
"learning_rate": 8.799738005567382e-06,
"loss": 2.6125,
"step": 68400
},
{
"epoch": 5.61,
"learning_rate": 8.783363353528738e-06,
"loss": 2.6255,
"step": 68500
},
{
"epoch": 5.62,
"learning_rate": 8.766988701490095e-06,
"loss": 2.6203,
"step": 68600
},
{
"epoch": 5.62,
"learning_rate": 8.750614049451451e-06,
"loss": 2.6158,
"step": 68700
},
{
"epoch": 5.63,
"learning_rate": 8.734239397412806e-06,
"loss": 2.6542,
"step": 68800
},
{
"epoch": 5.64,
"learning_rate": 8.717864745374162e-06,
"loss": 2.6576,
"step": 68900
},
{
"epoch": 5.65,
"learning_rate": 8.701490093335517e-06,
"loss": 2.6632,
"step": 69000
},
{
"epoch": 5.66,
"learning_rate": 8.685115441296873e-06,
"loss": 2.6301,
"step": 69100
},
{
"epoch": 5.67,
"learning_rate": 8.66874078925823e-06,
"loss": 2.6334,
"step": 69200
},
{
"epoch": 5.67,
"learning_rate": 8.652366137219584e-06,
"loss": 2.6105,
"step": 69300
},
{
"epoch": 5.68,
"learning_rate": 8.63599148518094e-06,
"loss": 2.6296,
"step": 69400
},
{
"epoch": 5.69,
"learning_rate": 8.619616833142296e-06,
"loss": 2.6239,
"step": 69500
},
{
"epoch": 5.7,
"learning_rate": 8.603242181103653e-06,
"loss": 2.6069,
"step": 69600
},
{
"epoch": 5.71,
"learning_rate": 8.586867529065009e-06,
"loss": 2.6313,
"step": 69700
},
{
"epoch": 5.71,
"learning_rate": 8.570492877026364e-06,
"loss": 2.633,
"step": 69800
},
{
"epoch": 5.72,
"learning_rate": 8.55411822498772e-06,
"loss": 2.6343,
"step": 69900
},
{
"epoch": 5.73,
"learning_rate": 8.537743572949075e-06,
"loss": 2.6294,
"step": 70000
},
{
"epoch": 5.74,
"learning_rate": 8.521368920910431e-06,
"loss": 2.6343,
"step": 70100
},
{
"epoch": 5.75,
"learning_rate": 8.504994268871787e-06,
"loss": 2.6302,
"step": 70200
},
{
"epoch": 5.76,
"learning_rate": 8.488619616833142e-06,
"loss": 2.6118,
"step": 70300
},
{
"epoch": 5.76,
"learning_rate": 8.4722449647945e-06,
"loss": 2.6306,
"step": 70400
},
{
"epoch": 5.77,
"learning_rate": 8.455870312755855e-06,
"loss": 2.6119,
"step": 70500
},
{
"epoch": 5.78,
"learning_rate": 8.439495660717211e-06,
"loss": 2.6352,
"step": 70600
},
{
"epoch": 5.79,
"learning_rate": 8.423121008678567e-06,
"loss": 2.6078,
"step": 70700
},
{
"epoch": 5.8,
"learning_rate": 8.406746356639922e-06,
"loss": 2.6543,
"step": 70800
},
{
"epoch": 5.8,
"learning_rate": 8.390371704601278e-06,
"loss": 2.6499,
"step": 70900
},
{
"epoch": 5.81,
"learning_rate": 8.373997052562633e-06,
"loss": 2.6227,
"step": 71000
},
{
"epoch": 5.82,
"learning_rate": 8.35762240052399e-06,
"loss": 2.6355,
"step": 71100
},
{
"epoch": 5.83,
"learning_rate": 8.341247748485345e-06,
"loss": 2.6287,
"step": 71200
},
{
"epoch": 5.84,
"learning_rate": 8.324873096446702e-06,
"loss": 2.6446,
"step": 71300
},
{
"epoch": 5.85,
"learning_rate": 8.308498444408058e-06,
"loss": 2.641,
"step": 71400
},
{
"epoch": 5.85,
"learning_rate": 8.292123792369413e-06,
"loss": 2.6487,
"step": 71500
},
{
"epoch": 5.86,
"learning_rate": 8.275749140330769e-06,
"loss": 2.6324,
"step": 71600
},
{
"epoch": 5.87,
"learning_rate": 8.259374488292124e-06,
"loss": 2.6198,
"step": 71700
},
{
"epoch": 5.88,
"learning_rate": 8.24299983625348e-06,
"loss": 2.6237,
"step": 71800
},
{
"epoch": 5.89,
"learning_rate": 8.226625184214836e-06,
"loss": 2.6339,
"step": 71900
},
{
"epoch": 5.89,
"learning_rate": 8.210250532176191e-06,
"loss": 2.629,
"step": 72000
},
{
"epoch": 5.9,
"learning_rate": 8.193875880137547e-06,
"loss": 2.6365,
"step": 72100
},
{
"epoch": 5.91,
"learning_rate": 8.177501228098904e-06,
"loss": 2.6644,
"step": 72200
},
{
"epoch": 5.92,
"learning_rate": 8.16112657606026e-06,
"loss": 2.6232,
"step": 72300
},
{
"epoch": 5.93,
"learning_rate": 8.144751924021616e-06,
"loss": 2.6515,
"step": 72400
},
{
"epoch": 5.94,
"learning_rate": 8.128377271982971e-06,
"loss": 2.6166,
"step": 72500
},
{
"epoch": 5.94,
"learning_rate": 8.112002619944327e-06,
"loss": 2.6354,
"step": 72600
},
{
"epoch": 5.95,
"learning_rate": 8.095627967905682e-06,
"loss": 2.6005,
"step": 72700
},
{
"epoch": 5.96,
"learning_rate": 8.079253315867038e-06,
"loss": 2.6251,
"step": 72800
},
{
"epoch": 5.97,
"learning_rate": 8.062878663828395e-06,
"loss": 2.5943,
"step": 72900
},
{
"epoch": 5.98,
"learning_rate": 8.04650401178975e-06,
"loss": 2.5967,
"step": 73000
},
{
"epoch": 5.98,
"learning_rate": 8.030129359751105e-06,
"loss": 2.6441,
"step": 73100
},
{
"epoch": 5.99,
"learning_rate": 8.013754707712462e-06,
"loss": 2.6309,
"step": 73200
},
{
"epoch": 6.0,
"eval_bleu": 942.9051,
"eval_chrf": 2828.2232,
"eval_chrf_plus": 2657.258,
"eval_gen_len": 14.677,
"eval_loss": 2.4349613189697266,
"eval_runtime": 38.5767,
"eval_samples_per_second": 25.922,
"eval_steps_per_second": 1.089,
"step": 73284
},
{
"epoch": 6.0,
"learning_rate": 7.997380055673818e-06,
"loss": 2.6094,
"step": 73300
},
{
"epoch": 6.01,
"learning_rate": 7.981005403635174e-06,
"loss": 2.594,
"step": 73400
},
{
"epoch": 6.02,
"learning_rate": 7.964630751596529e-06,
"loss": 2.5775,
"step": 73500
},
{
"epoch": 6.03,
"learning_rate": 7.948256099557885e-06,
"loss": 2.5653,
"step": 73600
},
{
"epoch": 6.03,
"learning_rate": 7.93188144751924e-06,
"loss": 2.5773,
"step": 73700
},
{
"epoch": 6.04,
"learning_rate": 7.915506795480596e-06,
"loss": 2.585,
"step": 73800
},
{
"epoch": 6.05,
"learning_rate": 7.899132143441953e-06,
"loss": 2.5742,
"step": 73900
},
{
"epoch": 6.06,
"learning_rate": 7.882757491403307e-06,
"loss": 2.5905,
"step": 74000
},
{
"epoch": 6.07,
"learning_rate": 7.866382839364665e-06,
"loss": 2.5837,
"step": 74100
},
{
"epoch": 6.07,
"learning_rate": 7.85000818732602e-06,
"loss": 2.5667,
"step": 74200
},
{
"epoch": 6.08,
"learning_rate": 7.833633535287376e-06,
"loss": 2.5563,
"step": 74300
},
{
"epoch": 6.09,
"learning_rate": 7.817258883248731e-06,
"loss": 2.5947,
"step": 74400
},
{
"epoch": 6.1,
"learning_rate": 7.800884231210087e-06,
"loss": 2.5635,
"step": 74500
},
{
"epoch": 6.11,
"learning_rate": 7.784509579171444e-06,
"loss": 2.567,
"step": 74600
},
{
"epoch": 6.12,
"learning_rate": 7.768134927132798e-06,
"loss": 2.5654,
"step": 74700
},
{
"epoch": 6.12,
"learning_rate": 7.751760275094155e-06,
"loss": 2.573,
"step": 74800
},
{
"epoch": 6.13,
"learning_rate": 7.735385623055511e-06,
"loss": 2.5718,
"step": 74900
},
{
"epoch": 6.14,
"learning_rate": 7.719010971016867e-06,
"loss": 2.5964,
"step": 75000
},
{
"epoch": 6.15,
"learning_rate": 7.702636318978224e-06,
"loss": 2.5384,
"step": 75100
},
{
"epoch": 6.16,
"learning_rate": 7.686261666939578e-06,
"loss": 2.5768,
"step": 75200
},
{
"epoch": 6.17,
"learning_rate": 7.669887014900934e-06,
"loss": 2.6085,
"step": 75300
},
{
"epoch": 6.17,
"learning_rate": 7.653512362862289e-06,
"loss": 2.617,
"step": 75400
},
{
"epoch": 6.18,
"learning_rate": 7.637137710823645e-06,
"loss": 2.6014,
"step": 75500
},
{
"epoch": 6.19,
"learning_rate": 7.620763058785001e-06,
"loss": 2.572,
"step": 75600
},
{
"epoch": 6.2,
"learning_rate": 7.604388406746357e-06,
"loss": 2.5623,
"step": 75700
},
{
"epoch": 6.21,
"learning_rate": 7.588013754707713e-06,
"loss": 2.5994,
"step": 75800
},
{
"epoch": 6.21,
"learning_rate": 7.571639102669069e-06,
"loss": 2.5654,
"step": 75900
},
{
"epoch": 6.22,
"learning_rate": 7.555264450630425e-06,
"loss": 2.6115,
"step": 76000
},
{
"epoch": 6.23,
"learning_rate": 7.538889798591781e-06,
"loss": 2.5785,
"step": 76100
},
{
"epoch": 6.24,
"learning_rate": 7.522515146553136e-06,
"loss": 2.5755,
"step": 76200
},
{
"epoch": 6.25,
"learning_rate": 7.506140494514493e-06,
"loss": 2.6167,
"step": 76300
},
{
"epoch": 6.26,
"learning_rate": 7.489765842475848e-06,
"loss": 2.5617,
"step": 76400
},
{
"epoch": 6.26,
"learning_rate": 7.473391190437204e-06,
"loss": 2.588,
"step": 76500
},
{
"epoch": 6.27,
"learning_rate": 7.457016538398559e-06,
"loss": 2.5944,
"step": 76600
},
{
"epoch": 6.28,
"learning_rate": 7.440641886359915e-06,
"loss": 2.5729,
"step": 76700
},
{
"epoch": 6.29,
"learning_rate": 7.424267234321272e-06,
"loss": 2.5993,
"step": 76800
},
{
"epoch": 6.3,
"learning_rate": 7.407892582282627e-06,
"loss": 2.5932,
"step": 76900
},
{
"epoch": 6.3,
"learning_rate": 7.3915179302439835e-06,
"loss": 2.5664,
"step": 77000
},
{
"epoch": 6.31,
"learning_rate": 7.375143278205339e-06,
"loss": 2.5446,
"step": 77100
},
{
"epoch": 6.32,
"learning_rate": 7.3587686261666945e-06,
"loss": 2.5759,
"step": 77200
},
{
"epoch": 6.33,
"learning_rate": 7.34239397412805e-06,
"loss": 2.5856,
"step": 77300
},
{
"epoch": 6.34,
"learning_rate": 7.326019322089406e-06,
"loss": 2.5861,
"step": 77400
},
{
"epoch": 6.35,
"learning_rate": 7.309644670050762e-06,
"loss": 2.5872,
"step": 77500
},
{
"epoch": 6.35,
"learning_rate": 7.293270018012117e-06,
"loss": 2.617,
"step": 77600
},
{
"epoch": 6.36,
"learning_rate": 7.2768953659734735e-06,
"loss": 2.576,
"step": 77700
},
{
"epoch": 6.37,
"learning_rate": 7.26052071393483e-06,
"loss": 2.5765,
"step": 77800
},
{
"epoch": 6.38,
"learning_rate": 7.244146061896185e-06,
"loss": 2.5547,
"step": 77900
},
{
"epoch": 6.39,
"learning_rate": 7.227771409857542e-06,
"loss": 2.5883,
"step": 78000
},
{
"epoch": 6.39,
"learning_rate": 7.211396757818897e-06,
"loss": 2.5414,
"step": 78100
},
{
"epoch": 6.4,
"learning_rate": 7.195022105780253e-06,
"loss": 2.557,
"step": 78200
},
{
"epoch": 6.41,
"learning_rate": 7.178647453741608e-06,
"loss": 2.5618,
"step": 78300
},
{
"epoch": 6.42,
"learning_rate": 7.1622728017029644e-06,
"loss": 2.5766,
"step": 78400
},
{
"epoch": 6.43,
"learning_rate": 7.14589814966432e-06,
"loss": 2.5876,
"step": 78500
},
{
"epoch": 6.44,
"learning_rate": 7.129523497625675e-06,
"loss": 2.5599,
"step": 78600
},
{
"epoch": 6.44,
"learning_rate": 7.1131488455870326e-06,
"loss": 2.5776,
"step": 78700
},
{
"epoch": 6.45,
"learning_rate": 7.096774193548388e-06,
"loss": 2.5826,
"step": 78800
},
{
"epoch": 6.46,
"learning_rate": 7.0803995415097435e-06,
"loss": 2.5747,
"step": 78900
},
{
"epoch": 6.47,
"learning_rate": 7.0640248894711e-06,
"loss": 2.6132,
"step": 79000
},
{
"epoch": 6.48,
"learning_rate": 7.047650237432455e-06,
"loss": 2.567,
"step": 79100
},
{
"epoch": 6.48,
"learning_rate": 7.031275585393811e-06,
"loss": 2.5779,
"step": 79200
},
{
"epoch": 6.49,
"learning_rate": 7.014900933355166e-06,
"loss": 2.5782,
"step": 79300
},
{
"epoch": 6.5,
"learning_rate": 6.998526281316523e-06,
"loss": 2.5965,
"step": 79400
},
{
"epoch": 6.51,
"learning_rate": 6.982151629277878e-06,
"loss": 2.5868,
"step": 79500
},
{
"epoch": 6.52,
"learning_rate": 6.965776977239234e-06,
"loss": 2.5889,
"step": 79600
},
{
"epoch": 6.53,
"learning_rate": 6.949402325200591e-06,
"loss": 2.5912,
"step": 79700
},
{
"epoch": 6.53,
"learning_rate": 6.933027673161946e-06,
"loss": 2.576,
"step": 79800
},
{
"epoch": 6.54,
"learning_rate": 6.916653021123302e-06,
"loss": 2.5752,
"step": 79900
},
{
"epoch": 6.55,
"learning_rate": 6.900278369084657e-06,
"loss": 2.5793,
"step": 80000
},
{
"epoch": 6.56,
"learning_rate": 6.8839037170460135e-06,
"loss": 2.5879,
"step": 80100
},
{
"epoch": 6.57,
"learning_rate": 6.867529065007369e-06,
"loss": 2.5638,
"step": 80200
},
{
"epoch": 6.57,
"learning_rate": 6.8511544129687244e-06,
"loss": 2.5808,
"step": 80300
},
{
"epoch": 6.58,
"learning_rate": 6.834779760930081e-06,
"loss": 2.5688,
"step": 80400
},
{
"epoch": 6.59,
"learning_rate": 6.818405108891437e-06,
"loss": 2.5831,
"step": 80500
},
{
"epoch": 6.6,
"learning_rate": 6.8020304568527926e-06,
"loss": 2.5584,
"step": 80600
},
{
"epoch": 6.61,
"learning_rate": 6.785655804814149e-06,
"loss": 2.6074,
"step": 80700
},
{
"epoch": 6.62,
"learning_rate": 6.769281152775504e-06,
"loss": 2.5648,
"step": 80800
},
{
"epoch": 6.62,
"learning_rate": 6.75290650073686e-06,
"loss": 2.5987,
"step": 80900
},
{
"epoch": 6.63,
"learning_rate": 6.736531848698215e-06,
"loss": 2.5462,
"step": 81000
},
{
"epoch": 6.64,
"learning_rate": 6.720157196659572e-06,
"loss": 2.5563,
"step": 81100
},
{
"epoch": 6.65,
"learning_rate": 6.703782544620927e-06,
"loss": 2.6114,
"step": 81200
},
{
"epoch": 6.66,
"learning_rate": 6.687407892582283e-06,
"loss": 2.5669,
"step": 81300
},
{
"epoch": 6.66,
"learning_rate": 6.67103324054364e-06,
"loss": 2.5639,
"step": 81400
},
{
"epoch": 6.67,
"learning_rate": 6.654658588504995e-06,
"loss": 2.626,
"step": 81500
},
{
"epoch": 6.68,
"learning_rate": 6.638283936466351e-06,
"loss": 2.5719,
"step": 81600
},
{
"epoch": 6.69,
"learning_rate": 6.621909284427707e-06,
"loss": 2.5795,
"step": 81700
},
{
"epoch": 6.7,
"learning_rate": 6.6055346323890625e-06,
"loss": 2.5726,
"step": 81800
},
{
"epoch": 6.71,
"learning_rate": 6.589159980350418e-06,
"loss": 2.5499,
"step": 81900
},
{
"epoch": 6.71,
"learning_rate": 6.5727853283117735e-06,
"loss": 2.5656,
"step": 82000
},
{
"epoch": 6.72,
"learning_rate": 6.55641067627313e-06,
"loss": 2.5656,
"step": 82100
},
{
"epoch": 6.73,
"learning_rate": 6.540036024234485e-06,
"loss": 2.5595,
"step": 82200
},
{
"epoch": 6.74,
"learning_rate": 6.523661372195841e-06,
"loss": 2.5696,
"step": 82300
},
{
"epoch": 6.75,
"learning_rate": 6.507286720157198e-06,
"loss": 2.6089,
"step": 82400
},
{
"epoch": 6.75,
"learning_rate": 6.490912068118553e-06,
"loss": 2.5665,
"step": 82500
},
{
"epoch": 6.76,
"learning_rate": 6.474537416079909e-06,
"loss": 2.5973,
"step": 82600
},
{
"epoch": 6.77,
"learning_rate": 6.458162764041264e-06,
"loss": 2.5699,
"step": 82700
},
{
"epoch": 6.78,
"learning_rate": 6.441788112002621e-06,
"loss": 2.5383,
"step": 82800
},
{
"epoch": 6.79,
"learning_rate": 6.425413459963976e-06,
"loss": 2.5578,
"step": 82900
},
{
"epoch": 6.8,
"learning_rate": 6.409038807925332e-06,
"loss": 2.5557,
"step": 83000
},
{
"epoch": 6.8,
"learning_rate": 6.392664155886688e-06,
"loss": 2.5333,
"step": 83100
},
{
"epoch": 6.81,
"learning_rate": 6.3762895038480434e-06,
"loss": 2.579,
"step": 83200
},
{
"epoch": 6.82,
"learning_rate": 6.3599148518094e-06,
"loss": 2.5891,
"step": 83300
},
{
"epoch": 6.83,
"learning_rate": 6.343540199770756e-06,
"loss": 2.5803,
"step": 83400
},
{
"epoch": 6.84,
"learning_rate": 6.3271655477321116e-06,
"loss": 2.5818,
"step": 83500
},
{
"epoch": 6.84,
"learning_rate": 6.310790895693467e-06,
"loss": 2.5763,
"step": 83600
},
{
"epoch": 6.85,
"learning_rate": 6.2944162436548225e-06,
"loss": 2.5528,
"step": 83700
},
{
"epoch": 6.86,
"learning_rate": 6.278041591616179e-06,
"loss": 2.559,
"step": 83800
},
{
"epoch": 6.87,
"learning_rate": 6.261666939577534e-06,
"loss": 2.5455,
"step": 83900
},
{
"epoch": 6.88,
"learning_rate": 6.24529228753889e-06,
"loss": 2.5672,
"step": 84000
},
{
"epoch": 6.89,
"learning_rate": 6.228917635500245e-06,
"loss": 2.5598,
"step": 84100
},
{
"epoch": 6.89,
"learning_rate": 6.2125429834616024e-06,
"loss": 2.5807,
"step": 84200
},
{
"epoch": 6.9,
"learning_rate": 6.196168331422958e-06,
"loss": 2.5671,
"step": 84300
},
{
"epoch": 6.91,
"learning_rate": 6.179793679384314e-06,
"loss": 2.5396,
"step": 84400
},
{
"epoch": 6.92,
"learning_rate": 6.16341902734567e-06,
"loss": 2.5315,
"step": 84500
},
{
"epoch": 6.93,
"learning_rate": 6.147044375307025e-06,
"loss": 2.5686,
"step": 84600
},
{
"epoch": 6.93,
"learning_rate": 6.130669723268381e-06,
"loss": 2.5752,
"step": 84700
},
{
"epoch": 6.94,
"learning_rate": 6.114295071229737e-06,
"loss": 2.566,
"step": 84800
},
{
"epoch": 6.95,
"learning_rate": 6.0979204191910925e-06,
"loss": 2.5857,
"step": 84900
},
{
"epoch": 6.96,
"learning_rate": 6.081545767152448e-06,
"loss": 2.5951,
"step": 85000
},
{
"epoch": 6.97,
"learning_rate": 6.065171115113805e-06,
"loss": 2.5899,
"step": 85100
},
{
"epoch": 6.98,
"learning_rate": 6.048796463075161e-06,
"loss": 2.5783,
"step": 85200
},
{
"epoch": 6.98,
"learning_rate": 6.032421811036516e-06,
"loss": 2.5497,
"step": 85300
},
{
"epoch": 6.99,
"learning_rate": 6.0160471589978716e-06,
"loss": 2.5749,
"step": 85400
},
{
"epoch": 7.0,
"eval_bleu": 968.8997,
"eval_chrf": 2861.7868,
"eval_chrf_plus": 2687.5064,
"eval_gen_len": 14.652,
"eval_loss": 2.4077491760253906,
"eval_runtime": 38.1903,
"eval_samples_per_second": 26.185,
"eval_steps_per_second": 1.1,
"step": 85498
},
{
"epoch": 7.0,
"learning_rate": 5.999672506959228e-06,
"loss": 2.5836,
"step": 85500
},
{
"epoch": 7.01,
"learning_rate": 5.983297854920583e-06,
"loss": 2.5616,
"step": 85600
},
{
"epoch": 7.02,
"learning_rate": 5.966923202881939e-06,
"loss": 2.5122,
"step": 85700
},
{
"epoch": 7.02,
"learning_rate": 5.950548550843295e-06,
"loss": 2.5264,
"step": 85800
},
{
"epoch": 7.03,
"learning_rate": 5.934173898804651e-06,
"loss": 2.5504,
"step": 85900
},
{
"epoch": 7.04,
"learning_rate": 5.917799246766007e-06,
"loss": 2.552,
"step": 86000
},
{
"epoch": 7.05,
"learning_rate": 5.901424594727363e-06,
"loss": 2.5237,
"step": 86100
},
{
"epoch": 7.06,
"learning_rate": 5.885049942688719e-06,
"loss": 2.5496,
"step": 86200
},
{
"epoch": 7.07,
"learning_rate": 5.868675290650074e-06,
"loss": 2.5281,
"step": 86300
},
{
"epoch": 7.07,
"learning_rate": 5.85230063861143e-06,
"loss": 2.5384,
"step": 86400
},
{
"epoch": 7.08,
"learning_rate": 5.835925986572786e-06,
"loss": 2.557,
"step": 86500
},
{
"epoch": 7.09,
"learning_rate": 5.8195513345341415e-06,
"loss": 2.5591,
"step": 86600
},
{
"epoch": 7.1,
"learning_rate": 5.803176682495497e-06,
"loss": 2.5346,
"step": 86700
},
{
"epoch": 7.11,
"learning_rate": 5.7868020304568525e-06,
"loss": 2.539,
"step": 86800
},
{
"epoch": 7.11,
"learning_rate": 5.770427378418209e-06,
"loss": 2.5111,
"step": 86900
},
{
"epoch": 7.12,
"learning_rate": 5.754052726379565e-06,
"loss": 2.5263,
"step": 87000
},
{
"epoch": 7.13,
"learning_rate": 5.7376780743409215e-06,
"loss": 2.5451,
"step": 87100
},
{
"epoch": 7.14,
"learning_rate": 5.721303422302277e-06,
"loss": 2.5028,
"step": 87200
},
{
"epoch": 7.15,
"learning_rate": 5.704928770263632e-06,
"loss": 2.5378,
"step": 87300
},
{
"epoch": 7.16,
"learning_rate": 5.688554118224988e-06,
"loss": 2.5263,
"step": 87400
},
{
"epoch": 7.16,
"learning_rate": 5.672179466186344e-06,
"loss": 2.5375,
"step": 87500
},
{
"epoch": 7.17,
"learning_rate": 5.6558048141477e-06,
"loss": 2.5338,
"step": 87600
},
{
"epoch": 7.18,
"learning_rate": 5.639430162109055e-06,
"loss": 2.5378,
"step": 87700
},
{
"epoch": 7.19,
"learning_rate": 5.623055510070411e-06,
"loss": 2.5173,
"step": 87800
},
{
"epoch": 7.2,
"learning_rate": 5.606680858031768e-06,
"loss": 2.5416,
"step": 87900
},
{
"epoch": 7.2,
"learning_rate": 5.590306205993123e-06,
"loss": 2.5655,
"step": 88000
},
{
"epoch": 7.21,
"learning_rate": 5.573931553954479e-06,
"loss": 2.5181,
"step": 88100
},
{
"epoch": 7.22,
"learning_rate": 5.557556901915835e-06,
"loss": 2.5275,
"step": 88200
},
{
"epoch": 7.23,
"learning_rate": 5.5411822498771906e-06,
"loss": 2.5358,
"step": 88300
},
{
"epoch": 7.24,
"learning_rate": 5.524807597838546e-06,
"loss": 2.5359,
"step": 88400
},
{
"epoch": 7.25,
"learning_rate": 5.508432945799902e-06,
"loss": 2.5211,
"step": 88500
},
{
"epoch": 7.25,
"learning_rate": 5.492058293761258e-06,
"loss": 2.5447,
"step": 88600
},
{
"epoch": 7.26,
"learning_rate": 5.475683641722613e-06,
"loss": 2.5232,
"step": 88700
},
{
"epoch": 7.27,
"learning_rate": 5.4593089896839705e-06,
"loss": 2.5378,
"step": 88800
},
{
"epoch": 7.28,
"learning_rate": 5.442934337645326e-06,
"loss": 2.5483,
"step": 88900
},
{
"epoch": 7.29,
"learning_rate": 5.4265596856066815e-06,
"loss": 2.5405,
"step": 89000
},
{
"epoch": 7.29,
"learning_rate": 5.410185033568037e-06,
"loss": 2.509,
"step": 89100
},
{
"epoch": 7.3,
"learning_rate": 5.393810381529393e-06,
"loss": 2.5231,
"step": 89200
},
{
"epoch": 7.31,
"learning_rate": 5.377435729490749e-06,
"loss": 2.5538,
"step": 89300
},
{
"epoch": 7.32,
"learning_rate": 5.361061077452104e-06,
"loss": 2.5407,
"step": 89400
},
{
"epoch": 7.33,
"learning_rate": 5.34468642541346e-06,
"loss": 2.5306,
"step": 89500
},
{
"epoch": 7.34,
"learning_rate": 5.328311773374816e-06,
"loss": 2.5821,
"step": 89600
},
{
"epoch": 7.34,
"learning_rate": 5.311937121336172e-06,
"loss": 2.5328,
"step": 89700
},
{
"epoch": 7.35,
"learning_rate": 5.295562469297529e-06,
"loss": 2.5458,
"step": 89800
},
{
"epoch": 7.36,
"learning_rate": 5.279187817258884e-06,
"loss": 2.5367,
"step": 89900
},
{
"epoch": 7.37,
"learning_rate": 5.26281316522024e-06,
"loss": 2.5333,
"step": 90000
},
{
"epoch": 7.38,
"learning_rate": 5.246438513181595e-06,
"loss": 2.5492,
"step": 90100
},
{
"epoch": 7.38,
"learning_rate": 5.230063861142951e-06,
"loss": 2.5078,
"step": 90200
},
{
"epoch": 7.39,
"learning_rate": 5.213689209104307e-06,
"loss": 2.513,
"step": 90300
},
{
"epoch": 7.4,
"learning_rate": 5.197314557065662e-06,
"loss": 2.5322,
"step": 90400
},
{
"epoch": 7.41,
"learning_rate": 5.180939905027018e-06,
"loss": 2.5512,
"step": 90500
},
{
"epoch": 7.42,
"learning_rate": 5.164565252988375e-06,
"loss": 2.5409,
"step": 90600
},
{
"epoch": 7.43,
"learning_rate": 5.1481906009497305e-06,
"loss": 2.5273,
"step": 90700
},
{
"epoch": 7.43,
"learning_rate": 5.131815948911086e-06,
"loss": 2.5315,
"step": 90800
},
{
"epoch": 7.44,
"learning_rate": 5.115441296872442e-06,
"loss": 2.5052,
"step": 90900
},
{
"epoch": 7.45,
"learning_rate": 5.099066644833798e-06,
"loss": 2.5673,
"step": 91000
},
{
"epoch": 7.46,
"learning_rate": 5.082691992795153e-06,
"loss": 2.5222,
"step": 91100
},
{
"epoch": 7.47,
"learning_rate": 5.06631734075651e-06,
"loss": 2.5104,
"step": 91200
},
{
"epoch": 7.48,
"learning_rate": 5.049942688717865e-06,
"loss": 2.5237,
"step": 91300
},
{
"epoch": 7.48,
"learning_rate": 5.0335680366792205e-06,
"loss": 2.5361,
"step": 91400
},
{
"epoch": 7.49,
"learning_rate": 5.017193384640576e-06,
"loss": 2.517,
"step": 91500
},
{
"epoch": 7.5,
"learning_rate": 5.000818732601933e-06,
"loss": 2.5485,
"step": 91600
},
{
"epoch": 7.51,
"learning_rate": 4.984444080563288e-06,
"loss": 2.5295,
"step": 91700
},
{
"epoch": 7.52,
"learning_rate": 4.968069428524644e-06,
"loss": 2.5309,
"step": 91800
},
{
"epoch": 7.52,
"learning_rate": 4.9516947764860005e-06,
"loss": 2.5199,
"step": 91900
},
{
"epoch": 7.53,
"learning_rate": 4.935320124447356e-06,
"loss": 2.5216,
"step": 92000
},
{
"epoch": 7.54,
"learning_rate": 4.918945472408711e-06,
"loss": 2.5298,
"step": 92100
},
{
"epoch": 7.55,
"learning_rate": 4.902570820370067e-06,
"loss": 2.5084,
"step": 92200
},
{
"epoch": 7.56,
"learning_rate": 4.886196168331423e-06,
"loss": 2.4954,
"step": 92300
},
{
"epoch": 7.57,
"learning_rate": 4.8698215162927795e-06,
"loss": 2.5258,
"step": 92400
},
{
"epoch": 7.57,
"learning_rate": 4.853446864254135e-06,
"loss": 2.5319,
"step": 92500
},
{
"epoch": 7.58,
"learning_rate": 4.8370722122154905e-06,
"loss": 2.5105,
"step": 92600
},
{
"epoch": 7.59,
"learning_rate": 4.820697560176847e-06,
"loss": 2.5279,
"step": 92700
},
{
"epoch": 7.6,
"learning_rate": 4.804322908138202e-06,
"loss": 2.5328,
"step": 92800
},
{
"epoch": 7.61,
"learning_rate": 4.787948256099559e-06,
"loss": 2.5287,
"step": 92900
},
{
"epoch": 7.61,
"learning_rate": 4.771573604060914e-06,
"loss": 2.5093,
"step": 93000
},
{
"epoch": 7.62,
"learning_rate": 4.75519895202227e-06,
"loss": 2.5184,
"step": 93100
},
{
"epoch": 7.63,
"learning_rate": 4.738824299983626e-06,
"loss": 2.5407,
"step": 93200
},
{
"epoch": 7.64,
"learning_rate": 4.722449647944981e-06,
"loss": 2.5333,
"step": 93300
},
{
"epoch": 7.65,
"learning_rate": 4.706074995906338e-06,
"loss": 2.4874,
"step": 93400
},
{
"epoch": 7.66,
"learning_rate": 4.689700343867693e-06,
"loss": 2.5334,
"step": 93500
},
{
"epoch": 7.66,
"learning_rate": 4.6733256918290495e-06,
"loss": 2.5413,
"step": 93600
},
{
"epoch": 7.67,
"learning_rate": 4.656951039790405e-06,
"loss": 2.5073,
"step": 93700
},
{
"epoch": 7.68,
"learning_rate": 4.6405763877517605e-06,
"loss": 2.5276,
"step": 93800
},
{
"epoch": 7.69,
"learning_rate": 4.624201735713117e-06,
"loss": 2.5264,
"step": 93900
},
{
"epoch": 7.7,
"learning_rate": 4.607827083674472e-06,
"loss": 2.5465,
"step": 94000
},
{
"epoch": 7.7,
"learning_rate": 4.591452431635829e-06,
"loss": 2.5404,
"step": 94100
},
{
"epoch": 7.71,
"learning_rate": 4.575077779597184e-06,
"loss": 2.5142,
"step": 94200
},
{
"epoch": 7.72,
"learning_rate": 4.5587031275585395e-06,
"loss": 2.5218,
"step": 94300
},
{
"epoch": 7.73,
"learning_rate": 4.542328475519895e-06,
"loss": 2.5696,
"step": 94400
},
{
"epoch": 7.74,
"learning_rate": 4.525953823481251e-06,
"loss": 2.5272,
"step": 94500
},
{
"epoch": 7.75,
"learning_rate": 4.509579171442608e-06,
"loss": 2.5121,
"step": 94600
},
{
"epoch": 7.75,
"learning_rate": 4.493204519403963e-06,
"loss": 2.5392,
"step": 94700
},
{
"epoch": 7.76,
"learning_rate": 4.476829867365319e-06,
"loss": 2.5215,
"step": 94800
},
{
"epoch": 7.77,
"learning_rate": 4.460455215326674e-06,
"loss": 2.5177,
"step": 94900
},
{
"epoch": 7.78,
"learning_rate": 4.4440805632880304e-06,
"loss": 2.4851,
"step": 95000
},
{
"epoch": 7.79,
"learning_rate": 4.427705911249387e-06,
"loss": 2.5696,
"step": 95100
},
{
"epoch": 7.79,
"learning_rate": 4.411331259210742e-06,
"loss": 2.4741,
"step": 95200
},
{
"epoch": 7.8,
"learning_rate": 4.394956607172098e-06,
"loss": 2.5363,
"step": 95300
},
{
"epoch": 7.81,
"learning_rate": 4.378581955133453e-06,
"loss": 2.5258,
"step": 95400
},
{
"epoch": 7.82,
"learning_rate": 4.3622073030948095e-06,
"loss": 2.5155,
"step": 95500
},
{
"epoch": 7.83,
"learning_rate": 4.345832651056166e-06,
"loss": 2.5141,
"step": 95600
},
{
"epoch": 7.84,
"learning_rate": 4.329457999017521e-06,
"loss": 2.4956,
"step": 95700
},
{
"epoch": 7.84,
"learning_rate": 4.313083346978877e-06,
"loss": 2.5107,
"step": 95800
},
{
"epoch": 7.85,
"learning_rate": 4.296708694940233e-06,
"loss": 2.5159,
"step": 95900
},
{
"epoch": 7.86,
"learning_rate": 4.280334042901589e-06,
"loss": 2.5159,
"step": 96000
},
{
"epoch": 7.87,
"learning_rate": 4.263959390862945e-06,
"loss": 2.5321,
"step": 96100
},
{
"epoch": 7.88,
"learning_rate": 4.2475847388243e-06,
"loss": 2.5295,
"step": 96200
},
{
"epoch": 7.88,
"learning_rate": 4.231210086785656e-06,
"loss": 2.5022,
"step": 96300
},
{
"epoch": 7.89,
"learning_rate": 4.214835434747012e-06,
"loss": 2.5259,
"step": 96400
},
{
"epoch": 7.9,
"learning_rate": 4.198460782708368e-06,
"loss": 2.4996,
"step": 96500
},
{
"epoch": 7.91,
"learning_rate": 4.182086130669724e-06,
"loss": 2.5113,
"step": 96600
},
{
"epoch": 7.92,
"learning_rate": 4.1657114786310795e-06,
"loss": 2.5115,
"step": 96700
},
{
"epoch": 7.93,
"learning_rate": 4.149336826592435e-06,
"loss": 2.5159,
"step": 96800
},
{
"epoch": 7.93,
"learning_rate": 4.132962174553791e-06,
"loss": 2.5268,
"step": 96900
},
{
"epoch": 7.94,
"learning_rate": 4.116587522515147e-06,
"loss": 2.5041,
"step": 97000
},
{
"epoch": 7.95,
"learning_rate": 4.100212870476502e-06,
"loss": 2.5116,
"step": 97100
},
{
"epoch": 7.96,
"learning_rate": 4.0838382184378586e-06,
"loss": 2.5062,
"step": 97200
},
{
"epoch": 7.97,
"learning_rate": 4.067463566399215e-06,
"loss": 2.54,
"step": 97300
},
{
"epoch": 7.97,
"learning_rate": 4.05108891436057e-06,
"loss": 2.5216,
"step": 97400
},
{
"epoch": 7.98,
"learning_rate": 4.034714262321926e-06,
"loss": 2.5035,
"step": 97500
},
{
"epoch": 7.99,
"learning_rate": 4.018339610283281e-06,
"loss": 2.5216,
"step": 97600
},
{
"epoch": 8.0,
"learning_rate": 4.001964958244638e-06,
"loss": 2.5407,
"step": 97700
},
{
"epoch": 8.0,
"eval_bleu": 980.1715,
"eval_chrf": 2869.0816,
"eval_chrf_plus": 2698.0447,
"eval_gen_len": 14.656,
"eval_loss": 2.388012170791626,
"eval_runtime": 38.854,
"eval_samples_per_second": 25.737,
"eval_steps_per_second": 1.081,
"step": 97712
},
{
"epoch": 8.01,
"learning_rate": 3.985590306205994e-06,
"loss": 2.4997,
"step": 97800
},
{
"epoch": 8.02,
"learning_rate": 3.9692156541673494e-06,
"loss": 2.5192,
"step": 97900
},
{
"epoch": 8.02,
"learning_rate": 3.952841002128705e-06,
"loss": 2.477,
"step": 98000
},
{
"epoch": 8.03,
"learning_rate": 3.93646635009006e-06,
"loss": 2.5026,
"step": 98100
},
{
"epoch": 8.04,
"learning_rate": 3.920091698051417e-06,
"loss": 2.5002,
"step": 98200
},
{
"epoch": 8.05,
"learning_rate": 3.903717046012773e-06,
"loss": 2.4938,
"step": 98300
},
{
"epoch": 8.06,
"learning_rate": 3.8873423939741285e-06,
"loss": 2.488,
"step": 98400
},
{
"epoch": 8.06,
"learning_rate": 3.870967741935484e-06,
"loss": 2.5018,
"step": 98500
},
{
"epoch": 8.07,
"learning_rate": 3.8545930898968395e-06,
"loss": 2.5152,
"step": 98600
},
{
"epoch": 8.08,
"learning_rate": 3.838218437858196e-06,
"loss": 2.483,
"step": 98700
},
{
"epoch": 8.09,
"learning_rate": 3.821843785819552e-06,
"loss": 2.495,
"step": 98800
},
{
"epoch": 8.1,
"learning_rate": 3.8054691337809076e-06,
"loss": 2.5065,
"step": 98900
},
{
"epoch": 8.11,
"learning_rate": 3.789094481742263e-06,
"loss": 2.5051,
"step": 99000
},
{
"epoch": 8.11,
"learning_rate": 3.772719829703619e-06,
"loss": 2.5001,
"step": 99100
},
{
"epoch": 8.12,
"learning_rate": 3.756345177664975e-06,
"loss": 2.5074,
"step": 99200
},
{
"epoch": 8.13,
"learning_rate": 3.7399705256263308e-06,
"loss": 2.4961,
"step": 99300
},
{
"epoch": 8.14,
"learning_rate": 3.7235958735876867e-06,
"loss": 2.5006,
"step": 99400
},
{
"epoch": 8.15,
"learning_rate": 3.707221221549042e-06,
"loss": 2.4864,
"step": 99500
},
{
"epoch": 8.15,
"learning_rate": 3.6908465695103985e-06,
"loss": 2.4795,
"step": 99600
},
{
"epoch": 8.16,
"learning_rate": 3.674471917471754e-06,
"loss": 2.4886,
"step": 99700
},
{
"epoch": 8.17,
"learning_rate": 3.65809726543311e-06,
"loss": 2.4853,
"step": 99800
},
{
"epoch": 8.18,
"learning_rate": 3.6417226133944653e-06,
"loss": 2.5001,
"step": 99900
},
{
"epoch": 8.19,
"learning_rate": 3.6253479613558212e-06,
"loss": 2.5031,
"step": 100000
},
{
"epoch": 8.2,
"learning_rate": 3.6089733093171776e-06,
"loss": 2.4934,
"step": 100100
},
{
"epoch": 8.2,
"learning_rate": 3.592598657278533e-06,
"loss": 2.5008,
"step": 100200
},
{
"epoch": 8.21,
"learning_rate": 3.576224005239889e-06,
"loss": 2.4825,
"step": 100300
},
{
"epoch": 8.22,
"learning_rate": 3.5598493532012444e-06,
"loss": 2.4996,
"step": 100400
},
{
"epoch": 8.23,
"learning_rate": 3.5434747011626007e-06,
"loss": 2.5029,
"step": 100500
},
{
"epoch": 8.24,
"learning_rate": 3.5271000491239566e-06,
"loss": 2.4722,
"step": 100600
},
{
"epoch": 8.24,
"learning_rate": 3.510725397085312e-06,
"loss": 2.4791,
"step": 100700
},
{
"epoch": 8.25,
"learning_rate": 3.494350745046668e-06,
"loss": 2.4741,
"step": 100800
},
{
"epoch": 8.26,
"learning_rate": 3.4779760930080235e-06,
"loss": 2.5113,
"step": 100900
},
{
"epoch": 8.27,
"learning_rate": 3.46160144096938e-06,
"loss": 2.4916,
"step": 101000
},
{
"epoch": 8.28,
"learning_rate": 3.4452267889307357e-06,
"loss": 2.4865,
"step": 101100
},
{
"epoch": 8.29,
"learning_rate": 3.428852136892091e-06,
"loss": 2.4891,
"step": 101200
},
{
"epoch": 8.29,
"learning_rate": 3.412477484853447e-06,
"loss": 2.484,
"step": 101300
},
{
"epoch": 8.3,
"learning_rate": 3.3961028328148026e-06,
"loss": 2.4827,
"step": 101400
},
{
"epoch": 8.31,
"learning_rate": 3.379728180776159e-06,
"loss": 2.5062,
"step": 101500
},
{
"epoch": 8.32,
"learning_rate": 3.363353528737515e-06,
"loss": 2.5128,
"step": 101600
},
{
"epoch": 8.33,
"learning_rate": 3.3469788766988703e-06,
"loss": 2.508,
"step": 101700
},
{
"epoch": 8.33,
"learning_rate": 3.330604224660226e-06,
"loss": 2.4933,
"step": 101800
},
{
"epoch": 8.34,
"learning_rate": 3.314229572621582e-06,
"loss": 2.5001,
"step": 101900
},
{
"epoch": 8.35,
"learning_rate": 3.297854920582938e-06,
"loss": 2.4936,
"step": 102000
},
{
"epoch": 8.36,
"learning_rate": 3.281480268544294e-06,
"loss": 2.4793,
"step": 102100
},
{
"epoch": 8.37,
"learning_rate": 3.2651056165056494e-06,
"loss": 2.5187,
"step": 102200
},
{
"epoch": 8.38,
"learning_rate": 3.2487309644670053e-06,
"loss": 2.5113,
"step": 102300
},
{
"epoch": 8.38,
"learning_rate": 3.232356312428361e-06,
"loss": 2.5055,
"step": 102400
},
{
"epoch": 8.39,
"learning_rate": 3.215981660389717e-06,
"loss": 2.5111,
"step": 102500
},
{
"epoch": 8.4,
"learning_rate": 3.1996070083510725e-06,
"loss": 2.5075,
"step": 102600
},
{
"epoch": 8.41,
"learning_rate": 3.1832323563124284e-06,
"loss": 2.4961,
"step": 102700
},
{
"epoch": 8.42,
"learning_rate": 3.1668577042737848e-06,
"loss": 2.5245,
"step": 102800
},
{
"epoch": 8.42,
"learning_rate": 3.1504830522351402e-06,
"loss": 2.4936,
"step": 102900
},
{
"epoch": 8.43,
"learning_rate": 3.134108400196496e-06,
"loss": 2.4891,
"step": 103000
},
{
"epoch": 8.44,
"learning_rate": 3.1177337481578516e-06,
"loss": 2.5013,
"step": 103100
},
{
"epoch": 8.45,
"learning_rate": 3.1013590961192075e-06,
"loss": 2.4884,
"step": 103200
},
{
"epoch": 8.46,
"learning_rate": 3.084984444080564e-06,
"loss": 2.4928,
"step": 103300
},
{
"epoch": 8.47,
"learning_rate": 3.0686097920419193e-06,
"loss": 2.4995,
"step": 103400
},
{
"epoch": 8.47,
"learning_rate": 3.0522351400032752e-06,
"loss": 2.5228,
"step": 103500
},
{
"epoch": 8.48,
"learning_rate": 3.0358604879646307e-06,
"loss": 2.5199,
"step": 103600
},
{
"epoch": 8.49,
"learning_rate": 3.019485835925987e-06,
"loss": 2.4869,
"step": 103700
},
{
"epoch": 8.5,
"learning_rate": 3.003111183887343e-06,
"loss": 2.497,
"step": 103800
},
{
"epoch": 8.51,
"learning_rate": 2.9867365318486984e-06,
"loss": 2.4866,
"step": 103900
},
{
"epoch": 8.51,
"learning_rate": 2.9703618798100543e-06,
"loss": 2.4958,
"step": 104000
},
{
"epoch": 8.52,
"learning_rate": 2.95398722777141e-06,
"loss": 2.494,
"step": 104100
},
{
"epoch": 8.53,
"learning_rate": 2.937612575732766e-06,
"loss": 2.4919,
"step": 104200
},
{
"epoch": 8.54,
"learning_rate": 2.921237923694122e-06,
"loss": 2.5,
"step": 104300
},
{
"epoch": 8.55,
"learning_rate": 2.9048632716554775e-06,
"loss": 2.5147,
"step": 104400
},
{
"epoch": 8.56,
"learning_rate": 2.8884886196168334e-06,
"loss": 2.4622,
"step": 104500
},
{
"epoch": 8.56,
"learning_rate": 2.872113967578189e-06,
"loss": 2.4919,
"step": 104600
},
{
"epoch": 8.57,
"learning_rate": 2.855739315539545e-06,
"loss": 2.4882,
"step": 104700
},
{
"epoch": 8.58,
"learning_rate": 2.839364663500901e-06,
"loss": 2.5035,
"step": 104800
},
{
"epoch": 8.59,
"learning_rate": 2.8229900114622566e-06,
"loss": 2.49,
"step": 104900
},
{
"epoch": 8.6,
"learning_rate": 2.8066153594236125e-06,
"loss": 2.5085,
"step": 105000
},
{
"epoch": 8.6,
"learning_rate": 2.7902407073849684e-06,
"loss": 2.522,
"step": 105100
},
{
"epoch": 8.61,
"learning_rate": 2.7738660553463243e-06,
"loss": 2.5118,
"step": 105200
},
{
"epoch": 8.62,
"learning_rate": 2.7574914033076798e-06,
"loss": 2.4968,
"step": 105300
},
{
"epoch": 8.63,
"learning_rate": 2.7411167512690357e-06,
"loss": 2.5011,
"step": 105400
},
{
"epoch": 8.64,
"learning_rate": 2.7247420992303916e-06,
"loss": 2.4636,
"step": 105500
},
{
"epoch": 8.65,
"learning_rate": 2.7083674471917475e-06,
"loss": 2.5283,
"step": 105600
},
{
"epoch": 8.65,
"learning_rate": 2.6919927951531034e-06,
"loss": 2.5078,
"step": 105700
},
{
"epoch": 8.66,
"learning_rate": 2.675618143114459e-06,
"loss": 2.4719,
"step": 105800
},
{
"epoch": 8.67,
"learning_rate": 2.6592434910758147e-06,
"loss": 2.4869,
"step": 105900
},
{
"epoch": 8.68,
"learning_rate": 2.642868839037171e-06,
"loss": 2.5042,
"step": 106000
},
{
"epoch": 8.69,
"learning_rate": 2.6264941869985265e-06,
"loss": 2.4742,
"step": 106100
},
{
"epoch": 8.69,
"learning_rate": 2.6101195349598824e-06,
"loss": 2.469,
"step": 106200
},
{
"epoch": 8.7,
"learning_rate": 2.593744882921238e-06,
"loss": 2.4729,
"step": 106300
},
{
"epoch": 8.71,
"learning_rate": 2.577370230882594e-06,
"loss": 2.4797,
"step": 106400
},
{
"epoch": 8.72,
"learning_rate": 2.56099557884395e-06,
"loss": 2.5034,
"step": 106500
},
{
"epoch": 8.73,
"learning_rate": 2.5446209268053056e-06,
"loss": 2.4873,
"step": 106600
},
{
"epoch": 8.74,
"learning_rate": 2.5282462747666615e-06,
"loss": 2.4812,
"step": 106700
},
{
"epoch": 8.74,
"learning_rate": 2.511871622728017e-06,
"loss": 2.5116,
"step": 106800
},
{
"epoch": 8.75,
"learning_rate": 2.495496970689373e-06,
"loss": 2.5066,
"step": 106900
},
{
"epoch": 8.76,
"learning_rate": 2.479122318650729e-06,
"loss": 2.5003,
"step": 107000
},
{
"epoch": 8.77,
"learning_rate": 2.4627476666120847e-06,
"loss": 2.4658,
"step": 107100
},
{
"epoch": 8.78,
"learning_rate": 2.4463730145734406e-06,
"loss": 2.483,
"step": 107200
},
{
"epoch": 8.79,
"learning_rate": 2.4299983625347965e-06,
"loss": 2.4675,
"step": 107300
},
{
"epoch": 8.79,
"learning_rate": 2.413623710496152e-06,
"loss": 2.4703,
"step": 107400
},
{
"epoch": 8.8,
"learning_rate": 2.3972490584575083e-06,
"loss": 2.4838,
"step": 107500
},
{
"epoch": 8.81,
"learning_rate": 2.3808744064188638e-06,
"loss": 2.4974,
"step": 107600
},
{
"epoch": 8.82,
"learning_rate": 2.3644997543802197e-06,
"loss": 2.505,
"step": 107700
},
{
"epoch": 8.83,
"learning_rate": 2.3481251023415756e-06,
"loss": 2.4754,
"step": 107800
},
{
"epoch": 8.83,
"learning_rate": 2.331750450302931e-06,
"loss": 2.5193,
"step": 107900
},
{
"epoch": 8.84,
"learning_rate": 2.315375798264287e-06,
"loss": 2.4847,
"step": 108000
},
{
"epoch": 8.85,
"learning_rate": 2.299001146225643e-06,
"loss": 2.4857,
"step": 108100
},
{
"epoch": 8.86,
"learning_rate": 2.2826264941869988e-06,
"loss": 2.5072,
"step": 108200
},
{
"epoch": 8.87,
"learning_rate": 2.2662518421483547e-06,
"loss": 2.4878,
"step": 108300
},
{
"epoch": 8.88,
"learning_rate": 2.24987719010971e-06,
"loss": 2.4876,
"step": 108400
},
{
"epoch": 8.88,
"learning_rate": 2.233502538071066e-06,
"loss": 2.4714,
"step": 108500
},
{
"epoch": 8.89,
"learning_rate": 2.217127886032422e-06,
"loss": 2.5033,
"step": 108600
},
{
"epoch": 8.9,
"learning_rate": 2.200753233993778e-06,
"loss": 2.5034,
"step": 108700
},
{
"epoch": 8.91,
"learning_rate": 2.1843785819551337e-06,
"loss": 2.4987,
"step": 108800
},
{
"epoch": 8.92,
"learning_rate": 2.1680039299164896e-06,
"loss": 2.5166,
"step": 108900
},
{
"epoch": 8.92,
"learning_rate": 2.151629277877845e-06,
"loss": 2.5031,
"step": 109000
},
{
"epoch": 8.93,
"learning_rate": 2.135254625839201e-06,
"loss": 2.4761,
"step": 109100
},
{
"epoch": 8.94,
"learning_rate": 2.118879973800557e-06,
"loss": 2.486,
"step": 109200
},
{
"epoch": 8.95,
"learning_rate": 2.102505321761913e-06,
"loss": 2.5092,
"step": 109300
},
{
"epoch": 8.96,
"learning_rate": 2.0861306697232687e-06,
"loss": 2.5162,
"step": 109400
},
{
"epoch": 8.97,
"learning_rate": 2.069756017684624e-06,
"loss": 2.4616,
"step": 109500
},
{
"epoch": 8.97,
"learning_rate": 2.05338136564598e-06,
"loss": 2.5059,
"step": 109600
},
{
"epoch": 8.98,
"learning_rate": 2.037006713607336e-06,
"loss": 2.4782,
"step": 109700
},
{
"epoch": 8.99,
"learning_rate": 2.020632061568692e-06,
"loss": 2.5064,
"step": 109800
},
{
"epoch": 9.0,
"learning_rate": 2.004257409530048e-06,
"loss": 2.5296,
"step": 109900
},
{
"epoch": 9.0,
"eval_bleu": 983.7557,
"eval_chrf": 2873.9646,
"eval_chrf_plus": 2702.4064,
"eval_gen_len": 14.62,
"eval_loss": 2.3763132095336914,
"eval_runtime": 38.7822,
"eval_samples_per_second": 25.785,
"eval_steps_per_second": 1.083,
"step": 109926
},
{
"epoch": 9.01,
"learning_rate": 1.9878827574914033e-06,
"loss": 2.4896,
"step": 110000
},
{
"epoch": 9.01,
"learning_rate": 1.971508105452759e-06,
"loss": 2.4685,
"step": 110100
},
{
"epoch": 9.02,
"learning_rate": 1.955133453414115e-06,
"loss": 2.4665,
"step": 110200
},
{
"epoch": 9.03,
"learning_rate": 1.938758801375471e-06,
"loss": 2.4684,
"step": 110300
},
{
"epoch": 9.04,
"learning_rate": 1.922384149336827e-06,
"loss": 2.4761,
"step": 110400
},
{
"epoch": 9.05,
"learning_rate": 1.9060094972981828e-06,
"loss": 2.4977,
"step": 110500
},
{
"epoch": 9.06,
"learning_rate": 1.8896348452595385e-06,
"loss": 2.4788,
"step": 110600
},
{
"epoch": 9.06,
"learning_rate": 1.8732601932208942e-06,
"loss": 2.4939,
"step": 110700
},
{
"epoch": 9.07,
"learning_rate": 1.85688554118225e-06,
"loss": 2.4984,
"step": 110800
},
{
"epoch": 9.08,
"learning_rate": 1.8405108891436058e-06,
"loss": 2.4748,
"step": 110900
},
{
"epoch": 9.09,
"learning_rate": 1.8241362371049617e-06,
"loss": 2.4891,
"step": 111000
},
{
"epoch": 9.1,
"learning_rate": 1.8077615850663173e-06,
"loss": 2.4635,
"step": 111100
},
{
"epoch": 9.1,
"learning_rate": 1.7913869330276735e-06,
"loss": 2.4936,
"step": 111200
},
{
"epoch": 9.11,
"learning_rate": 1.7750122809890291e-06,
"loss": 2.4688,
"step": 111300
},
{
"epoch": 9.12,
"learning_rate": 1.7586376289503848e-06,
"loss": 2.4682,
"step": 111400
},
{
"epoch": 9.13,
"learning_rate": 1.7422629769117407e-06,
"loss": 2.4697,
"step": 111500
},
{
"epoch": 9.14,
"learning_rate": 1.7258883248730964e-06,
"loss": 2.4793,
"step": 111600
},
{
"epoch": 9.15,
"learning_rate": 1.7095136728344525e-06,
"loss": 2.4898,
"step": 111700
},
{
"epoch": 9.15,
"learning_rate": 1.6931390207958082e-06,
"loss": 2.4856,
"step": 111800
},
{
"epoch": 9.16,
"learning_rate": 1.6767643687571641e-06,
"loss": 2.4347,
"step": 111900
},
{
"epoch": 9.17,
"learning_rate": 1.6603897167185198e-06,
"loss": 2.4829,
"step": 112000
},
{
"epoch": 9.18,
"learning_rate": 1.6440150646798757e-06,
"loss": 2.4612,
"step": 112100
},
{
"epoch": 9.19,
"learning_rate": 1.6276404126412314e-06,
"loss": 2.4394,
"step": 112200
},
{
"epoch": 9.19,
"learning_rate": 1.6112657606025873e-06,
"loss": 2.5252,
"step": 112300
},
{
"epoch": 9.2,
"learning_rate": 1.5948911085639432e-06,
"loss": 2.4966,
"step": 112400
},
{
"epoch": 9.21,
"learning_rate": 1.578516456525299e-06,
"loss": 2.4586,
"step": 112500
},
{
"epoch": 9.22,
"learning_rate": 1.5621418044866548e-06,
"loss": 2.463,
"step": 112600
},
{
"epoch": 9.23,
"learning_rate": 1.5457671524480105e-06,
"loss": 2.4873,
"step": 112700
},
{
"epoch": 9.24,
"learning_rate": 1.5293925004093666e-06,
"loss": 2.4735,
"step": 112800
},
{
"epoch": 9.24,
"learning_rate": 1.5130178483707223e-06,
"loss": 2.4959,
"step": 112900
},
{
"epoch": 9.25,
"learning_rate": 1.496643196332078e-06,
"loss": 2.4856,
"step": 113000
},
{
"epoch": 9.26,
"learning_rate": 1.4802685442934339e-06,
"loss": 2.4681,
"step": 113100
},
{
"epoch": 9.27,
"learning_rate": 1.4638938922547896e-06,
"loss": 2.4575,
"step": 113200
},
{
"epoch": 9.28,
"learning_rate": 1.4475192402161457e-06,
"loss": 2.4866,
"step": 113300
},
{
"epoch": 9.28,
"learning_rate": 1.4311445881775014e-06,
"loss": 2.457,
"step": 113400
},
{
"epoch": 9.29,
"learning_rate": 1.4147699361388573e-06,
"loss": 2.4792,
"step": 113500
},
{
"epoch": 9.3,
"learning_rate": 1.398395284100213e-06,
"loss": 2.4699,
"step": 113600
},
{
"epoch": 9.31,
"learning_rate": 1.3820206320615687e-06,
"loss": 2.4806,
"step": 113700
},
{
"epoch": 9.32,
"learning_rate": 1.3656459800229246e-06,
"loss": 2.4692,
"step": 113800
},
{
"epoch": 9.33,
"learning_rate": 1.3492713279842802e-06,
"loss": 2.4843,
"step": 113900
},
{
"epoch": 9.33,
"learning_rate": 1.3328966759456364e-06,
"loss": 2.4625,
"step": 114000
},
{
"epoch": 9.34,
"learning_rate": 1.316522023906992e-06,
"loss": 2.4772,
"step": 114100
},
{
"epoch": 9.35,
"learning_rate": 1.300147371868348e-06,
"loss": 2.4641,
"step": 114200
},
{
"epoch": 9.36,
"learning_rate": 1.2837727198297036e-06,
"loss": 2.4972,
"step": 114300
},
{
"epoch": 9.37,
"learning_rate": 1.2673980677910597e-06,
"loss": 2.4601,
"step": 114400
},
{
"epoch": 9.37,
"learning_rate": 1.2510234157524154e-06,
"loss": 2.4744,
"step": 114500
},
{
"epoch": 9.38,
"learning_rate": 1.2346487637137711e-06,
"loss": 2.4776,
"step": 114600
},
{
"epoch": 9.39,
"learning_rate": 1.218274111675127e-06,
"loss": 2.4909,
"step": 114700
},
{
"epoch": 9.4,
"learning_rate": 1.201899459636483e-06,
"loss": 2.446,
"step": 114800
},
{
"epoch": 9.41,
"learning_rate": 1.1855248075978386e-06,
"loss": 2.4938,
"step": 114900
},
{
"epoch": 9.42,
"learning_rate": 1.1691501555591945e-06,
"loss": 2.4416,
"step": 115000
},
{
"epoch": 9.42,
"learning_rate": 1.1527755035205502e-06,
"loss": 2.484,
"step": 115100
},
{
"epoch": 9.43,
"learning_rate": 1.1364008514819061e-06,
"loss": 2.4538,
"step": 115200
},
{
"epoch": 9.44,
"learning_rate": 1.120026199443262e-06,
"loss": 2.465,
"step": 115300
},
{
"epoch": 9.45,
"learning_rate": 1.1036515474046177e-06,
"loss": 2.4822,
"step": 115400
},
{
"epoch": 9.46,
"learning_rate": 1.0872768953659736e-06,
"loss": 2.4673,
"step": 115500
},
{
"epoch": 9.46,
"learning_rate": 1.0709022433273295e-06,
"loss": 2.4847,
"step": 115600
},
{
"epoch": 9.47,
"learning_rate": 1.0545275912886852e-06,
"loss": 2.4347,
"step": 115700
},
{
"epoch": 9.48,
"learning_rate": 1.038152939250041e-06,
"loss": 2.4893,
"step": 115800
},
{
"epoch": 9.49,
"learning_rate": 1.0217782872113968e-06,
"loss": 2.4832,
"step": 115900
},
{
"epoch": 9.5,
"learning_rate": 1.0054036351727527e-06,
"loss": 2.4778,
"step": 116000
},
{
"epoch": 9.51,
"learning_rate": 9.890289831341086e-07,
"loss": 2.4509,
"step": 116100
},
{
"epoch": 9.51,
"learning_rate": 9.726543310954643e-07,
"loss": 2.4635,
"step": 116200
},
{
"epoch": 9.52,
"learning_rate": 9.562796790568202e-07,
"loss": 2.4781,
"step": 116300
},
{
"epoch": 9.53,
"learning_rate": 9.399050270181759e-07,
"loss": 2.451,
"step": 116400
},
{
"epoch": 9.54,
"learning_rate": 9.235303749795318e-07,
"loss": 2.4803,
"step": 116500
},
{
"epoch": 9.55,
"learning_rate": 9.071557229408876e-07,
"loss": 2.462,
"step": 116600
},
{
"epoch": 9.55,
"learning_rate": 8.907810709022434e-07,
"loss": 2.502,
"step": 116700
},
{
"epoch": 9.56,
"learning_rate": 8.744064188635993e-07,
"loss": 2.4988,
"step": 116800
},
{
"epoch": 9.57,
"learning_rate": 8.58031766824955e-07,
"loss": 2.4858,
"step": 116900
},
{
"epoch": 9.58,
"learning_rate": 8.41657114786311e-07,
"loss": 2.4803,
"step": 117000
},
{
"epoch": 9.59,
"learning_rate": 8.252824627476667e-07,
"loss": 2.4874,
"step": 117100
},
{
"epoch": 9.6,
"learning_rate": 8.089078107090224e-07,
"loss": 2.4499,
"step": 117200
},
{
"epoch": 9.6,
"learning_rate": 7.925331586703782e-07,
"loss": 2.4769,
"step": 117300
},
{
"epoch": 9.61,
"learning_rate": 7.761585066317341e-07,
"loss": 2.465,
"step": 117400
},
{
"epoch": 9.62,
"learning_rate": 7.597838545930899e-07,
"loss": 2.4572,
"step": 117500
},
{
"epoch": 9.63,
"learning_rate": 7.434092025544458e-07,
"loss": 2.4699,
"step": 117600
},
{
"epoch": 9.64,
"learning_rate": 7.270345505158016e-07,
"loss": 2.4953,
"step": 117700
},
{
"epoch": 9.64,
"learning_rate": 7.106598984771574e-07,
"loss": 2.4646,
"step": 117800
},
{
"epoch": 9.65,
"learning_rate": 6.942852464385133e-07,
"loss": 2.4412,
"step": 117900
},
{
"epoch": 9.66,
"learning_rate": 6.77910594399869e-07,
"loss": 2.4873,
"step": 118000
},
{
"epoch": 9.67,
"learning_rate": 6.615359423612248e-07,
"loss": 2.5147,
"step": 118100
},
{
"epoch": 9.68,
"learning_rate": 6.451612903225807e-07,
"loss": 2.4683,
"step": 118200
},
{
"epoch": 9.69,
"learning_rate": 6.287866382839365e-07,
"loss": 2.46,
"step": 118300
},
{
"epoch": 9.69,
"learning_rate": 6.124119862452924e-07,
"loss": 2.4775,
"step": 118400
},
{
"epoch": 9.7,
"learning_rate": 5.960373342066481e-07,
"loss": 2.5124,
"step": 118500
},
{
"epoch": 9.71,
"learning_rate": 5.79662682168004e-07,
"loss": 2.4728,
"step": 118600
},
{
"epoch": 9.72,
"learning_rate": 5.632880301293598e-07,
"loss": 2.4743,
"step": 118700
},
{
"epoch": 9.73,
"learning_rate": 5.469133780907157e-07,
"loss": 2.5134,
"step": 118800
},
{
"epoch": 9.73,
"learning_rate": 5.305387260520714e-07,
"loss": 2.4655,
"step": 118900
},
{
"epoch": 9.74,
"learning_rate": 5.141640740134273e-07,
"loss": 2.4559,
"step": 119000
},
{
"epoch": 9.75,
"learning_rate": 4.977894219747831e-07,
"loss": 2.4724,
"step": 119100
},
{
"epoch": 9.76,
"learning_rate": 4.81414769936139e-07,
"loss": 2.506,
"step": 119200
},
{
"epoch": 9.77,
"learning_rate": 4.650401178974947e-07,
"loss": 2.4379,
"step": 119300
},
{
"epoch": 9.78,
"learning_rate": 4.486654658588505e-07,
"loss": 2.4732,
"step": 119400
},
{
"epoch": 9.78,
"learning_rate": 4.3229081382020635e-07,
"loss": 2.4776,
"step": 119500
},
{
"epoch": 9.79,
"learning_rate": 4.159161617815622e-07,
"loss": 2.483,
"step": 119600
},
{
"epoch": 9.8,
"learning_rate": 3.99541509742918e-07,
"loss": 2.4569,
"step": 119700
},
{
"epoch": 9.81,
"learning_rate": 3.831668577042738e-07,
"loss": 2.4781,
"step": 119800
},
{
"epoch": 9.82,
"learning_rate": 3.6679220566562964e-07,
"loss": 2.4737,
"step": 119900
},
{
"epoch": 9.82,
"learning_rate": 3.504175536269855e-07,
"loss": 2.4749,
"step": 120000
},
{
"epoch": 9.83,
"learning_rate": 3.3404290158834123e-07,
"loss": 2.4582,
"step": 120100
},
{
"epoch": 9.84,
"learning_rate": 3.176682495496971e-07,
"loss": 2.4546,
"step": 120200
},
{
"epoch": 9.85,
"learning_rate": 3.0129359751105293e-07,
"loss": 2.4568,
"step": 120300
},
{
"epoch": 9.86,
"learning_rate": 2.849189454724087e-07,
"loss": 2.4637,
"step": 120400
},
{
"epoch": 9.87,
"learning_rate": 2.6854429343376457e-07,
"loss": 2.479,
"step": 120500
},
{
"epoch": 9.87,
"learning_rate": 2.5216964139512036e-07,
"loss": 2.4644,
"step": 120600
},
{
"epoch": 9.88,
"learning_rate": 2.3579498935647619e-07,
"loss": 2.4845,
"step": 120700
},
{
"epoch": 9.89,
"learning_rate": 2.19420337317832e-07,
"loss": 2.4691,
"step": 120800
},
{
"epoch": 9.9,
"learning_rate": 2.0304568527918783e-07,
"loss": 2.4972,
"step": 120900
},
{
"epoch": 9.91,
"learning_rate": 1.8667103324054365e-07,
"loss": 2.4836,
"step": 121000
},
{
"epoch": 9.91,
"learning_rate": 1.7029638120189947e-07,
"loss": 2.4774,
"step": 121100
},
{
"epoch": 9.92,
"learning_rate": 1.539217291632553e-07,
"loss": 2.4536,
"step": 121200
},
{
"epoch": 9.93,
"learning_rate": 1.3754707712461112e-07,
"loss": 2.4536,
"step": 121300
},
{
"epoch": 9.94,
"learning_rate": 1.2117242508596694e-07,
"loss": 2.4993,
"step": 121400
},
{
"epoch": 9.95,
"learning_rate": 1.0479777304732276e-07,
"loss": 2.5068,
"step": 121500
},
{
"epoch": 9.96,
"learning_rate": 8.842312100867858e-08,
"loss": 2.4678,
"step": 121600
},
{
"epoch": 9.96,
"learning_rate": 7.204846897003439e-08,
"loss": 2.4577,
"step": 121700
},
{
"epoch": 9.97,
"learning_rate": 5.567381693139021e-08,
"loss": 2.4623,
"step": 121800
},
{
"epoch": 9.98,
"learning_rate": 3.929916489274603e-08,
"loss": 2.4673,
"step": 121900
},
{
"epoch": 9.99,
"learning_rate": 2.2924512854101853e-08,
"loss": 2.5057,
"step": 122000
},
{
"epoch": 10.0,
"learning_rate": 6.549860815457672e-09,
"loss": 2.4638,
"step": 122100
},
{
"epoch": 10.0,
"eval_bleu": 995.492,
"eval_chrf": 2879.8334,
"eval_chrf_plus": 2710.1728,
"eval_gen_len": 14.645,
"eval_loss": 2.3738529682159424,
"eval_runtime": 38.7293,
"eval_samples_per_second": 25.82,
"eval_steps_per_second": 1.084,
"step": 122140
}
],
"logging_steps": 100,
"max_steps": 122140,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.412332343172915e+18,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}