diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,25939 @@ +{ + "best_metric": 1.145164132118225, + "best_model_checkpoint": "./checkpoints/bart-JES-cnn_dailymail/checkpoint-215337", + "epoch": 6.0, + "global_step": 430674, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.999338246562365e-05, + "loss": 2.5262, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.9986416639964334e-05, + "loss": 2.1005, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 2.997945081430502e-05, + "loss": 1.9954, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 2.9972484988645703e-05, + "loss": 1.9422, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 2.996551916298639e-05, + "loss": 1.8915, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 2.9958553337327073e-05, + "loss": 1.8859, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 2.995158751166776e-05, + "loss": 1.8133, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 2.9944621686008442e-05, + "loss": 1.8649, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 2.9937725518605722e-05, + "loss": 1.8476, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 2.9930759692946405e-05, + "loss": 1.8485, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 2.992379386728709e-05, + "loss": 1.7922, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 2.9916828041627774e-05, + "loss": 1.8166, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 2.990986221596846e-05, + "loss": 1.7964, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 2.9902896390309144e-05, + "loss": 1.7728, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 2.989593056464983e-05, + "loss": 1.7414, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 2.9888964738990513e-05, + "loss": 1.7531, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 2.98819989133312e-05, + "loss": 1.7304, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 2.9875033087671882e-05, + "loss": 1.7315, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 2.986806726201257e-05, + "loss": 1.7073, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 2.986110143635325e-05, + "loss": 1.7254, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 2.9854135610693938e-05, + "loss": 1.7187, + "step": 2100 + }, + { + "epoch": 0.03, + "learning_rate": 2.984716978503462e-05, + "loss": 1.7382, + "step": 2200 + }, + { + "epoch": 0.03, + "learning_rate": 2.9840203959375304e-05, + "loss": 1.6937, + "step": 2300 + }, + { + "epoch": 0.03, + "learning_rate": 2.983323813371599e-05, + "loss": 1.6649, + "step": 2400 + }, + { + "epoch": 0.03, + "learning_rate": 2.9826272308056673e-05, + "loss": 1.7018, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 2.981930648239736e-05, + "loss": 1.6868, + "step": 2600 + }, + { + "epoch": 0.04, + "learning_rate": 2.9812340656738043e-05, + "loss": 1.6758, + "step": 2700 + }, + { + "epoch": 0.04, + "learning_rate": 2.980537483107873e-05, + "loss": 1.7183, + "step": 2800 + }, + { + "epoch": 0.04, + "learning_rate": 2.9798409005419412e-05, + "loss": 1.659, + "step": 2900 + }, + { + "epoch": 0.04, + "learning_rate": 2.97914431797601e-05, + "loss": 1.6675, + "step": 3000 + }, + { + "epoch": 0.04, + "learning_rate": 2.978447735410078e-05, + "loss": 1.6956, + "step": 3100 + }, + { + "epoch": 0.04, + "learning_rate": 2.9777511528441468e-05, + "loss": 1.6716, + "step": 3200 + }, + { + "epoch": 0.05, + "learning_rate": 2.977054570278215e-05, + "loss": 1.6501, + "step": 3300 + }, + { + "epoch": 0.05, + "learning_rate": 2.9763579877122837e-05, + "loss": 1.669, + "step": 3400 + }, + { + "epoch": 0.05, + "learning_rate": 2.975661405146352e-05, + "loss": 1.6555, + "step": 3500 + }, + { + "epoch": 0.05, + "learning_rate": 2.9749648225804207e-05, + "loss": 1.6304, + "step": 3600 + }, + { + "epoch": 0.05, + "learning_rate": 2.974268240014489e-05, + "loss": 1.7103, + "step": 3700 + }, + { + "epoch": 0.05, + "learning_rate": 2.9735716574485576e-05, + "loss": 1.6437, + "step": 3800 + }, + { + "epoch": 0.05, + "learning_rate": 2.972875074882626e-05, + "loss": 1.6302, + "step": 3900 + }, + { + "epoch": 0.06, + "learning_rate": 2.9721784923166945e-05, + "loss": 1.6387, + "step": 4000 + }, + { + "epoch": 0.06, + "learning_rate": 2.971481909750763e-05, + "loss": 1.6215, + "step": 4100 + }, + { + "epoch": 0.06, + "learning_rate": 2.9707853271848315e-05, + "loss": 1.6473, + "step": 4200 + }, + { + "epoch": 0.06, + "learning_rate": 2.9700887446188998e-05, + "loss": 1.6353, + "step": 4300 + }, + { + "epoch": 0.06, + "learning_rate": 2.9693921620529684e-05, + "loss": 1.6203, + "step": 4400 + }, + { + "epoch": 0.06, + "learning_rate": 2.9686955794870364e-05, + "loss": 1.6379, + "step": 4500 + }, + { + "epoch": 0.06, + "learning_rate": 2.967998996921105e-05, + "loss": 1.6431, + "step": 4600 + }, + { + "epoch": 0.07, + "learning_rate": 2.9673024143551733e-05, + "loss": 1.6002, + "step": 4700 + }, + { + "epoch": 0.07, + "learning_rate": 2.966605831789242e-05, + "loss": 1.6099, + "step": 4800 + }, + { + "epoch": 0.07, + "learning_rate": 2.9659092492233106e-05, + "loss": 1.6515, + "step": 4900 + }, + { + "epoch": 0.07, + "learning_rate": 2.965212666657379e-05, + "loss": 1.6138, + "step": 5000 + }, + { + "epoch": 0.07, + "learning_rate": 2.9645160840914475e-05, + "loss": 1.6059, + "step": 5100 + }, + { + "epoch": 0.07, + "learning_rate": 2.9638195015255158e-05, + "loss": 1.6292, + "step": 5200 + }, + { + "epoch": 0.07, + "learning_rate": 2.9631229189595845e-05, + "loss": 1.6093, + "step": 5300 + }, + { + "epoch": 0.08, + "learning_rate": 2.9624263363936528e-05, + "loss": 1.5921, + "step": 5400 + }, + { + "epoch": 0.08, + "learning_rate": 2.9617297538277214e-05, + "loss": 1.6198, + "step": 5500 + }, + { + "epoch": 0.08, + "learning_rate": 2.9610331712617897e-05, + "loss": 1.5706, + "step": 5600 + }, + { + "epoch": 0.08, + "learning_rate": 2.9603365886958583e-05, + "loss": 1.58, + "step": 5700 + }, + { + "epoch": 0.08, + "learning_rate": 2.9596400061299266e-05, + "loss": 1.5918, + "step": 5800 + }, + { + "epoch": 0.08, + "learning_rate": 2.9589434235639953e-05, + "loss": 1.6068, + "step": 5900 + }, + { + "epoch": 0.08, + "learning_rate": 2.9582468409980636e-05, + "loss": 1.6168, + "step": 6000 + }, + { + "epoch": 0.08, + "learning_rate": 2.9575502584321322e-05, + "loss": 1.6105, + "step": 6100 + }, + { + "epoch": 0.09, + "learning_rate": 2.9568536758662005e-05, + "loss": 1.6061, + "step": 6200 + }, + { + "epoch": 0.09, + "learning_rate": 2.956157093300269e-05, + "loss": 1.5705, + "step": 6300 + }, + { + "epoch": 0.09, + "learning_rate": 2.9554605107343374e-05, + "loss": 1.6033, + "step": 6400 + }, + { + "epoch": 0.09, + "learning_rate": 2.954763928168406e-05, + "loss": 1.5526, + "step": 6500 + }, + { + "epoch": 0.09, + "learning_rate": 2.9540673456024744e-05, + "loss": 1.5967, + "step": 6600 + }, + { + "epoch": 0.09, + "learning_rate": 2.9533707630365427e-05, + "loss": 1.6052, + "step": 6700 + }, + { + "epoch": 0.09, + "learning_rate": 2.952674180470611e-05, + "loss": 1.5469, + "step": 6800 + }, + { + "epoch": 0.1, + "learning_rate": 2.9519775979046796e-05, + "loss": 1.5831, + "step": 6900 + }, + { + "epoch": 0.1, + "learning_rate": 2.9512879811644073e-05, + "loss": 1.5752, + "step": 7000 + }, + { + "epoch": 0.1, + "learning_rate": 2.950591398598476e-05, + "loss": 1.5882, + "step": 7100 + }, + { + "epoch": 0.1, + "learning_rate": 2.9498948160325442e-05, + "loss": 1.6126, + "step": 7200 + }, + { + "epoch": 0.1, + "learning_rate": 2.9491982334666128e-05, + "loss": 1.5726, + "step": 7300 + }, + { + "epoch": 0.1, + "learning_rate": 2.948501650900681e-05, + "loss": 1.6108, + "step": 7400 + }, + { + "epoch": 0.1, + "learning_rate": 2.9478050683347498e-05, + "loss": 1.5633, + "step": 7500 + }, + { + "epoch": 0.11, + "learning_rate": 2.947108485768818e-05, + "loss": 1.5745, + "step": 7600 + }, + { + "epoch": 0.11, + "learning_rate": 2.9464119032028867e-05, + "loss": 1.5643, + "step": 7700 + }, + { + "epoch": 0.11, + "learning_rate": 2.945715320636955e-05, + "loss": 1.5851, + "step": 7800 + }, + { + "epoch": 0.11, + "learning_rate": 2.9450187380710236e-05, + "loss": 1.5255, + "step": 7900 + }, + { + "epoch": 0.11, + "learning_rate": 2.944322155505092e-05, + "loss": 1.5414, + "step": 8000 + }, + { + "epoch": 0.11, + "learning_rate": 2.9436255729391606e-05, + "loss": 1.5642, + "step": 8100 + }, + { + "epoch": 0.11, + "learning_rate": 2.9429289903732292e-05, + "loss": 1.5488, + "step": 8200 + }, + { + "epoch": 0.12, + "learning_rate": 2.9422324078072975e-05, + "loss": 1.5419, + "step": 8300 + }, + { + "epoch": 0.12, + "learning_rate": 2.941535825241366e-05, + "loss": 1.5602, + "step": 8400 + }, + { + "epoch": 0.12, + "learning_rate": 2.9408392426754344e-05, + "loss": 1.5651, + "step": 8500 + }, + { + "epoch": 0.12, + "learning_rate": 2.940142660109503e-05, + "loss": 1.56, + "step": 8600 + }, + { + "epoch": 0.12, + "learning_rate": 2.9394460775435714e-05, + "loss": 1.5774, + "step": 8700 + }, + { + "epoch": 0.12, + "learning_rate": 2.93874949497764e-05, + "loss": 1.5753, + "step": 8800 + }, + { + "epoch": 0.12, + "learning_rate": 2.9380529124117083e-05, + "loss": 1.5387, + "step": 8900 + }, + { + "epoch": 0.13, + "learning_rate": 2.937356329845777e-05, + "loss": 1.538, + "step": 9000 + }, + { + "epoch": 0.13, + "learning_rate": 2.9366667131055046e-05, + "loss": 1.5481, + "step": 9100 + }, + { + "epoch": 0.13, + "learning_rate": 2.9359701305395732e-05, + "loss": 1.5839, + "step": 9200 + }, + { + "epoch": 0.13, + "learning_rate": 2.9352735479736412e-05, + "loss": 1.5186, + "step": 9300 + }, + { + "epoch": 0.13, + "learning_rate": 2.93457696540771e-05, + "loss": 1.5407, + "step": 9400 + }, + { + "epoch": 0.13, + "learning_rate": 2.933880382841778e-05, + "loss": 1.5759, + "step": 9500 + }, + { + "epoch": 0.13, + "learning_rate": 2.9331838002758468e-05, + "loss": 1.5414, + "step": 9600 + }, + { + "epoch": 0.14, + "learning_rate": 2.932487217709915e-05, + "loss": 1.518, + "step": 9700 + }, + { + "epoch": 0.14, + "learning_rate": 2.9317906351439837e-05, + "loss": 1.5332, + "step": 9800 + }, + { + "epoch": 0.14, + "learning_rate": 2.931094052578052e-05, + "loss": 1.5843, + "step": 9900 + }, + { + "epoch": 0.14, + "learning_rate": 2.9303974700121206e-05, + "loss": 1.5369, + "step": 10000 + }, + { + "epoch": 0.14, + "learning_rate": 2.929700887446189e-05, + "loss": 1.5456, + "step": 10100 + }, + { + "epoch": 0.14, + "learning_rate": 2.9290043048802576e-05, + "loss": 1.5237, + "step": 10200 + }, + { + "epoch": 0.14, + "learning_rate": 2.928307722314326e-05, + "loss": 1.5473, + "step": 10300 + }, + { + "epoch": 0.14, + "learning_rate": 2.9276111397483945e-05, + "loss": 1.5331, + "step": 10400 + }, + { + "epoch": 0.15, + "learning_rate": 2.9269145571824628e-05, + "loss": 1.5466, + "step": 10500 + }, + { + "epoch": 0.15, + "learning_rate": 2.9262179746165315e-05, + "loss": 1.5567, + "step": 10600 + }, + { + "epoch": 0.15, + "learning_rate": 2.9255213920505998e-05, + "loss": 1.5725, + "step": 10700 + }, + { + "epoch": 0.15, + "learning_rate": 2.9248248094846684e-05, + "loss": 1.5546, + "step": 10800 + }, + { + "epoch": 0.15, + "learning_rate": 2.9241282269187367e-05, + "loss": 1.5172, + "step": 10900 + }, + { + "epoch": 0.15, + "learning_rate": 2.9234316443528053e-05, + "loss": 1.511, + "step": 11000 + }, + { + "epoch": 0.15, + "learning_rate": 2.9227350617868736e-05, + "loss": 1.4924, + "step": 11100 + }, + { + "epoch": 0.16, + "learning_rate": 2.9220384792209423e-05, + "loss": 1.5773, + "step": 11200 + }, + { + "epoch": 0.16, + "learning_rate": 2.92134886248067e-05, + "loss": 1.568, + "step": 11300 + }, + { + "epoch": 0.16, + "learning_rate": 2.9206522799147385e-05, + "loss": 1.5465, + "step": 11400 + }, + { + "epoch": 0.16, + "learning_rate": 2.919955697348807e-05, + "loss": 1.5592, + "step": 11500 + }, + { + "epoch": 0.16, + "learning_rate": 2.9192591147828755e-05, + "loss": 1.5336, + "step": 11600 + }, + { + "epoch": 0.16, + "learning_rate": 2.9185625322169438e-05, + "loss": 1.4984, + "step": 11700 + }, + { + "epoch": 0.16, + "learning_rate": 2.917865949651012e-05, + "loss": 1.5131, + "step": 11800 + }, + { + "epoch": 0.17, + "learning_rate": 2.9171693670850804e-05, + "loss": 1.5503, + "step": 11900 + }, + { + "epoch": 0.17, + "learning_rate": 2.916472784519149e-05, + "loss": 1.5037, + "step": 12000 + }, + { + "epoch": 0.17, + "learning_rate": 2.9157762019532173e-05, + "loss": 1.5283, + "step": 12100 + }, + { + "epoch": 0.17, + "learning_rate": 2.915079619387286e-05, + "loss": 1.5331, + "step": 12200 + }, + { + "epoch": 0.17, + "learning_rate": 2.9143830368213546e-05, + "loss": 1.509, + "step": 12300 + }, + { + "epoch": 0.17, + "learning_rate": 2.913686454255423e-05, + "loss": 1.5433, + "step": 12400 + }, + { + "epoch": 0.17, + "learning_rate": 2.9129968375151505e-05, + "loss": 1.513, + "step": 12500 + }, + { + "epoch": 0.18, + "learning_rate": 2.9123002549492192e-05, + "loss": 1.553, + "step": 12600 + }, + { + "epoch": 0.18, + "learning_rate": 2.9116036723832875e-05, + "loss": 1.5211, + "step": 12700 + }, + { + "epoch": 0.18, + "learning_rate": 2.910907089817356e-05, + "loss": 1.4837, + "step": 12800 + }, + { + "epoch": 0.18, + "learning_rate": 2.9102105072514244e-05, + "loss": 1.5275, + "step": 12900 + }, + { + "epoch": 0.18, + "learning_rate": 2.909513924685493e-05, + "loss": 1.4938, + "step": 13000 + }, + { + "epoch": 0.18, + "learning_rate": 2.9088173421195617e-05, + "loss": 1.5339, + "step": 13100 + }, + { + "epoch": 0.18, + "learning_rate": 2.90812075955363e-05, + "loss": 1.4991, + "step": 13200 + }, + { + "epoch": 0.19, + "learning_rate": 2.9074241769876986e-05, + "loss": 1.4775, + "step": 13300 + }, + { + "epoch": 0.19, + "learning_rate": 2.906727594421767e-05, + "loss": 1.4896, + "step": 13400 + }, + { + "epoch": 0.19, + "learning_rate": 2.9060310118558356e-05, + "loss": 1.5014, + "step": 13500 + }, + { + "epoch": 0.19, + "learning_rate": 2.905334429289904e-05, + "loss": 1.5069, + "step": 13600 + }, + { + "epoch": 0.19, + "learning_rate": 2.9046378467239725e-05, + "loss": 1.4634, + "step": 13700 + }, + { + "epoch": 0.19, + "learning_rate": 2.9039412641580408e-05, + "loss": 1.4807, + "step": 13800 + }, + { + "epoch": 0.19, + "learning_rate": 2.9032446815921094e-05, + "loss": 1.5231, + "step": 13900 + }, + { + "epoch": 0.2, + "learning_rate": 2.9025480990261777e-05, + "loss": 1.482, + "step": 14000 + }, + { + "epoch": 0.2, + "learning_rate": 2.9018515164602464e-05, + "loss": 1.5036, + "step": 14100 + }, + { + "epoch": 0.2, + "learning_rate": 2.9011549338943143e-05, + "loss": 1.5032, + "step": 14200 + }, + { + "epoch": 0.2, + "learning_rate": 2.900458351328383e-05, + "loss": 1.4612, + "step": 14300 + }, + { + "epoch": 0.2, + "learning_rate": 2.8997617687624513e-05, + "loss": 1.4982, + "step": 14400 + }, + { + "epoch": 0.2, + "learning_rate": 2.89906518619652e-05, + "loss": 1.4899, + "step": 14500 + }, + { + "epoch": 0.2, + "learning_rate": 2.8983686036305882e-05, + "loss": 1.5048, + "step": 14600 + }, + { + "epoch": 0.2, + "learning_rate": 2.897672021064657e-05, + "loss": 1.4901, + "step": 14700 + }, + { + "epoch": 0.21, + "learning_rate": 2.896975438498725e-05, + "loss": 1.4956, + "step": 14800 + }, + { + "epoch": 0.21, + "learning_rate": 2.8962788559327938e-05, + "loss": 1.505, + "step": 14900 + }, + { + "epoch": 0.21, + "learning_rate": 2.895582273366862e-05, + "loss": 1.4982, + "step": 15000 + }, + { + "epoch": 0.21, + "learning_rate": 2.8948856908009307e-05, + "loss": 1.4777, + "step": 15100 + }, + { + "epoch": 0.21, + "learning_rate": 2.894189108234999e-05, + "loss": 1.4768, + "step": 15200 + }, + { + "epoch": 0.21, + "learning_rate": 2.8934925256690676e-05, + "loss": 1.485, + "step": 15300 + }, + { + "epoch": 0.21, + "learning_rate": 2.892795943103136e-05, + "loss": 1.4376, + "step": 15400 + }, + { + "epoch": 0.22, + "learning_rate": 2.8920993605372046e-05, + "loss": 1.4533, + "step": 15500 + }, + { + "epoch": 0.22, + "learning_rate": 2.8914027779712732e-05, + "loss": 1.4851, + "step": 15600 + }, + { + "epoch": 0.22, + "learning_rate": 2.8907061954053415e-05, + "loss": 1.4593, + "step": 15700 + }, + { + "epoch": 0.22, + "learning_rate": 2.89000961283941e-05, + "loss": 1.4993, + "step": 15800 + }, + { + "epoch": 0.22, + "learning_rate": 2.8893130302734785e-05, + "loss": 1.5023, + "step": 15900 + }, + { + "epoch": 0.22, + "learning_rate": 2.888616447707547e-05, + "loss": 1.4778, + "step": 16000 + }, + { + "epoch": 0.22, + "learning_rate": 2.8879198651416154e-05, + "loss": 1.4609, + "step": 16100 + }, + { + "epoch": 0.23, + "learning_rate": 2.887223282575684e-05, + "loss": 1.494, + "step": 16200 + }, + { + "epoch": 0.23, + "learning_rate": 2.8865267000097523e-05, + "loss": 1.4559, + "step": 16300 + }, + { + "epoch": 0.23, + "learning_rate": 2.8858301174438206e-05, + "loss": 1.503, + "step": 16400 + }, + { + "epoch": 0.23, + "learning_rate": 2.885133534877889e-05, + "loss": 1.5209, + "step": 16500 + }, + { + "epoch": 0.23, + "learning_rate": 2.8844369523119576e-05, + "loss": 1.4833, + "step": 16600 + }, + { + "epoch": 0.23, + "learning_rate": 2.883740369746026e-05, + "loss": 1.4805, + "step": 16700 + }, + { + "epoch": 0.23, + "learning_rate": 2.8830437871800945e-05, + "loss": 1.4773, + "step": 16800 + }, + { + "epoch": 0.24, + "learning_rate": 2.8823472046141628e-05, + "loss": 1.4863, + "step": 16900 + }, + { + "epoch": 0.24, + "learning_rate": 2.8816575878738908e-05, + "loss": 1.5073, + "step": 17000 + }, + { + "epoch": 0.24, + "learning_rate": 2.880961005307959e-05, + "loss": 1.4309, + "step": 17100 + }, + { + "epoch": 0.24, + "learning_rate": 2.8802644227420277e-05, + "loss": 1.486, + "step": 17200 + }, + { + "epoch": 0.24, + "learning_rate": 2.8795748060017554e-05, + "loss": 1.4545, + "step": 17300 + }, + { + "epoch": 0.24, + "learning_rate": 2.878878223435824e-05, + "loss": 1.4366, + "step": 17400 + }, + { + "epoch": 0.24, + "learning_rate": 2.8781816408698923e-05, + "loss": 1.4476, + "step": 17500 + }, + { + "epoch": 0.25, + "learning_rate": 2.877485058303961e-05, + "loss": 1.5035, + "step": 17600 + }, + { + "epoch": 0.25, + "learning_rate": 2.8767884757380292e-05, + "loss": 1.4817, + "step": 17700 + }, + { + "epoch": 0.25, + "learning_rate": 2.876091893172098e-05, + "loss": 1.4922, + "step": 17800 + }, + { + "epoch": 0.25, + "learning_rate": 2.8753953106061662e-05, + "loss": 1.4724, + "step": 17900 + }, + { + "epoch": 0.25, + "learning_rate": 2.8746987280402348e-05, + "loss": 1.467, + "step": 18000 + }, + { + "epoch": 0.25, + "learning_rate": 2.874002145474303e-05, + "loss": 1.4925, + "step": 18100 + }, + { + "epoch": 0.25, + "learning_rate": 2.8733055629083717e-05, + "loss": 1.4711, + "step": 18200 + }, + { + "epoch": 0.25, + "learning_rate": 2.87260898034244e-05, + "loss": 1.467, + "step": 18300 + }, + { + "epoch": 0.26, + "learning_rate": 2.8719123977765087e-05, + "loss": 1.4444, + "step": 18400 + }, + { + "epoch": 0.26, + "learning_rate": 2.871215815210577e-05, + "loss": 1.462, + "step": 18500 + }, + { + "epoch": 0.26, + "learning_rate": 2.8705192326446456e-05, + "loss": 1.4642, + "step": 18600 + }, + { + "epoch": 0.26, + "learning_rate": 2.869822650078714e-05, + "loss": 1.4821, + "step": 18700 + }, + { + "epoch": 0.26, + "learning_rate": 2.8691260675127826e-05, + "loss": 1.455, + "step": 18800 + }, + { + "epoch": 0.26, + "learning_rate": 2.868429484946851e-05, + "loss": 1.5036, + "step": 18900 + }, + { + "epoch": 0.26, + "learning_rate": 2.867732902380919e-05, + "loss": 1.4585, + "step": 19000 + }, + { + "epoch": 0.27, + "learning_rate": 2.8670363198149875e-05, + "loss": 1.4333, + "step": 19100 + }, + { + "epoch": 0.27, + "learning_rate": 2.866339737249056e-05, + "loss": 1.4954, + "step": 19200 + }, + { + "epoch": 0.27, + "learning_rate": 2.8656431546831244e-05, + "loss": 1.5066, + "step": 19300 + }, + { + "epoch": 0.27, + "learning_rate": 2.864946572117193e-05, + "loss": 1.4496, + "step": 19400 + }, + { + "epoch": 0.27, + "learning_rate": 2.8642499895512613e-05, + "loss": 1.4684, + "step": 19500 + }, + { + "epoch": 0.27, + "learning_rate": 2.86355340698533e-05, + "loss": 1.4466, + "step": 19600 + }, + { + "epoch": 0.27, + "learning_rate": 2.8628637902450576e-05, + "loss": 1.4826, + "step": 19700 + }, + { + "epoch": 0.28, + "learning_rate": 2.8621672076791262e-05, + "loss": 1.4521, + "step": 19800 + }, + { + "epoch": 0.28, + "learning_rate": 2.8614706251131945e-05, + "loss": 1.4588, + "step": 19900 + }, + { + "epoch": 0.28, + "learning_rate": 2.8607740425472632e-05, + "loss": 1.4844, + "step": 20000 + }, + { + "epoch": 0.28, + "learning_rate": 2.8600774599813315e-05, + "loss": 1.4472, + "step": 20100 + }, + { + "epoch": 0.28, + "learning_rate": 2.8593808774154e-05, + "loss": 1.4347, + "step": 20200 + }, + { + "epoch": 0.28, + "learning_rate": 2.8586842948494684e-05, + "loss": 1.4641, + "step": 20300 + }, + { + "epoch": 0.28, + "learning_rate": 2.857987712283537e-05, + "loss": 1.4338, + "step": 20400 + }, + { + "epoch": 0.29, + "learning_rate": 2.8572911297176057e-05, + "loss": 1.4878, + "step": 20500 + }, + { + "epoch": 0.29, + "learning_rate": 2.856594547151674e-05, + "loss": 1.4638, + "step": 20600 + }, + { + "epoch": 0.29, + "learning_rate": 2.8558979645857426e-05, + "loss": 1.4685, + "step": 20700 + }, + { + "epoch": 0.29, + "learning_rate": 2.855201382019811e-05, + "loss": 1.455, + "step": 20800 + }, + { + "epoch": 0.29, + "learning_rate": 2.8545047994538796e-05, + "loss": 1.4495, + "step": 20900 + }, + { + "epoch": 0.29, + "learning_rate": 2.853808216887948e-05, + "loss": 1.4194, + "step": 21000 + }, + { + "epoch": 0.29, + "learning_rate": 2.8531116343220165e-05, + "loss": 1.4316, + "step": 21100 + }, + { + "epoch": 0.3, + "learning_rate": 2.8524150517560848e-05, + "loss": 1.4557, + "step": 21200 + }, + { + "epoch": 0.3, + "learning_rate": 2.8517184691901534e-05, + "loss": 1.4427, + "step": 21300 + }, + { + "epoch": 0.3, + "learning_rate": 2.8510218866242217e-05, + "loss": 1.4752, + "step": 21400 + }, + { + "epoch": 0.3, + "learning_rate": 2.85032530405829e-05, + "loss": 1.4492, + "step": 21500 + }, + { + "epoch": 0.3, + "learning_rate": 2.8496287214923583e-05, + "loss": 1.4511, + "step": 21600 + }, + { + "epoch": 0.3, + "learning_rate": 2.848932138926427e-05, + "loss": 1.4583, + "step": 21700 + }, + { + "epoch": 0.3, + "learning_rate": 2.8482355563604953e-05, + "loss": 1.4913, + "step": 21800 + }, + { + "epoch": 0.31, + "learning_rate": 2.847538973794564e-05, + "loss": 1.4318, + "step": 21900 + }, + { + "epoch": 0.31, + "learning_rate": 2.8468423912286322e-05, + "loss": 1.4226, + "step": 22000 + }, + { + "epoch": 0.31, + "learning_rate": 2.846145808662701e-05, + "loss": 1.4472, + "step": 22100 + }, + { + "epoch": 0.31, + "learning_rate": 2.845449226096769e-05, + "loss": 1.4495, + "step": 22200 + }, + { + "epoch": 0.31, + "learning_rate": 2.8447526435308378e-05, + "loss": 1.4007, + "step": 22300 + }, + { + "epoch": 0.31, + "learning_rate": 2.844056060964906e-05, + "loss": 1.4201, + "step": 22400 + }, + { + "epoch": 0.31, + "learning_rate": 2.8433594783989747e-05, + "loss": 1.4641, + "step": 22500 + }, + { + "epoch": 0.31, + "learning_rate": 2.842662895833043e-05, + "loss": 1.4675, + "step": 22600 + }, + { + "epoch": 0.32, + "learning_rate": 2.8419663132671117e-05, + "loss": 1.4365, + "step": 22700 + }, + { + "epoch": 0.32, + "learning_rate": 2.84126973070118e-05, + "loss": 1.4509, + "step": 22800 + }, + { + "epoch": 0.32, + "learning_rate": 2.8405731481352486e-05, + "loss": 1.4628, + "step": 22900 + }, + { + "epoch": 0.32, + "learning_rate": 2.8398765655693172e-05, + "loss": 1.4723, + "step": 23000 + }, + { + "epoch": 0.32, + "learning_rate": 2.8391799830033855e-05, + "loss": 1.4234, + "step": 23100 + }, + { + "epoch": 0.32, + "learning_rate": 2.838483400437454e-05, + "loss": 1.4527, + "step": 23200 + }, + { + "epoch": 0.32, + "learning_rate": 2.8377868178715225e-05, + "loss": 1.4473, + "step": 23300 + }, + { + "epoch": 0.33, + "learning_rate": 2.837090235305591e-05, + "loss": 1.4594, + "step": 23400 + }, + { + "epoch": 0.33, + "learning_rate": 2.8363936527396594e-05, + "loss": 1.4888, + "step": 23500 + }, + { + "epoch": 0.33, + "learning_rate": 2.835697070173728e-05, + "loss": 1.4387, + "step": 23600 + }, + { + "epoch": 0.33, + "learning_rate": 2.835000487607796e-05, + "loss": 1.4355, + "step": 23700 + }, + { + "epoch": 0.33, + "learning_rate": 2.8343039050418646e-05, + "loss": 1.4441, + "step": 23800 + }, + { + "epoch": 0.33, + "learning_rate": 2.833607322475933e-05, + "loss": 1.4317, + "step": 23900 + }, + { + "epoch": 0.33, + "learning_rate": 2.8329107399100016e-05, + "loss": 1.4386, + "step": 24000 + }, + { + "epoch": 0.34, + "learning_rate": 2.83221415734407e-05, + "loss": 1.4528, + "step": 24100 + }, + { + "epoch": 0.34, + "learning_rate": 2.8315175747781385e-05, + "loss": 1.4295, + "step": 24200 + }, + { + "epoch": 0.34, + "learning_rate": 2.8308209922122068e-05, + "loss": 1.402, + "step": 24300 + }, + { + "epoch": 0.34, + "learning_rate": 2.8301244096462754e-05, + "loss": 1.4026, + "step": 24400 + }, + { + "epoch": 0.34, + "learning_rate": 2.8294278270803437e-05, + "loss": 1.4503, + "step": 24500 + }, + { + "epoch": 0.34, + "learning_rate": 2.8287312445144124e-05, + "loss": 1.4299, + "step": 24600 + }, + { + "epoch": 0.34, + "learning_rate": 2.8280346619484807e-05, + "loss": 1.4347, + "step": 24700 + }, + { + "epoch": 0.35, + "learning_rate": 2.8273380793825493e-05, + "loss": 1.4493, + "step": 24800 + }, + { + "epoch": 0.35, + "learning_rate": 2.8266414968166176e-05, + "loss": 1.4448, + "step": 24900 + }, + { + "epoch": 0.35, + "learning_rate": 2.8259449142506863e-05, + "loss": 1.4153, + "step": 25000 + }, + { + "epoch": 0.35, + "learning_rate": 2.8252483316847546e-05, + "loss": 1.4252, + "step": 25100 + }, + { + "epoch": 0.35, + "learning_rate": 2.8245517491188232e-05, + "loss": 1.3862, + "step": 25200 + }, + { + "epoch": 0.35, + "learning_rate": 2.823855166552892e-05, + "loss": 1.4359, + "step": 25300 + }, + { + "epoch": 0.35, + "learning_rate": 2.82315858398696e-05, + "loss": 1.4428, + "step": 25400 + }, + { + "epoch": 0.36, + "learning_rate": 2.8224620014210288e-05, + "loss": 1.4598, + "step": 25500 + }, + { + "epoch": 0.36, + "learning_rate": 2.821765418855097e-05, + "loss": 1.4415, + "step": 25600 + }, + { + "epoch": 0.36, + "learning_rate": 2.8210758021148247e-05, + "loss": 1.4143, + "step": 25700 + }, + { + "epoch": 0.36, + "learning_rate": 2.8203792195488933e-05, + "loss": 1.3649, + "step": 25800 + }, + { + "epoch": 0.36, + "learning_rate": 2.819689602808621e-05, + "loss": 1.4065, + "step": 25900 + }, + { + "epoch": 0.36, + "learning_rate": 2.8189930202426896e-05, + "loss": 1.4547, + "step": 26000 + }, + { + "epoch": 0.36, + "learning_rate": 2.818296437676758e-05, + "loss": 1.4199, + "step": 26100 + }, + { + "epoch": 0.37, + "learning_rate": 2.8175998551108266e-05, + "loss": 1.418, + "step": 26200 + }, + { + "epoch": 0.37, + "learning_rate": 2.8169032725448945e-05, + "loss": 1.4205, + "step": 26300 + }, + { + "epoch": 0.37, + "learning_rate": 2.816206689978963e-05, + "loss": 1.433, + "step": 26400 + }, + { + "epoch": 0.37, + "learning_rate": 2.8155101074130315e-05, + "loss": 1.413, + "step": 26500 + }, + { + "epoch": 0.37, + "learning_rate": 2.8148135248471e-05, + "loss": 1.3851, + "step": 26600 + }, + { + "epoch": 0.37, + "learning_rate": 2.8141169422811684e-05, + "loss": 1.4563, + "step": 26700 + }, + { + "epoch": 0.37, + "learning_rate": 2.813420359715237e-05, + "loss": 1.4352, + "step": 26800 + }, + { + "epoch": 0.37, + "learning_rate": 2.8127237771493057e-05, + "loss": 1.4139, + "step": 26900 + }, + { + "epoch": 0.38, + "learning_rate": 2.812027194583374e-05, + "loss": 1.3865, + "step": 27000 + }, + { + "epoch": 0.38, + "learning_rate": 2.8113306120174426e-05, + "loss": 1.4146, + "step": 27100 + }, + { + "epoch": 0.38, + "learning_rate": 2.810634029451511e-05, + "loss": 1.3974, + "step": 27200 + }, + { + "epoch": 0.38, + "learning_rate": 2.8099374468855795e-05, + "loss": 1.3841, + "step": 27300 + }, + { + "epoch": 0.38, + "learning_rate": 2.809240864319648e-05, + "loss": 1.4116, + "step": 27400 + }, + { + "epoch": 0.38, + "learning_rate": 2.8085442817537165e-05, + "loss": 1.3978, + "step": 27500 + }, + { + "epoch": 0.38, + "learning_rate": 2.8078476991877848e-05, + "loss": 1.4165, + "step": 27600 + }, + { + "epoch": 0.39, + "learning_rate": 2.8071511166218534e-05, + "loss": 1.4303, + "step": 27700 + }, + { + "epoch": 0.39, + "learning_rate": 2.8064545340559217e-05, + "loss": 1.4583, + "step": 27800 + }, + { + "epoch": 0.39, + "learning_rate": 2.8057579514899904e-05, + "loss": 1.3974, + "step": 27900 + }, + { + "epoch": 0.39, + "learning_rate": 2.8050613689240587e-05, + "loss": 1.4024, + "step": 28000 + }, + { + "epoch": 0.39, + "learning_rate": 2.8043647863581273e-05, + "loss": 1.4423, + "step": 28100 + }, + { + "epoch": 0.39, + "learning_rate": 2.8036682037921956e-05, + "loss": 1.4083, + "step": 28200 + }, + { + "epoch": 0.39, + "learning_rate": 2.8029716212262642e-05, + "loss": 1.421, + "step": 28300 + }, + { + "epoch": 0.4, + "learning_rate": 2.8022750386603325e-05, + "loss": 1.3915, + "step": 28400 + }, + { + "epoch": 0.4, + "learning_rate": 2.8015784560944008e-05, + "loss": 1.3912, + "step": 28500 + }, + { + "epoch": 0.4, + "learning_rate": 2.8008888393541288e-05, + "loss": 1.3958, + "step": 28600 + }, + { + "epoch": 0.4, + "learning_rate": 2.800192256788197e-05, + "loss": 1.4012, + "step": 28700 + }, + { + "epoch": 0.4, + "learning_rate": 2.7994956742222654e-05, + "loss": 1.4497, + "step": 28800 + }, + { + "epoch": 0.4, + "learning_rate": 2.798799091656334e-05, + "loss": 1.4186, + "step": 28900 + }, + { + "epoch": 0.4, + "learning_rate": 2.7981025090904023e-05, + "loss": 1.4303, + "step": 29000 + }, + { + "epoch": 0.41, + "learning_rate": 2.797405926524471e-05, + "loss": 1.4108, + "step": 29100 + }, + { + "epoch": 0.41, + "learning_rate": 2.7967093439585393e-05, + "loss": 1.3992, + "step": 29200 + }, + { + "epoch": 0.41, + "learning_rate": 2.796012761392608e-05, + "loss": 1.4444, + "step": 29300 + }, + { + "epoch": 0.41, + "learning_rate": 2.7953161788266762e-05, + "loss": 1.4092, + "step": 29400 + }, + { + "epoch": 0.41, + "learning_rate": 2.794619596260745e-05, + "loss": 1.417, + "step": 29500 + }, + { + "epoch": 0.41, + "learning_rate": 2.793923013694813e-05, + "loss": 1.4212, + "step": 29600 + }, + { + "epoch": 0.41, + "learning_rate": 2.7932264311288818e-05, + "loss": 1.4464, + "step": 29700 + }, + { + "epoch": 0.42, + "learning_rate": 2.79252984856295e-05, + "loss": 1.3923, + "step": 29800 + }, + { + "epoch": 0.42, + "learning_rate": 2.7918332659970187e-05, + "loss": 1.4111, + "step": 29900 + }, + { + "epoch": 0.42, + "learning_rate": 2.791136683431087e-05, + "loss": 1.4088, + "step": 30000 + }, + { + "epoch": 0.42, + "learning_rate": 2.7904401008651557e-05, + "loss": 1.4142, + "step": 30100 + }, + { + "epoch": 0.42, + "learning_rate": 2.7897435182992243e-05, + "loss": 1.4369, + "step": 30200 + }, + { + "epoch": 0.42, + "learning_rate": 2.7890469357332926e-05, + "loss": 1.414, + "step": 30300 + }, + { + "epoch": 0.42, + "learning_rate": 2.7883503531673612e-05, + "loss": 1.3888, + "step": 30400 + }, + { + "epoch": 0.42, + "learning_rate": 2.7876537706014295e-05, + "loss": 1.4087, + "step": 30500 + }, + { + "epoch": 0.43, + "learning_rate": 2.7869641538611572e-05, + "loss": 1.4189, + "step": 30600 + }, + { + "epoch": 0.43, + "learning_rate": 2.7862675712952258e-05, + "loss": 1.3948, + "step": 30700 + }, + { + "epoch": 0.43, + "learning_rate": 2.785570988729294e-05, + "loss": 1.4055, + "step": 30800 + }, + { + "epoch": 0.43, + "learning_rate": 2.7848744061633628e-05, + "loss": 1.3809, + "step": 30900 + }, + { + "epoch": 0.43, + "learning_rate": 2.7841778235974314e-05, + "loss": 1.454, + "step": 31000 + }, + { + "epoch": 0.43, + "learning_rate": 2.7834812410314994e-05, + "loss": 1.3974, + "step": 31100 + }, + { + "epoch": 0.43, + "learning_rate": 2.782784658465568e-05, + "loss": 1.4263, + "step": 31200 + }, + { + "epoch": 0.44, + "learning_rate": 2.7820880758996363e-05, + "loss": 1.3835, + "step": 31300 + }, + { + "epoch": 0.44, + "learning_rate": 2.781391493333705e-05, + "loss": 1.4252, + "step": 31400 + }, + { + "epoch": 0.44, + "learning_rate": 2.7806949107677732e-05, + "loss": 1.4255, + "step": 31500 + }, + { + "epoch": 0.44, + "learning_rate": 2.779998328201842e-05, + "loss": 1.3858, + "step": 31600 + }, + { + "epoch": 0.44, + "learning_rate": 2.77930174563591e-05, + "loss": 1.3573, + "step": 31700 + }, + { + "epoch": 0.44, + "learning_rate": 2.7786051630699788e-05, + "loss": 1.4498, + "step": 31800 + }, + { + "epoch": 0.44, + "learning_rate": 2.777908580504047e-05, + "loss": 1.3861, + "step": 31900 + }, + { + "epoch": 0.45, + "learning_rate": 2.7772119979381157e-05, + "loss": 1.459, + "step": 32000 + }, + { + "epoch": 0.45, + "learning_rate": 2.776515415372184e-05, + "loss": 1.4208, + "step": 32100 + }, + { + "epoch": 0.45, + "learning_rate": 2.7758188328062527e-05, + "loss": 1.4005, + "step": 32200 + }, + { + "epoch": 0.45, + "learning_rate": 2.775122250240321e-05, + "loss": 1.4153, + "step": 32300 + }, + { + "epoch": 0.45, + "learning_rate": 2.7744256676743896e-05, + "loss": 1.3866, + "step": 32400 + }, + { + "epoch": 0.45, + "learning_rate": 2.773729085108458e-05, + "loss": 1.3921, + "step": 32500 + }, + { + "epoch": 0.45, + "learning_rate": 2.7730325025425265e-05, + "loss": 1.4176, + "step": 32600 + }, + { + "epoch": 0.46, + "learning_rate": 2.772335919976595e-05, + "loss": 1.438, + "step": 32700 + }, + { + "epoch": 0.46, + "learning_rate": 2.7716393374106635e-05, + "loss": 1.4075, + "step": 32800 + }, + { + "epoch": 0.46, + "learning_rate": 2.7709427548447318e-05, + "loss": 1.4416, + "step": 32900 + }, + { + "epoch": 0.46, + "learning_rate": 2.7702531381044598e-05, + "loss": 1.3933, + "step": 33000 + }, + { + "epoch": 0.46, + "learning_rate": 2.769556555538528e-05, + "loss": 1.3889, + "step": 33100 + }, + { + "epoch": 0.46, + "learning_rate": 2.7688599729725967e-05, + "loss": 1.4002, + "step": 33200 + }, + { + "epoch": 0.46, + "learning_rate": 2.768163390406665e-05, + "loss": 1.4135, + "step": 33300 + }, + { + "epoch": 0.47, + "learning_rate": 2.7674668078407336e-05, + "loss": 1.392, + "step": 33400 + }, + { + "epoch": 0.47, + "learning_rate": 2.766770225274802e-05, + "loss": 1.4133, + "step": 33500 + }, + { + "epoch": 0.47, + "learning_rate": 2.7660736427088702e-05, + "loss": 1.397, + "step": 33600 + }, + { + "epoch": 0.47, + "learning_rate": 2.7653770601429385e-05, + "loss": 1.3767, + "step": 33700 + }, + { + "epoch": 0.47, + "learning_rate": 2.7646804775770072e-05, + "loss": 1.411, + "step": 33800 + }, + { + "epoch": 0.47, + "learning_rate": 2.7639838950110755e-05, + "loss": 1.3778, + "step": 33900 + }, + { + "epoch": 0.47, + "learning_rate": 2.763287312445144e-05, + "loss": 1.4178, + "step": 34000 + }, + { + "epoch": 0.48, + "learning_rate": 2.7625907298792124e-05, + "loss": 1.4095, + "step": 34100 + }, + { + "epoch": 0.48, + "learning_rate": 2.761894147313281e-05, + "loss": 1.3936, + "step": 34200 + }, + { + "epoch": 0.48, + "learning_rate": 2.7611975647473497e-05, + "loss": 1.3965, + "step": 34300 + }, + { + "epoch": 0.48, + "learning_rate": 2.760500982181418e-05, + "loss": 1.4105, + "step": 34400 + }, + { + "epoch": 0.48, + "learning_rate": 2.7598043996154866e-05, + "loss": 1.3629, + "step": 34500 + }, + { + "epoch": 0.48, + "learning_rate": 2.7591217487008736e-05, + "loss": 1.3862, + "step": 34600 + }, + { + "epoch": 0.48, + "learning_rate": 2.758425166134942e-05, + "loss": 1.4215, + "step": 34700 + }, + { + "epoch": 0.48, + "learning_rate": 2.7577285835690105e-05, + "loss": 1.3996, + "step": 34800 + }, + { + "epoch": 0.49, + "learning_rate": 2.757032001003079e-05, + "loss": 1.4127, + "step": 34900 + }, + { + "epoch": 0.49, + "learning_rate": 2.7563354184371475e-05, + "loss": 1.3986, + "step": 35000 + }, + { + "epoch": 0.49, + "learning_rate": 2.7556388358712158e-05, + "loss": 1.3808, + "step": 35100 + }, + { + "epoch": 0.49, + "learning_rate": 2.7549422533052844e-05, + "loss": 1.3846, + "step": 35200 + }, + { + "epoch": 0.49, + "learning_rate": 2.7542456707393527e-05, + "loss": 1.3943, + "step": 35300 + }, + { + "epoch": 0.49, + "learning_rate": 2.7535490881734214e-05, + "loss": 1.3846, + "step": 35400 + }, + { + "epoch": 0.49, + "learning_rate": 2.7528525056074897e-05, + "loss": 1.41, + "step": 35500 + }, + { + "epoch": 0.5, + "learning_rate": 2.7521559230415583e-05, + "loss": 1.3983, + "step": 35600 + }, + { + "epoch": 0.5, + "learning_rate": 2.7514593404756266e-05, + "loss": 1.4252, + "step": 35700 + }, + { + "epoch": 0.5, + "learning_rate": 2.7507627579096952e-05, + "loss": 1.3834, + "step": 35800 + }, + { + "epoch": 0.5, + "learning_rate": 2.750066175343764e-05, + "loss": 1.3957, + "step": 35900 + }, + { + "epoch": 0.5, + "learning_rate": 2.749369592777832e-05, + "loss": 1.3715, + "step": 36000 + }, + { + "epoch": 0.5, + "learning_rate": 2.7486730102119008e-05, + "loss": 1.3904, + "step": 36100 + }, + { + "epoch": 0.5, + "learning_rate": 2.7479764276459688e-05, + "loss": 1.4317, + "step": 36200 + }, + { + "epoch": 0.51, + "learning_rate": 2.7472798450800374e-05, + "loss": 1.3789, + "step": 36300 + }, + { + "epoch": 0.51, + "learning_rate": 2.7465832625141057e-05, + "loss": 1.343, + "step": 36400 + }, + { + "epoch": 0.51, + "learning_rate": 2.7458866799481743e-05, + "loss": 1.3381, + "step": 36500 + }, + { + "epoch": 0.51, + "learning_rate": 2.7451900973822426e-05, + "loss": 1.4004, + "step": 36600 + }, + { + "epoch": 0.51, + "learning_rate": 2.7444935148163113e-05, + "loss": 1.4007, + "step": 36700 + }, + { + "epoch": 0.51, + "learning_rate": 2.7437969322503796e-05, + "loss": 1.4021, + "step": 36800 + }, + { + "epoch": 0.51, + "learning_rate": 2.7431003496844482e-05, + "loss": 1.4194, + "step": 36900 + }, + { + "epoch": 0.52, + "learning_rate": 2.7424037671185165e-05, + "loss": 1.3586, + "step": 37000 + }, + { + "epoch": 0.52, + "learning_rate": 2.741707184552585e-05, + "loss": 1.3964, + "step": 37100 + }, + { + "epoch": 0.52, + "learning_rate": 2.7410106019866534e-05, + "loss": 1.4042, + "step": 37200 + }, + { + "epoch": 0.52, + "learning_rate": 2.740314019420722e-05, + "loss": 1.4042, + "step": 37300 + }, + { + "epoch": 0.52, + "learning_rate": 2.7396174368547904e-05, + "loss": 1.4241, + "step": 37400 + }, + { + "epoch": 0.52, + "learning_rate": 2.7389278201145184e-05, + "loss": 1.3756, + "step": 37500 + }, + { + "epoch": 0.52, + "learning_rate": 2.7382312375485867e-05, + "loss": 1.3848, + "step": 37600 + }, + { + "epoch": 0.53, + "learning_rate": 2.7375346549826553e-05, + "loss": 1.4253, + "step": 37700 + }, + { + "epoch": 0.53, + "learning_rate": 2.7368380724167236e-05, + "loss": 1.4135, + "step": 37800 + }, + { + "epoch": 0.53, + "learning_rate": 2.7361414898507922e-05, + "loss": 1.4172, + "step": 37900 + }, + { + "epoch": 0.53, + "learning_rate": 2.7354449072848605e-05, + "loss": 1.416, + "step": 38000 + }, + { + "epoch": 0.53, + "learning_rate": 2.7347483247189292e-05, + "loss": 1.3781, + "step": 38100 + }, + { + "epoch": 0.53, + "learning_rate": 2.7340517421529975e-05, + "loss": 1.3785, + "step": 38200 + }, + { + "epoch": 0.53, + "learning_rate": 2.733355159587066e-05, + "loss": 1.3614, + "step": 38300 + }, + { + "epoch": 0.53, + "learning_rate": 2.7326585770211344e-05, + "loss": 1.3609, + "step": 38400 + }, + { + "epoch": 0.54, + "learning_rate": 2.731961994455203e-05, + "loss": 1.3633, + "step": 38500 + }, + { + "epoch": 0.54, + "learning_rate": 2.731265411889271e-05, + "loss": 1.3579, + "step": 38600 + }, + { + "epoch": 0.54, + "learning_rate": 2.7305688293233396e-05, + "loss": 1.3761, + "step": 38700 + }, + { + "epoch": 0.54, + "learning_rate": 2.729872246757408e-05, + "loss": 1.3834, + "step": 38800 + }, + { + "epoch": 0.54, + "learning_rate": 2.7291756641914766e-05, + "loss": 1.3888, + "step": 38900 + }, + { + "epoch": 0.54, + "learning_rate": 2.728479081625545e-05, + "loss": 1.4012, + "step": 39000 + }, + { + "epoch": 0.54, + "learning_rate": 2.7277824990596135e-05, + "loss": 1.4121, + "step": 39100 + }, + { + "epoch": 0.55, + "learning_rate": 2.727085916493682e-05, + "loss": 1.3844, + "step": 39200 + }, + { + "epoch": 0.55, + "learning_rate": 2.7263893339277505e-05, + "loss": 1.348, + "step": 39300 + }, + { + "epoch": 0.55, + "learning_rate": 2.725692751361819e-05, + "loss": 1.4259, + "step": 39400 + }, + { + "epoch": 0.55, + "learning_rate": 2.7249961687958874e-05, + "loss": 1.3763, + "step": 39500 + }, + { + "epoch": 0.55, + "learning_rate": 2.724299586229956e-05, + "loss": 1.3797, + "step": 39600 + }, + { + "epoch": 0.55, + "learning_rate": 2.7236030036640243e-05, + "loss": 1.4056, + "step": 39700 + }, + { + "epoch": 0.55, + "learning_rate": 2.722906421098093e-05, + "loss": 1.3613, + "step": 39800 + }, + { + "epoch": 0.56, + "learning_rate": 2.7222098385321613e-05, + "loss": 1.387, + "step": 39900 + }, + { + "epoch": 0.56, + "learning_rate": 2.72151325596623e-05, + "loss": 1.3829, + "step": 40000 + }, + { + "epoch": 0.56, + "learning_rate": 2.7208166734002982e-05, + "loss": 1.4094, + "step": 40100 + }, + { + "epoch": 0.56, + "learning_rate": 2.720120090834367e-05, + "loss": 1.378, + "step": 40200 + }, + { + "epoch": 0.56, + "learning_rate": 2.719423508268435e-05, + "loss": 1.4012, + "step": 40300 + }, + { + "epoch": 0.56, + "learning_rate": 2.7187269257025038e-05, + "loss": 1.4169, + "step": 40400 + }, + { + "epoch": 0.56, + "learning_rate": 2.718030343136572e-05, + "loss": 1.3587, + "step": 40500 + }, + { + "epoch": 0.57, + "learning_rate": 2.7173337605706407e-05, + "loss": 1.4081, + "step": 40600 + }, + { + "epoch": 0.57, + "learning_rate": 2.716637178004709e-05, + "loss": 1.372, + "step": 40700 + }, + { + "epoch": 0.57, + "learning_rate": 2.7159405954387773e-05, + "loss": 1.3797, + "step": 40800 + }, + { + "epoch": 0.57, + "learning_rate": 2.7152440128728456e-05, + "loss": 1.3596, + "step": 40900 + }, + { + "epoch": 0.57, + "learning_rate": 2.7145474303069142e-05, + "loss": 1.3785, + "step": 41000 + }, + { + "epoch": 0.57, + "learning_rate": 2.7138508477409825e-05, + "loss": 1.3757, + "step": 41100 + }, + { + "epoch": 0.57, + "learning_rate": 2.7131542651750512e-05, + "loss": 1.3541, + "step": 41200 + }, + { + "epoch": 0.58, + "learning_rate": 2.7124576826091195e-05, + "loss": 1.359, + "step": 41300 + }, + { + "epoch": 0.58, + "learning_rate": 2.711761100043188e-05, + "loss": 1.3764, + "step": 41400 + }, + { + "epoch": 0.58, + "learning_rate": 2.7110645174772564e-05, + "loss": 1.397, + "step": 41500 + }, + { + "epoch": 0.58, + "learning_rate": 2.710367934911325e-05, + "loss": 1.378, + "step": 41600 + }, + { + "epoch": 0.58, + "learning_rate": 2.7096713523453937e-05, + "loss": 1.3919, + "step": 41700 + }, + { + "epoch": 0.58, + "learning_rate": 2.708974769779462e-05, + "loss": 1.3885, + "step": 41800 + }, + { + "epoch": 0.58, + "learning_rate": 2.7082781872135306e-05, + "loss": 1.3811, + "step": 41900 + }, + { + "epoch": 0.59, + "learning_rate": 2.707581604647599e-05, + "loss": 1.3357, + "step": 42000 + }, + { + "epoch": 0.59, + "learning_rate": 2.7068850220816676e-05, + "loss": 1.378, + "step": 42100 + }, + { + "epoch": 0.59, + "learning_rate": 2.706188439515736e-05, + "loss": 1.3892, + "step": 42200 + }, + { + "epoch": 0.59, + "learning_rate": 2.7054918569498045e-05, + "loss": 1.4016, + "step": 42300 + }, + { + "epoch": 0.59, + "learning_rate": 2.7047952743838728e-05, + "loss": 1.3872, + "step": 42400 + }, + { + "epoch": 0.59, + "learning_rate": 2.7040986918179414e-05, + "loss": 1.3525, + "step": 42500 + }, + { + "epoch": 0.59, + "learning_rate": 2.7034021092520097e-05, + "loss": 1.4103, + "step": 42600 + }, + { + "epoch": 0.59, + "learning_rate": 2.7027055266860784e-05, + "loss": 1.4118, + "step": 42700 + }, + { + "epoch": 0.6, + "learning_rate": 2.7020089441201467e-05, + "loss": 1.3327, + "step": 42800 + }, + { + "epoch": 0.6, + "learning_rate": 2.7013123615542153e-05, + "loss": 1.3728, + "step": 42900 + }, + { + "epoch": 0.6, + "learning_rate": 2.7006157789882836e-05, + "loss": 1.3841, + "step": 43000 + }, + { + "epoch": 0.6, + "learning_rate": 2.699919196422352e-05, + "loss": 1.3852, + "step": 43100 + }, + { + "epoch": 0.6, + "learning_rate": 2.6992226138564202e-05, + "loss": 1.3959, + "step": 43200 + }, + { + "epoch": 0.6, + "learning_rate": 2.698526031290489e-05, + "loss": 1.3464, + "step": 43300 + }, + { + "epoch": 0.6, + "learning_rate": 2.697829448724557e-05, + "loss": 1.3963, + "step": 43400 + }, + { + "epoch": 0.61, + "learning_rate": 2.6971328661586258e-05, + "loss": 1.3546, + "step": 43500 + }, + { + "epoch": 0.61, + "learning_rate": 2.696436283592694e-05, + "loss": 1.3473, + "step": 43600 + }, + { + "epoch": 0.61, + "learning_rate": 2.6957397010267627e-05, + "loss": 1.3486, + "step": 43700 + }, + { + "epoch": 0.61, + "learning_rate": 2.695043118460831e-05, + "loss": 1.3969, + "step": 43800 + }, + { + "epoch": 0.61, + "learning_rate": 2.6943465358948997e-05, + "loss": 1.3857, + "step": 43900 + }, + { + "epoch": 0.61, + "learning_rate": 2.6936499533289683e-05, + "loss": 1.3501, + "step": 44000 + }, + { + "epoch": 0.61, + "learning_rate": 2.6929533707630366e-05, + "loss": 1.3741, + "step": 44100 + }, + { + "epoch": 0.62, + "learning_rate": 2.6922567881971052e-05, + "loss": 1.3536, + "step": 44200 + }, + { + "epoch": 0.62, + "learning_rate": 2.6915602056311735e-05, + "loss": 1.3821, + "step": 44300 + }, + { + "epoch": 0.62, + "learning_rate": 2.6908636230652422e-05, + "loss": 1.3686, + "step": 44400 + }, + { + "epoch": 0.62, + "learning_rate": 2.6901670404993105e-05, + "loss": 1.3842, + "step": 44500 + }, + { + "epoch": 0.62, + "learning_rate": 2.689470457933379e-05, + "loss": 1.3715, + "step": 44600 + }, + { + "epoch": 0.62, + "learning_rate": 2.6887738753674474e-05, + "loss": 1.3883, + "step": 44700 + }, + { + "epoch": 0.62, + "learning_rate": 2.688077292801516e-05, + "loss": 1.3736, + "step": 44800 + }, + { + "epoch": 0.63, + "learning_rate": 2.6873876760612437e-05, + "loss": 1.4091, + "step": 44900 + }, + { + "epoch": 0.63, + "learning_rate": 2.6866910934953123e-05, + "loss": 1.3606, + "step": 45000 + }, + { + "epoch": 0.63, + "learning_rate": 2.6859945109293806e-05, + "loss": 1.3626, + "step": 45100 + }, + { + "epoch": 0.63, + "learning_rate": 2.6852979283634493e-05, + "loss": 1.3744, + "step": 45200 + }, + { + "epoch": 0.63, + "learning_rate": 2.6846013457975176e-05, + "loss": 1.3564, + "step": 45300 + }, + { + "epoch": 0.63, + "learning_rate": 2.6839047632315862e-05, + "loss": 1.3506, + "step": 45400 + }, + { + "epoch": 0.63, + "learning_rate": 2.683208180665654e-05, + "loss": 1.3395, + "step": 45500 + }, + { + "epoch": 0.64, + "learning_rate": 2.6825115980997228e-05, + "loss": 1.3471, + "step": 45600 + }, + { + "epoch": 0.64, + "learning_rate": 2.681815015533791e-05, + "loss": 1.3495, + "step": 45700 + }, + { + "epoch": 0.64, + "learning_rate": 2.6811184329678597e-05, + "loss": 1.3854, + "step": 45800 + }, + { + "epoch": 0.64, + "learning_rate": 2.680421850401928e-05, + "loss": 1.3284, + "step": 45900 + }, + { + "epoch": 0.64, + "learning_rate": 2.6797252678359967e-05, + "loss": 1.3682, + "step": 46000 + }, + { + "epoch": 0.64, + "learning_rate": 2.679028685270065e-05, + "loss": 1.3701, + "step": 46100 + }, + { + "epoch": 0.64, + "learning_rate": 2.6783321027041336e-05, + "loss": 1.3407, + "step": 46200 + }, + { + "epoch": 0.65, + "learning_rate": 2.677635520138202e-05, + "loss": 1.39, + "step": 46300 + }, + { + "epoch": 0.65, + "learning_rate": 2.6769389375722705e-05, + "loss": 1.3653, + "step": 46400 + }, + { + "epoch": 0.65, + "learning_rate": 2.676242355006339e-05, + "loss": 1.3297, + "step": 46500 + }, + { + "epoch": 0.65, + "learning_rate": 2.6755457724404075e-05, + "loss": 1.3955, + "step": 46600 + }, + { + "epoch": 0.65, + "learning_rate": 2.6748491898744758e-05, + "loss": 1.3503, + "step": 46700 + }, + { + "epoch": 0.65, + "learning_rate": 2.6741526073085444e-05, + "loss": 1.3589, + "step": 46800 + }, + { + "epoch": 0.65, + "learning_rate": 2.6734560247426127e-05, + "loss": 1.3556, + "step": 46900 + }, + { + "epoch": 0.65, + "learning_rate": 2.6727594421766814e-05, + "loss": 1.3429, + "step": 47000 + }, + { + "epoch": 0.66, + "learning_rate": 2.6720628596107497e-05, + "loss": 1.3697, + "step": 47100 + }, + { + "epoch": 0.66, + "learning_rate": 2.6713662770448183e-05, + "loss": 1.3673, + "step": 47200 + }, + { + "epoch": 0.66, + "learning_rate": 2.670669694478887e-05, + "loss": 1.392, + "step": 47300 + }, + { + "epoch": 0.66, + "learning_rate": 2.6699731119129552e-05, + "loss": 1.3497, + "step": 47400 + }, + { + "epoch": 0.66, + "learning_rate": 2.669283495172683e-05, + "loss": 1.3425, + "step": 47500 + }, + { + "epoch": 0.66, + "learning_rate": 2.6685869126067515e-05, + "loss": 1.3948, + "step": 47600 + }, + { + "epoch": 0.66, + "learning_rate": 2.6678903300408198e-05, + "loss": 1.3741, + "step": 47700 + }, + { + "epoch": 0.67, + "learning_rate": 2.6671937474748884e-05, + "loss": 1.348, + "step": 47800 + }, + { + "epoch": 0.67, + "learning_rate": 2.6664971649089564e-05, + "loss": 1.3932, + "step": 47900 + }, + { + "epoch": 0.67, + "learning_rate": 2.665800582343025e-05, + "loss": 1.3822, + "step": 48000 + }, + { + "epoch": 0.67, + "learning_rate": 2.6651039997770937e-05, + "loss": 1.3723, + "step": 48100 + }, + { + "epoch": 0.67, + "learning_rate": 2.664407417211162e-05, + "loss": 1.4158, + "step": 48200 + }, + { + "epoch": 0.67, + "learning_rate": 2.6637108346452306e-05, + "loss": 1.3831, + "step": 48300 + }, + { + "epoch": 0.67, + "learning_rate": 2.663014252079299e-05, + "loss": 1.3851, + "step": 48400 + }, + { + "epoch": 0.68, + "learning_rate": 2.6623176695133676e-05, + "loss": 1.3529, + "step": 48500 + }, + { + "epoch": 0.68, + "learning_rate": 2.661621086947436e-05, + "loss": 1.4082, + "step": 48600 + }, + { + "epoch": 0.68, + "learning_rate": 2.6609245043815045e-05, + "loss": 1.3375, + "step": 48700 + }, + { + "epoch": 0.68, + "learning_rate": 2.6602279218155728e-05, + "loss": 1.3496, + "step": 48800 + }, + { + "epoch": 0.68, + "learning_rate": 2.6595313392496414e-05, + "loss": 1.3226, + "step": 48900 + }, + { + "epoch": 0.68, + "learning_rate": 2.6588347566837097e-05, + "loss": 1.352, + "step": 49000 + }, + { + "epoch": 0.68, + "learning_rate": 2.6581381741177784e-05, + "loss": 1.3398, + "step": 49100 + }, + { + "epoch": 0.69, + "learning_rate": 2.6574415915518467e-05, + "loss": 1.3801, + "step": 49200 + }, + { + "epoch": 0.69, + "learning_rate": 2.6567450089859153e-05, + "loss": 1.3649, + "step": 49300 + }, + { + "epoch": 0.69, + "learning_rate": 2.6560484264199836e-05, + "loss": 1.3455, + "step": 49400 + }, + { + "epoch": 0.69, + "learning_rate": 2.6553518438540522e-05, + "loss": 1.34, + "step": 49500 + }, + { + "epoch": 0.69, + "learning_rate": 2.6546552612881205e-05, + "loss": 1.3499, + "step": 49600 + }, + { + "epoch": 0.69, + "learning_rate": 2.6539586787221892e-05, + "loss": 1.3164, + "step": 49700 + }, + { + "epoch": 0.69, + "learning_rate": 2.6532620961562575e-05, + "loss": 1.3259, + "step": 49800 + }, + { + "epoch": 0.7, + "learning_rate": 2.652565513590326e-05, + "loss": 1.3376, + "step": 49900 + }, + { + "epoch": 0.7, + "learning_rate": 2.6518689310243944e-05, + "loss": 1.3874, + "step": 50000 + }, + { + "epoch": 0.7, + "learning_rate": 2.651172348458463e-05, + "loss": 1.4079, + "step": 50100 + }, + { + "epoch": 0.7, + "learning_rate": 2.650475765892531e-05, + "loss": 1.4041, + "step": 50200 + }, + { + "epoch": 0.7, + "learning_rate": 2.6497791833265996e-05, + "loss": 1.3329, + "step": 50300 + }, + { + "epoch": 0.7, + "learning_rate": 2.649082600760668e-05, + "loss": 1.3223, + "step": 50400 + }, + { + "epoch": 0.7, + "learning_rate": 2.6483860181947366e-05, + "loss": 1.3544, + "step": 50500 + }, + { + "epoch": 0.7, + "learning_rate": 2.6476894356288052e-05, + "loss": 1.3594, + "step": 50600 + }, + { + "epoch": 0.71, + "learning_rate": 2.6469928530628735e-05, + "loss": 1.3821, + "step": 50700 + }, + { + "epoch": 0.71, + "learning_rate": 2.646296270496942e-05, + "loss": 1.3086, + "step": 50800 + }, + { + "epoch": 0.71, + "learning_rate": 2.6455996879310105e-05, + "loss": 1.3501, + "step": 50900 + }, + { + "epoch": 0.71, + "learning_rate": 2.644903105365079e-05, + "loss": 1.3729, + "step": 51000 + }, + { + "epoch": 0.71, + "learning_rate": 2.6442065227991474e-05, + "loss": 1.3819, + "step": 51100 + }, + { + "epoch": 0.71, + "learning_rate": 2.643509940233216e-05, + "loss": 1.3432, + "step": 51200 + }, + { + "epoch": 0.71, + "learning_rate": 2.6428133576672843e-05, + "loss": 1.3654, + "step": 51300 + }, + { + "epoch": 0.72, + "learning_rate": 2.642116775101353e-05, + "loss": 1.3374, + "step": 51400 + }, + { + "epoch": 0.72, + "learning_rate": 2.6414201925354213e-05, + "loss": 1.3297, + "step": 51500 + }, + { + "epoch": 0.72, + "learning_rate": 2.6407305757951492e-05, + "loss": 1.3748, + "step": 51600 + }, + { + "epoch": 0.72, + "learning_rate": 2.6400339932292175e-05, + "loss": 1.3697, + "step": 51700 + }, + { + "epoch": 0.72, + "learning_rate": 2.6393374106632862e-05, + "loss": 1.3465, + "step": 51800 + }, + { + "epoch": 0.72, + "learning_rate": 2.6386408280973545e-05, + "loss": 1.321, + "step": 51900 + }, + { + "epoch": 0.72, + "learning_rate": 2.637944245531423e-05, + "loss": 1.325, + "step": 52000 + }, + { + "epoch": 0.73, + "learning_rate": 2.6372476629654914e-05, + "loss": 1.3387, + "step": 52100 + }, + { + "epoch": 0.73, + "learning_rate": 2.63655108039956e-05, + "loss": 1.3562, + "step": 52200 + }, + { + "epoch": 0.73, + "learning_rate": 2.6358614636592877e-05, + "loss": 1.3304, + "step": 52300 + }, + { + "epoch": 0.73, + "learning_rate": 2.6351648810933563e-05, + "loss": 1.3403, + "step": 52400 + }, + { + "epoch": 0.73, + "learning_rate": 2.634475264353084e-05, + "loss": 1.3563, + "step": 52500 + }, + { + "epoch": 0.73, + "learning_rate": 2.6337786817871523e-05, + "loss": 1.3802, + "step": 52600 + }, + { + "epoch": 0.73, + "learning_rate": 2.633082099221221e-05, + "loss": 1.3703, + "step": 52700 + }, + { + "epoch": 0.74, + "learning_rate": 2.6323855166552892e-05, + "loss": 1.3479, + "step": 52800 + }, + { + "epoch": 0.74, + "learning_rate": 2.6316889340893575e-05, + "loss": 1.3605, + "step": 52900 + }, + { + "epoch": 0.74, + "learning_rate": 2.630992351523426e-05, + "loss": 1.3378, + "step": 53000 + }, + { + "epoch": 0.74, + "learning_rate": 2.6302957689574945e-05, + "loss": 1.3712, + "step": 53100 + }, + { + "epoch": 0.74, + "learning_rate": 2.629599186391563e-05, + "loss": 1.3279, + "step": 53200 + }, + { + "epoch": 0.74, + "learning_rate": 2.6289026038256314e-05, + "loss": 1.3342, + "step": 53300 + }, + { + "epoch": 0.74, + "learning_rate": 2.6282060212597e-05, + "loss": 1.3442, + "step": 53400 + }, + { + "epoch": 0.75, + "learning_rate": 2.6275094386937683e-05, + "loss": 1.3437, + "step": 53500 + }, + { + "epoch": 0.75, + "learning_rate": 2.626812856127837e-05, + "loss": 1.3576, + "step": 53600 + }, + { + "epoch": 0.75, + "learning_rate": 2.6261162735619053e-05, + "loss": 1.358, + "step": 53700 + }, + { + "epoch": 0.75, + "learning_rate": 2.625419690995974e-05, + "loss": 1.3469, + "step": 53800 + }, + { + "epoch": 0.75, + "learning_rate": 2.6247231084300422e-05, + "loss": 1.3949, + "step": 53900 + }, + { + "epoch": 0.75, + "learning_rate": 2.624026525864111e-05, + "loss": 1.3745, + "step": 54000 + }, + { + "epoch": 0.75, + "learning_rate": 2.623329943298179e-05, + "loss": 1.3844, + "step": 54100 + }, + { + "epoch": 0.76, + "learning_rate": 2.6226333607322478e-05, + "loss": 1.3565, + "step": 54200 + }, + { + "epoch": 0.76, + "learning_rate": 2.621936778166316e-05, + "loss": 1.3887, + "step": 54300 + }, + { + "epoch": 0.76, + "learning_rate": 2.6212401956003847e-05, + "loss": 1.3459, + "step": 54400 + }, + { + "epoch": 0.76, + "learning_rate": 2.620543613034453e-05, + "loss": 1.3812, + "step": 54500 + }, + { + "epoch": 0.76, + "learning_rate": 2.6198470304685216e-05, + "loss": 1.3449, + "step": 54600 + }, + { + "epoch": 0.76, + "learning_rate": 2.61915044790259e-05, + "loss": 1.3412, + "step": 54700 + }, + { + "epoch": 0.76, + "learning_rate": 2.6184538653366586e-05, + "loss": 1.3461, + "step": 54800 + }, + { + "epoch": 0.76, + "learning_rate": 2.617757282770727e-05, + "loss": 1.3561, + "step": 54900 + }, + { + "epoch": 0.77, + "learning_rate": 2.6170607002047955e-05, + "loss": 1.3813, + "step": 55000 + }, + { + "epoch": 0.77, + "learning_rate": 2.6163641176388638e-05, + "loss": 1.374, + "step": 55100 + }, + { + "epoch": 0.77, + "learning_rate": 2.615667535072932e-05, + "loss": 1.3128, + "step": 55200 + }, + { + "epoch": 0.77, + "learning_rate": 2.6149709525070004e-05, + "loss": 1.3356, + "step": 55300 + }, + { + "epoch": 0.77, + "learning_rate": 2.614274369941069e-05, + "loss": 1.3672, + "step": 55400 + }, + { + "epoch": 0.77, + "learning_rate": 2.6135777873751377e-05, + "loss": 1.3412, + "step": 55500 + }, + { + "epoch": 0.77, + "learning_rate": 2.612881204809206e-05, + "loss": 1.3583, + "step": 55600 + }, + { + "epoch": 0.78, + "learning_rate": 2.6121846222432746e-05, + "loss": 1.3645, + "step": 55700 + }, + { + "epoch": 0.78, + "learning_rate": 2.611488039677343e-05, + "loss": 1.3354, + "step": 55800 + }, + { + "epoch": 0.78, + "learning_rate": 2.6107914571114116e-05, + "loss": 1.383, + "step": 55900 + }, + { + "epoch": 0.78, + "learning_rate": 2.61009487454548e-05, + "loss": 1.2789, + "step": 56000 + }, + { + "epoch": 0.78, + "learning_rate": 2.6093982919795485e-05, + "loss": 1.3473, + "step": 56100 + }, + { + "epoch": 0.78, + "learning_rate": 2.6087017094136168e-05, + "loss": 1.3763, + "step": 56200 + }, + { + "epoch": 0.78, + "learning_rate": 2.6080051268476854e-05, + "loss": 1.3376, + "step": 56300 + }, + { + "epoch": 0.79, + "learning_rate": 2.6073085442817537e-05, + "loss": 1.3036, + "step": 56400 + }, + { + "epoch": 0.79, + "learning_rate": 2.6066119617158224e-05, + "loss": 1.3755, + "step": 56500 + }, + { + "epoch": 0.79, + "learning_rate": 2.6059153791498907e-05, + "loss": 1.3289, + "step": 56600 + }, + { + "epoch": 0.79, + "learning_rate": 2.6052187965839593e-05, + "loss": 1.357, + "step": 56700 + }, + { + "epoch": 0.79, + "learning_rate": 2.6045222140180276e-05, + "loss": 1.3371, + "step": 56800 + }, + { + "epoch": 0.79, + "learning_rate": 2.6038256314520962e-05, + "loss": 1.2948, + "step": 56900 + }, + { + "epoch": 0.79, + "learning_rate": 2.6031290488861645e-05, + "loss": 1.3417, + "step": 57000 + }, + { + "epoch": 0.8, + "learning_rate": 2.6024324663202332e-05, + "loss": 1.3512, + "step": 57100 + }, + { + "epoch": 0.8, + "learning_rate": 2.6017358837543015e-05, + "loss": 1.3313, + "step": 57200 + }, + { + "epoch": 0.8, + "learning_rate": 2.60103930118837e-05, + "loss": 1.3259, + "step": 57300 + }, + { + "epoch": 0.8, + "learning_rate": 2.600342718622438e-05, + "loss": 1.3269, + "step": 57400 + }, + { + "epoch": 0.8, + "learning_rate": 2.5996461360565067e-05, + "loss": 1.3432, + "step": 57500 + }, + { + "epoch": 0.8, + "learning_rate": 2.598949553490575e-05, + "loss": 1.3698, + "step": 57600 + }, + { + "epoch": 0.8, + "learning_rate": 2.5982529709246437e-05, + "loss": 1.3546, + "step": 57700 + }, + { + "epoch": 0.81, + "learning_rate": 2.597556388358712e-05, + "loss": 1.383, + "step": 57800 + }, + { + "epoch": 0.81, + "learning_rate": 2.5968598057927806e-05, + "loss": 1.2983, + "step": 57900 + }, + { + "epoch": 0.81, + "learning_rate": 2.5961632232268492e-05, + "loss": 1.3356, + "step": 58000 + }, + { + "epoch": 0.81, + "learning_rate": 2.595473606486577e-05, + "loss": 1.3308, + "step": 58100 + }, + { + "epoch": 0.81, + "learning_rate": 2.5947770239206452e-05, + "loss": 1.3321, + "step": 58200 + }, + { + "epoch": 0.81, + "learning_rate": 2.5940804413547138e-05, + "loss": 1.3815, + "step": 58300 + }, + { + "epoch": 0.81, + "learning_rate": 2.593383858788782e-05, + "loss": 1.3486, + "step": 58400 + }, + { + "epoch": 0.82, + "learning_rate": 2.5926872762228507e-05, + "loss": 1.3451, + "step": 58500 + }, + { + "epoch": 0.82, + "learning_rate": 2.591990693656919e-05, + "loss": 1.3521, + "step": 58600 + }, + { + "epoch": 0.82, + "learning_rate": 2.5912941110909877e-05, + "loss": 1.3315, + "step": 58700 + }, + { + "epoch": 0.82, + "learning_rate": 2.5905975285250563e-05, + "loss": 1.3411, + "step": 58800 + }, + { + "epoch": 0.82, + "learning_rate": 2.5899009459591246e-05, + "loss": 1.2954, + "step": 58900 + }, + { + "epoch": 0.82, + "learning_rate": 2.5892043633931933e-05, + "loss": 1.3433, + "step": 59000 + }, + { + "epoch": 0.82, + "learning_rate": 2.5885077808272616e-05, + "loss": 1.3422, + "step": 59100 + }, + { + "epoch": 0.82, + "learning_rate": 2.5878111982613302e-05, + "loss": 1.3384, + "step": 59200 + }, + { + "epoch": 0.83, + "learning_rate": 2.5871146156953985e-05, + "loss": 1.2941, + "step": 59300 + }, + { + "epoch": 0.83, + "learning_rate": 2.586418033129467e-05, + "loss": 1.3337, + "step": 59400 + }, + { + "epoch": 0.83, + "learning_rate": 2.5857214505635354e-05, + "loss": 1.3622, + "step": 59500 + }, + { + "epoch": 0.83, + "learning_rate": 2.585024867997604e-05, + "loss": 1.36, + "step": 59600 + }, + { + "epoch": 0.83, + "learning_rate": 2.5843282854316724e-05, + "loss": 1.336, + "step": 59700 + }, + { + "epoch": 0.83, + "learning_rate": 2.583631702865741e-05, + "loss": 1.3074, + "step": 59800 + }, + { + "epoch": 0.83, + "learning_rate": 2.582935120299809e-05, + "loss": 1.3693, + "step": 59900 + }, + { + "epoch": 0.84, + "learning_rate": 2.5822385377338776e-05, + "loss": 1.3477, + "step": 60000 + }, + { + "epoch": 0.84, + "learning_rate": 2.581541955167946e-05, + "loss": 1.3297, + "step": 60100 + }, + { + "epoch": 0.84, + "learning_rate": 2.5808453726020145e-05, + "loss": 1.3227, + "step": 60200 + }, + { + "epoch": 0.84, + "learning_rate": 2.580148790036083e-05, + "loss": 1.3525, + "step": 60300 + }, + { + "epoch": 0.84, + "learning_rate": 2.5794522074701515e-05, + "loss": 1.374, + "step": 60400 + }, + { + "epoch": 0.84, + "learning_rate": 2.5787556249042198e-05, + "loss": 1.3384, + "step": 60500 + }, + { + "epoch": 0.84, + "learning_rate": 2.5780590423382884e-05, + "loss": 1.3401, + "step": 60600 + }, + { + "epoch": 0.85, + "learning_rate": 2.5773624597723567e-05, + "loss": 1.3454, + "step": 60700 + }, + { + "epoch": 0.85, + "learning_rate": 2.5766658772064253e-05, + "loss": 1.3388, + "step": 60800 + }, + { + "epoch": 0.85, + "learning_rate": 2.5759692946404936e-05, + "loss": 1.3883, + "step": 60900 + }, + { + "epoch": 0.85, + "learning_rate": 2.5752727120745623e-05, + "loss": 1.3638, + "step": 61000 + }, + { + "epoch": 0.85, + "learning_rate": 2.5745761295086306e-05, + "loss": 1.3263, + "step": 61100 + }, + { + "epoch": 0.85, + "learning_rate": 2.5738795469426992e-05, + "loss": 1.3555, + "step": 61200 + }, + { + "epoch": 0.85, + "learning_rate": 2.573182964376768e-05, + "loss": 1.3402, + "step": 61300 + }, + { + "epoch": 0.86, + "learning_rate": 2.572486381810836e-05, + "loss": 1.3395, + "step": 61400 + }, + { + "epoch": 0.86, + "learning_rate": 2.5717897992449048e-05, + "loss": 1.3215, + "step": 61500 + }, + { + "epoch": 0.86, + "learning_rate": 2.571093216678973e-05, + "loss": 1.3557, + "step": 61600 + }, + { + "epoch": 0.86, + "learning_rate": 2.5703966341130417e-05, + "loss": 1.3658, + "step": 61700 + }, + { + "epoch": 0.86, + "learning_rate": 2.56970005154711e-05, + "loss": 1.3467, + "step": 61800 + }, + { + "epoch": 0.86, + "learning_rate": 2.5690034689811787e-05, + "loss": 1.3427, + "step": 61900 + }, + { + "epoch": 0.86, + "learning_rate": 2.568306886415247e-05, + "loss": 1.3249, + "step": 62000 + }, + { + "epoch": 0.87, + "learning_rate": 2.5676103038493153e-05, + "loss": 1.3471, + "step": 62100 + }, + { + "epoch": 0.87, + "learning_rate": 2.5669206871090432e-05, + "loss": 1.3272, + "step": 62200 + }, + { + "epoch": 0.87, + "learning_rate": 2.5662241045431115e-05, + "loss": 1.307, + "step": 62300 + }, + { + "epoch": 0.87, + "learning_rate": 2.56552752197718e-05, + "loss": 1.2977, + "step": 62400 + }, + { + "epoch": 0.87, + "learning_rate": 2.5648309394112485e-05, + "loss": 1.3312, + "step": 62500 + }, + { + "epoch": 0.87, + "learning_rate": 2.5641343568453168e-05, + "loss": 1.3112, + "step": 62600 + }, + { + "epoch": 0.87, + "learning_rate": 2.5634377742793854e-05, + "loss": 1.377, + "step": 62700 + }, + { + "epoch": 0.87, + "learning_rate": 2.5627411917134537e-05, + "loss": 1.3737, + "step": 62800 + }, + { + "epoch": 0.88, + "learning_rate": 2.5620446091475224e-05, + "loss": 1.3194, + "step": 62900 + }, + { + "epoch": 0.88, + "learning_rate": 2.5613480265815907e-05, + "loss": 1.3558, + "step": 63000 + }, + { + "epoch": 0.88, + "learning_rate": 2.5606514440156593e-05, + "loss": 1.3076, + "step": 63100 + }, + { + "epoch": 0.88, + "learning_rate": 2.5599548614497276e-05, + "loss": 1.3565, + "step": 63200 + }, + { + "epoch": 0.88, + "learning_rate": 2.5592582788837962e-05, + "loss": 1.3515, + "step": 63300 + }, + { + "epoch": 0.88, + "learning_rate": 2.5585616963178645e-05, + "loss": 1.3132, + "step": 63400 + }, + { + "epoch": 0.88, + "learning_rate": 2.557865113751933e-05, + "loss": 1.3345, + "step": 63500 + }, + { + "epoch": 0.89, + "learning_rate": 2.5571685311860015e-05, + "loss": 1.3776, + "step": 63600 + }, + { + "epoch": 0.89, + "learning_rate": 2.55647194862007e-05, + "loss": 1.3238, + "step": 63700 + }, + { + "epoch": 0.89, + "learning_rate": 2.5557753660541384e-05, + "loss": 1.3039, + "step": 63800 + }, + { + "epoch": 0.89, + "learning_rate": 2.555078783488207e-05, + "loss": 1.3506, + "step": 63900 + }, + { + "epoch": 0.89, + "learning_rate": 2.5543822009222753e-05, + "loss": 1.3469, + "step": 64000 + }, + { + "epoch": 0.89, + "learning_rate": 2.553685618356344e-05, + "loss": 1.3297, + "step": 64100 + }, + { + "epoch": 0.89, + "learning_rate": 2.5529890357904123e-05, + "loss": 1.3315, + "step": 64200 + }, + { + "epoch": 0.9, + "learning_rate": 2.5522994190501403e-05, + "loss": 1.3083, + "step": 64300 + }, + { + "epoch": 0.9, + "learning_rate": 2.5516028364842086e-05, + "loss": 1.3433, + "step": 64400 + }, + { + "epoch": 0.9, + "learning_rate": 2.5509062539182772e-05, + "loss": 1.3406, + "step": 64500 + }, + { + "epoch": 0.9, + "learning_rate": 2.5502096713523455e-05, + "loss": 1.3269, + "step": 64600 + }, + { + "epoch": 0.9, + "learning_rate": 2.5495130887864138e-05, + "loss": 1.3314, + "step": 64700 + }, + { + "epoch": 0.9, + "learning_rate": 2.548816506220482e-05, + "loss": 1.3872, + "step": 64800 + }, + { + "epoch": 0.9, + "learning_rate": 2.5481199236545507e-05, + "loss": 1.3365, + "step": 64900 + }, + { + "epoch": 0.91, + "learning_rate": 2.547423341088619e-05, + "loss": 1.3184, + "step": 65000 + }, + { + "epoch": 0.91, + "learning_rate": 2.5467267585226877e-05, + "loss": 1.2843, + "step": 65100 + }, + { + "epoch": 0.91, + "learning_rate": 2.5460301759567563e-05, + "loss": 1.3652, + "step": 65200 + }, + { + "epoch": 0.91, + "learning_rate": 2.5453335933908246e-05, + "loss": 1.3367, + "step": 65300 + }, + { + "epoch": 0.91, + "learning_rate": 2.5446370108248932e-05, + "loss": 1.3429, + "step": 65400 + }, + { + "epoch": 0.91, + "learning_rate": 2.5439404282589615e-05, + "loss": 1.3395, + "step": 65500 + }, + { + "epoch": 0.91, + "learning_rate": 2.5432438456930302e-05, + "loss": 1.329, + "step": 65600 + }, + { + "epoch": 0.92, + "learning_rate": 2.5425542289527578e-05, + "loss": 1.31, + "step": 65700 + }, + { + "epoch": 0.92, + "learning_rate": 2.541857646386826e-05, + "loss": 1.3535, + "step": 65800 + }, + { + "epoch": 0.92, + "learning_rate": 2.5411610638208948e-05, + "loss": 1.3204, + "step": 65900 + }, + { + "epoch": 0.92, + "learning_rate": 2.540464481254963e-05, + "loss": 1.3465, + "step": 66000 + }, + { + "epoch": 0.92, + "learning_rate": 2.5397678986890317e-05, + "loss": 1.3004, + "step": 66100 + }, + { + "epoch": 0.92, + "learning_rate": 2.5390713161231003e-05, + "loss": 1.3113, + "step": 66200 + }, + { + "epoch": 0.92, + "learning_rate": 2.5383747335571686e-05, + "loss": 1.3437, + "step": 66300 + }, + { + "epoch": 0.93, + "learning_rate": 2.5376781509912373e-05, + "loss": 1.3098, + "step": 66400 + }, + { + "epoch": 0.93, + "learning_rate": 2.5369815684253056e-05, + "loss": 1.3631, + "step": 66500 + }, + { + "epoch": 0.93, + "learning_rate": 2.5362849858593742e-05, + "loss": 1.3486, + "step": 66600 + }, + { + "epoch": 0.93, + "learning_rate": 2.5355884032934425e-05, + "loss": 1.3174, + "step": 66700 + }, + { + "epoch": 0.93, + "learning_rate": 2.534891820727511e-05, + "loss": 1.3509, + "step": 66800 + }, + { + "epoch": 0.93, + "learning_rate": 2.5341952381615794e-05, + "loss": 1.3222, + "step": 66900 + }, + { + "epoch": 0.93, + "learning_rate": 2.533498655595648e-05, + "loss": 1.3343, + "step": 67000 + }, + { + "epoch": 0.93, + "learning_rate": 2.532802073029716e-05, + "loss": 1.341, + "step": 67100 + }, + { + "epoch": 0.94, + "learning_rate": 2.5321054904637847e-05, + "loss": 1.3564, + "step": 67200 + }, + { + "epoch": 0.94, + "learning_rate": 2.531408907897853e-05, + "loss": 1.3297, + "step": 67300 + }, + { + "epoch": 0.94, + "learning_rate": 2.5307123253319216e-05, + "loss": 1.3156, + "step": 67400 + }, + { + "epoch": 0.94, + "learning_rate": 2.53001574276599e-05, + "loss": 1.309, + "step": 67500 + }, + { + "epoch": 0.94, + "learning_rate": 2.5293191602000585e-05, + "loss": 1.2972, + "step": 67600 + }, + { + "epoch": 0.94, + "learning_rate": 2.528622577634127e-05, + "loss": 1.2935, + "step": 67700 + }, + { + "epoch": 0.94, + "learning_rate": 2.5279259950681955e-05, + "loss": 1.3517, + "step": 67800 + }, + { + "epoch": 0.95, + "learning_rate": 2.5272294125022638e-05, + "loss": 1.3298, + "step": 67900 + }, + { + "epoch": 0.95, + "learning_rate": 2.5265328299363324e-05, + "loss": 1.3111, + "step": 68000 + }, + { + "epoch": 0.95, + "learning_rate": 2.5258362473704007e-05, + "loss": 1.2882, + "step": 68100 + }, + { + "epoch": 0.95, + "learning_rate": 2.5251396648044694e-05, + "loss": 1.3066, + "step": 68200 + }, + { + "epoch": 0.95, + "learning_rate": 2.5244430822385377e-05, + "loss": 1.3078, + "step": 68300 + }, + { + "epoch": 0.95, + "learning_rate": 2.5237464996726063e-05, + "loss": 1.3353, + "step": 68400 + }, + { + "epoch": 0.95, + "learning_rate": 2.523049917106675e-05, + "loss": 1.3302, + "step": 68500 + }, + { + "epoch": 0.96, + "learning_rate": 2.5223533345407432e-05, + "loss": 1.3026, + "step": 68600 + }, + { + "epoch": 0.96, + "learning_rate": 2.521656751974812e-05, + "loss": 1.3094, + "step": 68700 + }, + { + "epoch": 0.96, + "learning_rate": 2.52096016940888e-05, + "loss": 1.301, + "step": 68800 + }, + { + "epoch": 0.96, + "learning_rate": 2.5202635868429488e-05, + "loss": 1.3381, + "step": 68900 + }, + { + "epoch": 0.96, + "learning_rate": 2.519567004277017e-05, + "loss": 1.296, + "step": 69000 + }, + { + "epoch": 0.96, + "learning_rate": 2.5188704217110857e-05, + "loss": 1.3327, + "step": 69100 + }, + { + "epoch": 0.96, + "learning_rate": 2.518173839145154e-05, + "loss": 1.3504, + "step": 69200 + }, + { + "epoch": 0.97, + "learning_rate": 2.5174772565792227e-05, + "loss": 1.329, + "step": 69300 + }, + { + "epoch": 0.97, + "learning_rate": 2.5167806740132906e-05, + "loss": 1.3304, + "step": 69400 + }, + { + "epoch": 0.97, + "learning_rate": 2.5160840914473593e-05, + "loss": 1.3442, + "step": 69500 + }, + { + "epoch": 0.97, + "learning_rate": 2.5153875088814276e-05, + "loss": 1.2983, + "step": 69600 + }, + { + "epoch": 0.97, + "learning_rate": 2.5146909263154962e-05, + "loss": 1.3069, + "step": 69700 + }, + { + "epoch": 0.97, + "learning_rate": 2.5139943437495645e-05, + "loss": 1.3425, + "step": 69800 + }, + { + "epoch": 0.97, + "learning_rate": 2.5133116928349515e-05, + "loss": 1.2985, + "step": 69900 + }, + { + "epoch": 0.98, + "learning_rate": 2.51261511026902e-05, + "loss": 1.3073, + "step": 70000 + }, + { + "epoch": 0.98, + "learning_rate": 2.5119254935287478e-05, + "loss": 1.3266, + "step": 70100 + }, + { + "epoch": 0.98, + "learning_rate": 2.5112289109628164e-05, + "loss": 1.3149, + "step": 70200 + }, + { + "epoch": 0.98, + "learning_rate": 2.5105323283968847e-05, + "loss": 1.352, + "step": 70300 + }, + { + "epoch": 0.98, + "learning_rate": 2.5098357458309534e-05, + "loss": 1.3348, + "step": 70400 + }, + { + "epoch": 0.98, + "learning_rate": 2.5091391632650217e-05, + "loss": 1.3357, + "step": 70500 + }, + { + "epoch": 0.98, + "learning_rate": 2.5084425806990903e-05, + "loss": 1.3143, + "step": 70600 + }, + { + "epoch": 0.98, + "learning_rate": 2.5077459981331586e-05, + "loss": 1.3632, + "step": 70700 + }, + { + "epoch": 0.99, + "learning_rate": 2.5070494155672272e-05, + "loss": 1.3142, + "step": 70800 + }, + { + "epoch": 0.99, + "learning_rate": 2.506352833001296e-05, + "loss": 1.2954, + "step": 70900 + }, + { + "epoch": 0.99, + "learning_rate": 2.505656250435364e-05, + "loss": 1.3081, + "step": 71000 + }, + { + "epoch": 0.99, + "learning_rate": 2.5049596678694328e-05, + "loss": 1.2836, + "step": 71100 + }, + { + "epoch": 0.99, + "learning_rate": 2.504263085303501e-05, + "loss": 1.3663, + "step": 71200 + }, + { + "epoch": 0.99, + "learning_rate": 2.5035665027375697e-05, + "loss": 1.3225, + "step": 71300 + }, + { + "epoch": 0.99, + "learning_rate": 2.502869920171638e-05, + "loss": 1.2865, + "step": 71400 + }, + { + "epoch": 1.0, + "learning_rate": 2.5021733376057067e-05, + "loss": 1.3454, + "step": 71500 + }, + { + "epoch": 1.0, + "learning_rate": 2.501476755039775e-05, + "loss": 1.3092, + "step": 71600 + }, + { + "epoch": 1.0, + "learning_rate": 2.5007801724738436e-05, + "loss": 1.2949, + "step": 71700 + }, + { + "epoch": 1.0, + "eval_gen_len": 20.0, + "eval_loss": 1.208003044128418, + "eval_rouge1": 11.7171, + "eval_rouge2": 3.3284, + "eval_rougeL": 11.3209, + "eval_rougeLsum": 11.4022, + "eval_runtime": 1534.4697, + "eval_samples_per_second": 8.712, + "eval_steps_per_second": 2.178, + "step": 71779 + }, + { + "epoch": 1.0, + "learning_rate": 2.500083589907912e-05, + "loss": 1.2763, + "step": 71800 + }, + { + "epoch": 1.0, + "learning_rate": 2.4993870073419805e-05, + "loss": 1.2162, + "step": 71900 + }, + { + "epoch": 1.0, + "learning_rate": 2.498690424776049e-05, + "loss": 1.1719, + "step": 72000 + }, + { + "epoch": 1.0, + "learning_rate": 2.497993842210117e-05, + "loss": 1.2228, + "step": 72100 + }, + { + "epoch": 1.01, + "learning_rate": 2.4972972596441854e-05, + "loss": 1.2252, + "step": 72200 + }, + { + "epoch": 1.01, + "learning_rate": 2.496600677078254e-05, + "loss": 1.1962, + "step": 72300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4959040945123224e-05, + "loss": 1.1819, + "step": 72400 + }, + { + "epoch": 1.01, + "learning_rate": 2.495207511946391e-05, + "loss": 1.1982, + "step": 72500 + }, + { + "epoch": 1.01, + "learning_rate": 2.4945109293804593e-05, + "loss": 1.2265, + "step": 72600 + }, + { + "epoch": 1.01, + "learning_rate": 2.493814346814528e-05, + "loss": 1.1959, + "step": 72700 + }, + { + "epoch": 1.01, + "learning_rate": 2.4931177642485963e-05, + "loss": 1.176, + "step": 72800 + }, + { + "epoch": 1.02, + "learning_rate": 2.492421181682665e-05, + "loss": 1.2382, + "step": 72900 + }, + { + "epoch": 1.02, + "learning_rate": 2.4917245991167332e-05, + "loss": 1.2064, + "step": 73000 + }, + { + "epoch": 1.02, + "learning_rate": 2.4910280165508018e-05, + "loss": 1.2151, + "step": 73100 + }, + { + "epoch": 1.02, + "learning_rate": 2.49033143398487e-05, + "loss": 1.1908, + "step": 73200 + }, + { + "epoch": 1.02, + "learning_rate": 2.4896348514189388e-05, + "loss": 1.2045, + "step": 73300 + }, + { + "epoch": 1.02, + "learning_rate": 2.4889382688530074e-05, + "loss": 1.2272, + "step": 73400 + }, + { + "epoch": 1.02, + "learning_rate": 2.4882416862870757e-05, + "loss": 1.1965, + "step": 73500 + }, + { + "epoch": 1.03, + "learning_rate": 2.4875451037211443e-05, + "loss": 1.2207, + "step": 73600 + }, + { + "epoch": 1.03, + "learning_rate": 2.4868485211552126e-05, + "loss": 1.1842, + "step": 73700 + }, + { + "epoch": 1.03, + "learning_rate": 2.4861519385892813e-05, + "loss": 1.2278, + "step": 73800 + }, + { + "epoch": 1.03, + "learning_rate": 2.4854553560233496e-05, + "loss": 1.2294, + "step": 73900 + }, + { + "epoch": 1.03, + "learning_rate": 2.4847587734574182e-05, + "loss": 1.2443, + "step": 74000 + }, + { + "epoch": 1.03, + "learning_rate": 2.4840621908914865e-05, + "loss": 1.2025, + "step": 74100 + }, + { + "epoch": 1.03, + "learning_rate": 2.483365608325555e-05, + "loss": 1.2018, + "step": 74200 + }, + { + "epoch": 1.04, + "learning_rate": 2.4826690257596234e-05, + "loss": 1.202, + "step": 74300 + }, + { + "epoch": 1.04, + "learning_rate": 2.4819724431936917e-05, + "loss": 1.1752, + "step": 74400 + }, + { + "epoch": 1.04, + "learning_rate": 2.48127586062776e-05, + "loss": 1.2313, + "step": 74500 + }, + { + "epoch": 1.04, + "learning_rate": 2.4805792780618287e-05, + "loss": 1.1882, + "step": 74600 + }, + { + "epoch": 1.04, + "learning_rate": 2.479882695495897e-05, + "loss": 1.2348, + "step": 74700 + }, + { + "epoch": 1.04, + "learning_rate": 2.4791861129299656e-05, + "loss": 1.2377, + "step": 74800 + }, + { + "epoch": 1.04, + "learning_rate": 2.478489530364034e-05, + "loss": 1.1769, + "step": 74900 + }, + { + "epoch": 1.04, + "learning_rate": 2.4777929477981026e-05, + "loss": 1.2058, + "step": 75000 + }, + { + "epoch": 1.05, + "learning_rate": 2.477096365232171e-05, + "loss": 1.1412, + "step": 75100 + }, + { + "epoch": 1.05, + "learning_rate": 2.4763997826662395e-05, + "loss": 1.2086, + "step": 75200 + }, + { + "epoch": 1.05, + "learning_rate": 2.4757032001003078e-05, + "loss": 1.2231, + "step": 75300 + }, + { + "epoch": 1.05, + "learning_rate": 2.4750066175343764e-05, + "loss": 1.2127, + "step": 75400 + }, + { + "epoch": 1.05, + "learning_rate": 2.4743100349684447e-05, + "loss": 1.2203, + "step": 75500 + }, + { + "epoch": 1.05, + "learning_rate": 2.4736134524025134e-05, + "loss": 1.2357, + "step": 75600 + }, + { + "epoch": 1.05, + "learning_rate": 2.4729168698365817e-05, + "loss": 1.1935, + "step": 75700 + }, + { + "epoch": 1.06, + "learning_rate": 2.4722202872706503e-05, + "loss": 1.242, + "step": 75800 + }, + { + "epoch": 1.06, + "learning_rate": 2.471523704704719e-05, + "loss": 1.2526, + "step": 75900 + }, + { + "epoch": 1.06, + "learning_rate": 2.4708271221387872e-05, + "loss": 1.2058, + "step": 76000 + }, + { + "epoch": 1.06, + "learning_rate": 2.470137505398515e-05, + "loss": 1.2012, + "step": 76100 + }, + { + "epoch": 1.06, + "learning_rate": 2.4694409228325835e-05, + "loss": 1.2217, + "step": 76200 + }, + { + "epoch": 1.06, + "learning_rate": 2.4687443402666518e-05, + "loss": 1.2121, + "step": 76300 + }, + { + "epoch": 1.06, + "learning_rate": 2.4680477577007205e-05, + "loss": 1.2092, + "step": 76400 + }, + { + "epoch": 1.07, + "learning_rate": 2.4673511751347888e-05, + "loss": 1.2415, + "step": 76500 + }, + { + "epoch": 1.07, + "learning_rate": 2.4666545925688574e-05, + "loss": 1.219, + "step": 76600 + }, + { + "epoch": 1.07, + "learning_rate": 2.465958010002926e-05, + "loss": 1.2309, + "step": 76700 + }, + { + "epoch": 1.07, + "learning_rate": 2.465261427436994e-05, + "loss": 1.2061, + "step": 76800 + }, + { + "epoch": 1.07, + "learning_rate": 2.4645648448710626e-05, + "loss": 1.2408, + "step": 76900 + }, + { + "epoch": 1.07, + "learning_rate": 2.463868262305131e-05, + "loss": 1.1982, + "step": 77000 + }, + { + "epoch": 1.07, + "learning_rate": 2.4631716797391996e-05, + "loss": 1.2145, + "step": 77100 + }, + { + "epoch": 1.08, + "learning_rate": 2.462475097173268e-05, + "loss": 1.2393, + "step": 77200 + }, + { + "epoch": 1.08, + "learning_rate": 2.4617785146073365e-05, + "loss": 1.2282, + "step": 77300 + }, + { + "epoch": 1.08, + "learning_rate": 2.4610819320414048e-05, + "loss": 1.2315, + "step": 77400 + }, + { + "epoch": 1.08, + "learning_rate": 2.4603853494754734e-05, + "loss": 1.1694, + "step": 77500 + }, + { + "epoch": 1.08, + "learning_rate": 2.4596887669095417e-05, + "loss": 1.2348, + "step": 77600 + }, + { + "epoch": 1.08, + "learning_rate": 2.4589921843436104e-05, + "loss": 1.2112, + "step": 77700 + }, + { + "epoch": 1.08, + "learning_rate": 2.4582956017776787e-05, + "loss": 1.1807, + "step": 77800 + }, + { + "epoch": 1.09, + "learning_rate": 2.4575990192117473e-05, + "loss": 1.197, + "step": 77900 + }, + { + "epoch": 1.09, + "learning_rate": 2.4569024366458156e-05, + "loss": 1.2154, + "step": 78000 + }, + { + "epoch": 1.09, + "learning_rate": 2.4562058540798843e-05, + "loss": 1.2056, + "step": 78100 + }, + { + "epoch": 1.09, + "learning_rate": 2.455516237339612e-05, + "loss": 1.23, + "step": 78200 + }, + { + "epoch": 1.09, + "learning_rate": 2.4548196547736805e-05, + "loss": 1.2178, + "step": 78300 + }, + { + "epoch": 1.09, + "learning_rate": 2.4541230722077488e-05, + "loss": 1.1859, + "step": 78400 + }, + { + "epoch": 1.09, + "learning_rate": 2.4534264896418175e-05, + "loss": 1.2113, + "step": 78500 + }, + { + "epoch": 1.1, + "learning_rate": 2.4527299070758858e-05, + "loss": 1.1367, + "step": 78600 + }, + { + "epoch": 1.1, + "learning_rate": 2.4520333245099544e-05, + "loss": 1.2035, + "step": 78700 + }, + { + "epoch": 1.1, + "learning_rate": 2.4513367419440227e-05, + "loss": 1.1868, + "step": 78800 + }, + { + "epoch": 1.1, + "learning_rate": 2.4506401593780913e-05, + "loss": 1.2222, + "step": 78900 + }, + { + "epoch": 1.1, + "learning_rate": 2.4499435768121596e-05, + "loss": 1.1801, + "step": 79000 + }, + { + "epoch": 1.1, + "learning_rate": 2.4492469942462283e-05, + "loss": 1.2566, + "step": 79100 + }, + { + "epoch": 1.1, + "learning_rate": 2.4485504116802962e-05, + "loss": 1.2105, + "step": 79200 + }, + { + "epoch": 1.1, + "learning_rate": 2.447853829114365e-05, + "loss": 1.209, + "step": 79300 + }, + { + "epoch": 1.11, + "learning_rate": 2.4471642123740925e-05, + "loss": 1.2341, + "step": 79400 + }, + { + "epoch": 1.11, + "learning_rate": 2.446467629808161e-05, + "loss": 1.212, + "step": 79500 + }, + { + "epoch": 1.11, + "learning_rate": 2.4457710472422295e-05, + "loss": 1.2183, + "step": 79600 + }, + { + "epoch": 1.11, + "learning_rate": 2.445074464676298e-05, + "loss": 1.2142, + "step": 79700 + }, + { + "epoch": 1.11, + "learning_rate": 2.4443778821103664e-05, + "loss": 1.2149, + "step": 79800 + }, + { + "epoch": 1.11, + "learning_rate": 2.443681299544435e-05, + "loss": 1.2276, + "step": 79900 + }, + { + "epoch": 1.11, + "learning_rate": 2.4429847169785033e-05, + "loss": 1.2213, + "step": 80000 + }, + { + "epoch": 1.12, + "learning_rate": 2.442288134412572e-05, + "loss": 1.2164, + "step": 80100 + }, + { + "epoch": 1.12, + "learning_rate": 2.4415915518466403e-05, + "loss": 1.2111, + "step": 80200 + }, + { + "epoch": 1.12, + "learning_rate": 2.440894969280709e-05, + "loss": 1.2367, + "step": 80300 + }, + { + "epoch": 1.12, + "learning_rate": 2.4401983867147772e-05, + "loss": 1.1896, + "step": 80400 + }, + { + "epoch": 1.12, + "learning_rate": 2.439501804148846e-05, + "loss": 1.2185, + "step": 80500 + }, + { + "epoch": 1.12, + "learning_rate": 2.438805221582914e-05, + "loss": 1.2104, + "step": 80600 + }, + { + "epoch": 1.12, + "learning_rate": 2.4381086390169828e-05, + "loss": 1.1925, + "step": 80700 + }, + { + "epoch": 1.13, + "learning_rate": 2.4374120564510514e-05, + "loss": 1.2154, + "step": 80800 + }, + { + "epoch": 1.13, + "learning_rate": 2.4367154738851197e-05, + "loss": 1.189, + "step": 80900 + }, + { + "epoch": 1.13, + "learning_rate": 2.4360188913191884e-05, + "loss": 1.2177, + "step": 81000 + }, + { + "epoch": 1.13, + "learning_rate": 2.4353223087532566e-05, + "loss": 1.2071, + "step": 81100 + }, + { + "epoch": 1.13, + "learning_rate": 2.4346257261873253e-05, + "loss": 1.2237, + "step": 81200 + }, + { + "epoch": 1.13, + "learning_rate": 2.4339291436213936e-05, + "loss": 1.2119, + "step": 81300 + }, + { + "epoch": 1.13, + "learning_rate": 2.4332325610554622e-05, + "loss": 1.2448, + "step": 81400 + }, + { + "epoch": 1.14, + "learning_rate": 2.4325359784895305e-05, + "loss": 1.1896, + "step": 81500 + }, + { + "epoch": 1.14, + "learning_rate": 2.431839395923599e-05, + "loss": 1.1852, + "step": 81600 + }, + { + "epoch": 1.14, + "learning_rate": 2.431142813357667e-05, + "loss": 1.2208, + "step": 81700 + }, + { + "epoch": 1.14, + "learning_rate": 2.4304462307917358e-05, + "loss": 1.2055, + "step": 81800 + }, + { + "epoch": 1.14, + "learning_rate": 2.429749648225804e-05, + "loss": 1.2003, + "step": 81900 + }, + { + "epoch": 1.14, + "learning_rate": 2.4290530656598727e-05, + "loss": 1.2091, + "step": 82000 + }, + { + "epoch": 1.14, + "learning_rate": 2.428356483093941e-05, + "loss": 1.1914, + "step": 82100 + }, + { + "epoch": 1.15, + "learning_rate": 2.4276599005280096e-05, + "loss": 1.2248, + "step": 82200 + }, + { + "epoch": 1.15, + "learning_rate": 2.426963317962078e-05, + "loss": 1.2079, + "step": 82300 + }, + { + "epoch": 1.15, + "learning_rate": 2.4262667353961466e-05, + "loss": 1.2115, + "step": 82400 + }, + { + "epoch": 1.15, + "learning_rate": 2.425570152830215e-05, + "loss": 1.1846, + "step": 82500 + }, + { + "epoch": 1.15, + "learning_rate": 2.4248735702642835e-05, + "loss": 1.196, + "step": 82600 + }, + { + "epoch": 1.15, + "learning_rate": 2.4241769876983518e-05, + "loss": 1.2216, + "step": 82700 + }, + { + "epoch": 1.15, + "learning_rate": 2.4234804051324204e-05, + "loss": 1.1833, + "step": 82800 + }, + { + "epoch": 1.15, + "learning_rate": 2.4227838225664887e-05, + "loss": 1.2104, + "step": 82900 + }, + { + "epoch": 1.16, + "learning_rate": 2.4220872400005574e-05, + "loss": 1.2132, + "step": 83000 + }, + { + "epoch": 1.16, + "learning_rate": 2.4213906574346257e-05, + "loss": 1.2086, + "step": 83100 + }, + { + "epoch": 1.16, + "learning_rate": 2.4206940748686943e-05, + "loss": 1.1941, + "step": 83200 + }, + { + "epoch": 1.16, + "learning_rate": 2.419997492302763e-05, + "loss": 1.191, + "step": 83300 + }, + { + "epoch": 1.16, + "learning_rate": 2.4193009097368313e-05, + "loss": 1.1874, + "step": 83400 + }, + { + "epoch": 1.16, + "learning_rate": 2.4186043271709e-05, + "loss": 1.2118, + "step": 83500 + }, + { + "epoch": 1.16, + "learning_rate": 2.4179077446049682e-05, + "loss": 1.1804, + "step": 83600 + }, + { + "epoch": 1.17, + "learning_rate": 2.4172111620390368e-05, + "loss": 1.1734, + "step": 83700 + }, + { + "epoch": 1.17, + "learning_rate": 2.4165215452987645e-05, + "loss": 1.1996, + "step": 83800 + }, + { + "epoch": 1.17, + "learning_rate": 2.4158249627328328e-05, + "loss": 1.156, + "step": 83900 + }, + { + "epoch": 1.17, + "learning_rate": 2.4151283801669014e-05, + "loss": 1.2506, + "step": 84000 + }, + { + "epoch": 1.17, + "learning_rate": 2.4144317976009697e-05, + "loss": 1.2064, + "step": 84100 + }, + { + "epoch": 1.17, + "learning_rate": 2.413735215035038e-05, + "loss": 1.1956, + "step": 84200 + }, + { + "epoch": 1.17, + "learning_rate": 2.4130386324691066e-05, + "loss": 1.2134, + "step": 84300 + }, + { + "epoch": 1.18, + "learning_rate": 2.412342049903175e-05, + "loss": 1.1889, + "step": 84400 + }, + { + "epoch": 1.18, + "learning_rate": 2.4116454673372436e-05, + "loss": 1.2413, + "step": 84500 + }, + { + "epoch": 1.18, + "learning_rate": 2.410948884771312e-05, + "loss": 1.1893, + "step": 84600 + }, + { + "epoch": 1.18, + "learning_rate": 2.4102523022053805e-05, + "loss": 1.2346, + "step": 84700 + }, + { + "epoch": 1.18, + "learning_rate": 2.4095557196394488e-05, + "loss": 1.2005, + "step": 84800 + }, + { + "epoch": 1.18, + "learning_rate": 2.4088591370735175e-05, + "loss": 1.2187, + "step": 84900 + }, + { + "epoch": 1.18, + "learning_rate": 2.4081625545075858e-05, + "loss": 1.2475, + "step": 85000 + }, + { + "epoch": 1.19, + "learning_rate": 2.4074659719416544e-05, + "loss": 1.1655, + "step": 85100 + }, + { + "epoch": 1.19, + "learning_rate": 2.4067693893757227e-05, + "loss": 1.2359, + "step": 85200 + }, + { + "epoch": 1.19, + "learning_rate": 2.4060728068097913e-05, + "loss": 1.2474, + "step": 85300 + }, + { + "epoch": 1.19, + "learning_rate": 2.4053762242438596e-05, + "loss": 1.2113, + "step": 85400 + }, + { + "epoch": 1.19, + "learning_rate": 2.4046796416779283e-05, + "loss": 1.2017, + "step": 85500 + }, + { + "epoch": 1.19, + "learning_rate": 2.4039830591119966e-05, + "loss": 1.1962, + "step": 85600 + }, + { + "epoch": 1.19, + "learning_rate": 2.4032864765460652e-05, + "loss": 1.2242, + "step": 85700 + }, + { + "epoch": 1.2, + "learning_rate": 2.4025898939801335e-05, + "loss": 1.1803, + "step": 85800 + }, + { + "epoch": 1.2, + "learning_rate": 2.4019002772398615e-05, + "loss": 1.2171, + "step": 85900 + }, + { + "epoch": 1.2, + "learning_rate": 2.4012036946739298e-05, + "loss": 1.2158, + "step": 86000 + }, + { + "epoch": 1.2, + "learning_rate": 2.4005071121079984e-05, + "loss": 1.2223, + "step": 86100 + }, + { + "epoch": 1.2, + "learning_rate": 2.3998105295420667e-05, + "loss": 1.234, + "step": 86200 + }, + { + "epoch": 1.2, + "learning_rate": 2.3991139469761354e-05, + "loss": 1.1993, + "step": 86300 + }, + { + "epoch": 1.2, + "learning_rate": 2.3984173644102037e-05, + "loss": 1.2003, + "step": 86400 + }, + { + "epoch": 1.21, + "learning_rate": 2.397720781844272e-05, + "loss": 1.2102, + "step": 86500 + }, + { + "epoch": 1.21, + "learning_rate": 2.3970241992783402e-05, + "loss": 1.1924, + "step": 86600 + }, + { + "epoch": 1.21, + "learning_rate": 2.396327616712409e-05, + "loss": 1.2074, + "step": 86700 + }, + { + "epoch": 1.21, + "learning_rate": 2.3956310341464772e-05, + "loss": 1.2076, + "step": 86800 + }, + { + "epoch": 1.21, + "learning_rate": 2.3949344515805458e-05, + "loss": 1.1719, + "step": 86900 + }, + { + "epoch": 1.21, + "learning_rate": 2.394237869014614e-05, + "loss": 1.171, + "step": 87000 + }, + { + "epoch": 1.21, + "learning_rate": 2.3935412864486828e-05, + "loss": 1.2379, + "step": 87100 + }, + { + "epoch": 1.21, + "learning_rate": 2.3928447038827514e-05, + "loss": 1.1785, + "step": 87200 + }, + { + "epoch": 1.22, + "learning_rate": 2.3921481213168197e-05, + "loss": 1.2119, + "step": 87300 + }, + { + "epoch": 1.22, + "learning_rate": 2.3914515387508883e-05, + "loss": 1.1969, + "step": 87400 + }, + { + "epoch": 1.22, + "learning_rate": 2.3907549561849566e-05, + "loss": 1.1893, + "step": 87500 + }, + { + "epoch": 1.22, + "learning_rate": 2.3900583736190253e-05, + "loss": 1.2106, + "step": 87600 + }, + { + "epoch": 1.22, + "learning_rate": 2.3893617910530936e-05, + "loss": 1.2031, + "step": 87700 + }, + { + "epoch": 1.22, + "learning_rate": 2.3886652084871622e-05, + "loss": 1.2224, + "step": 87800 + }, + { + "epoch": 1.22, + "learning_rate": 2.3879686259212305e-05, + "loss": 1.1619, + "step": 87900 + }, + { + "epoch": 1.23, + "learning_rate": 2.387279009180958e-05, + "loss": 1.2235, + "step": 88000 + }, + { + "epoch": 1.23, + "learning_rate": 2.3865824266150268e-05, + "loss": 1.2187, + "step": 88100 + }, + { + "epoch": 1.23, + "learning_rate": 2.3858858440490954e-05, + "loss": 1.1788, + "step": 88200 + }, + { + "epoch": 1.23, + "learning_rate": 2.385196227308823e-05, + "loss": 1.1814, + "step": 88300 + }, + { + "epoch": 1.23, + "learning_rate": 2.3844996447428914e-05, + "loss": 1.1988, + "step": 88400 + }, + { + "epoch": 1.23, + "learning_rate": 2.38380306217696e-05, + "loss": 1.1783, + "step": 88500 + }, + { + "epoch": 1.23, + "learning_rate": 2.3831064796110283e-05, + "loss": 1.1934, + "step": 88600 + }, + { + "epoch": 1.24, + "learning_rate": 2.382409897045097e-05, + "loss": 1.2138, + "step": 88700 + }, + { + "epoch": 1.24, + "learning_rate": 2.3817133144791652e-05, + "loss": 1.2141, + "step": 88800 + }, + { + "epoch": 1.24, + "learning_rate": 2.381016731913234e-05, + "loss": 1.262, + "step": 88900 + }, + { + "epoch": 1.24, + "learning_rate": 2.3803201493473025e-05, + "loss": 1.2314, + "step": 89000 + }, + { + "epoch": 1.24, + "learning_rate": 2.3796235667813705e-05, + "loss": 1.1988, + "step": 89100 + }, + { + "epoch": 1.24, + "learning_rate": 2.378926984215439e-05, + "loss": 1.2241, + "step": 89200 + }, + { + "epoch": 1.24, + "learning_rate": 2.3782304016495074e-05, + "loss": 1.2096, + "step": 89300 + }, + { + "epoch": 1.25, + "learning_rate": 2.377533819083576e-05, + "loss": 1.2187, + "step": 89400 + }, + { + "epoch": 1.25, + "learning_rate": 2.3768372365176443e-05, + "loss": 1.1916, + "step": 89500 + }, + { + "epoch": 1.25, + "learning_rate": 2.376140653951713e-05, + "loss": 1.1877, + "step": 89600 + }, + { + "epoch": 1.25, + "learning_rate": 2.3754440713857813e-05, + "loss": 1.2055, + "step": 89700 + }, + { + "epoch": 1.25, + "learning_rate": 2.37474748881985e-05, + "loss": 1.2144, + "step": 89800 + }, + { + "epoch": 1.25, + "learning_rate": 2.3740509062539182e-05, + "loss": 1.2193, + "step": 89900 + }, + { + "epoch": 1.25, + "learning_rate": 2.373354323687987e-05, + "loss": 1.1965, + "step": 90000 + }, + { + "epoch": 1.26, + "learning_rate": 2.372657741122055e-05, + "loss": 1.2008, + "step": 90100 + }, + { + "epoch": 1.26, + "learning_rate": 2.3719611585561238e-05, + "loss": 1.2098, + "step": 90200 + }, + { + "epoch": 1.26, + "learning_rate": 2.371264575990192e-05, + "loss": 1.1948, + "step": 90300 + }, + { + "epoch": 1.26, + "learning_rate": 2.3705679934242607e-05, + "loss": 1.1907, + "step": 90400 + }, + { + "epoch": 1.26, + "learning_rate": 2.369871410858329e-05, + "loss": 1.2195, + "step": 90500 + }, + { + "epoch": 1.26, + "learning_rate": 2.3691748282923977e-05, + "loss": 1.195, + "step": 90600 + }, + { + "epoch": 1.26, + "learning_rate": 2.368478245726466e-05, + "loss": 1.2201, + "step": 90700 + }, + { + "epoch": 1.26, + "learning_rate": 2.3677816631605346e-05, + "loss": 1.1833, + "step": 90800 + }, + { + "epoch": 1.27, + "learning_rate": 2.367085080594603e-05, + "loss": 1.2109, + "step": 90900 + }, + { + "epoch": 1.27, + "learning_rate": 2.3663884980286715e-05, + "loss": 1.2184, + "step": 91000 + }, + { + "epoch": 1.27, + "learning_rate": 2.36569191546274e-05, + "loss": 1.2485, + "step": 91100 + }, + { + "epoch": 1.27, + "learning_rate": 2.3649953328968085e-05, + "loss": 1.2013, + "step": 91200 + }, + { + "epoch": 1.27, + "learning_rate": 2.3642987503308768e-05, + "loss": 1.2579, + "step": 91300 + }, + { + "epoch": 1.27, + "learning_rate": 2.363602167764945e-05, + "loss": 1.2107, + "step": 91400 + }, + { + "epoch": 1.27, + "learning_rate": 2.3629055851990137e-05, + "loss": 1.1716, + "step": 91500 + }, + { + "epoch": 1.28, + "learning_rate": 2.362209002633082e-05, + "loss": 1.2439, + "step": 91600 + }, + { + "epoch": 1.28, + "learning_rate": 2.3615124200671507e-05, + "loss": 1.1907, + "step": 91700 + }, + { + "epoch": 1.28, + "learning_rate": 2.360815837501219e-05, + "loss": 1.2171, + "step": 91800 + }, + { + "epoch": 1.28, + "learning_rate": 2.3601192549352876e-05, + "loss": 1.1806, + "step": 91900 + }, + { + "epoch": 1.28, + "learning_rate": 2.3594296381950152e-05, + "loss": 1.1918, + "step": 92000 + }, + { + "epoch": 1.28, + "learning_rate": 2.358733055629084e-05, + "loss": 1.2124, + "step": 92100 + }, + { + "epoch": 1.28, + "learning_rate": 2.358036473063152e-05, + "loss": 1.2193, + "step": 92200 + }, + { + "epoch": 1.29, + "learning_rate": 2.3573398904972208e-05, + "loss": 1.1929, + "step": 92300 + }, + { + "epoch": 1.29, + "learning_rate": 2.356643307931289e-05, + "loss": 1.2022, + "step": 92400 + }, + { + "epoch": 1.29, + "learning_rate": 2.3559467253653577e-05, + "loss": 1.2052, + "step": 92500 + }, + { + "epoch": 1.29, + "learning_rate": 2.355250142799426e-05, + "loss": 1.1878, + "step": 92600 + }, + { + "epoch": 1.29, + "learning_rate": 2.3545535602334947e-05, + "loss": 1.1895, + "step": 92700 + }, + { + "epoch": 1.29, + "learning_rate": 2.353856977667563e-05, + "loss": 1.2043, + "step": 92800 + }, + { + "epoch": 1.29, + "learning_rate": 2.3531603951016316e-05, + "loss": 1.1816, + "step": 92900 + }, + { + "epoch": 1.3, + "learning_rate": 2.3524638125357e-05, + "loss": 1.234, + "step": 93000 + }, + { + "epoch": 1.3, + "learning_rate": 2.3517672299697686e-05, + "loss": 1.2092, + "step": 93100 + }, + { + "epoch": 1.3, + "learning_rate": 2.351070647403837e-05, + "loss": 1.2086, + "step": 93200 + }, + { + "epoch": 1.3, + "learning_rate": 2.3503740648379055e-05, + "loss": 1.1953, + "step": 93300 + }, + { + "epoch": 1.3, + "learning_rate": 2.3496774822719738e-05, + "loss": 1.2012, + "step": 93400 + }, + { + "epoch": 1.3, + "learning_rate": 2.3489808997060424e-05, + "loss": 1.2169, + "step": 93500 + }, + { + "epoch": 1.3, + "learning_rate": 2.3482843171401107e-05, + "loss": 1.2498, + "step": 93600 + }, + { + "epoch": 1.31, + "learning_rate": 2.3475877345741794e-05, + "loss": 1.2184, + "step": 93700 + }, + { + "epoch": 1.31, + "learning_rate": 2.3468911520082473e-05, + "loss": 1.1944, + "step": 93800 + }, + { + "epoch": 1.31, + "learning_rate": 2.346194569442316e-05, + "loss": 1.2077, + "step": 93900 + }, + { + "epoch": 1.31, + "learning_rate": 2.3454979868763843e-05, + "loss": 1.1873, + "step": 94000 + }, + { + "epoch": 1.31, + "learning_rate": 2.344801404310453e-05, + "loss": 1.206, + "step": 94100 + }, + { + "epoch": 1.31, + "learning_rate": 2.3441048217445212e-05, + "loss": 1.1979, + "step": 94200 + }, + { + "epoch": 1.31, + "learning_rate": 2.34340823917859e-05, + "loss": 1.2447, + "step": 94300 + }, + { + "epoch": 1.32, + "learning_rate": 2.342711656612658e-05, + "loss": 1.1973, + "step": 94400 + }, + { + "epoch": 1.32, + "learning_rate": 2.3420150740467268e-05, + "loss": 1.2511, + "step": 94500 + }, + { + "epoch": 1.32, + "learning_rate": 2.3413184914807954e-05, + "loss": 1.2381, + "step": 94600 + }, + { + "epoch": 1.32, + "learning_rate": 2.3406219089148637e-05, + "loss": 1.2367, + "step": 94700 + }, + { + "epoch": 1.32, + "learning_rate": 2.3399253263489323e-05, + "loss": 1.2305, + "step": 94800 + }, + { + "epoch": 1.32, + "learning_rate": 2.3392287437830006e-05, + "loss": 1.2151, + "step": 94900 + }, + { + "epoch": 1.32, + "learning_rate": 2.3385321612170693e-05, + "loss": 1.2041, + "step": 95000 + }, + { + "epoch": 1.32, + "learning_rate": 2.3378355786511376e-05, + "loss": 1.2184, + "step": 95100 + }, + { + "epoch": 1.33, + "learning_rate": 2.3371389960852062e-05, + "loss": 1.2311, + "step": 95200 + }, + { + "epoch": 1.33, + "learning_rate": 2.3364424135192745e-05, + "loss": 1.186, + "step": 95300 + }, + { + "epoch": 1.33, + "learning_rate": 2.335745830953343e-05, + "loss": 1.1747, + "step": 95400 + }, + { + "epoch": 1.33, + "learning_rate": 2.3350492483874115e-05, + "loss": 1.2108, + "step": 95500 + }, + { + "epoch": 1.33, + "learning_rate": 2.33435266582148e-05, + "loss": 1.197, + "step": 95600 + }, + { + "epoch": 1.33, + "learning_rate": 2.3336560832555484e-05, + "loss": 1.2411, + "step": 95700 + }, + { + "epoch": 1.33, + "learning_rate": 2.332959500689617e-05, + "loss": 1.1862, + "step": 95800 + }, + { + "epoch": 1.34, + "learning_rate": 2.3322629181236853e-05, + "loss": 1.2287, + "step": 95900 + }, + { + "epoch": 1.34, + "learning_rate": 2.3315663355577536e-05, + "loss": 1.1929, + "step": 96000 + }, + { + "epoch": 1.34, + "learning_rate": 2.3308767188174816e-05, + "loss": 1.1839, + "step": 96100 + }, + { + "epoch": 1.34, + "learning_rate": 2.33018013625155e-05, + "loss": 1.2291, + "step": 96200 + }, + { + "epoch": 1.34, + "learning_rate": 2.3294835536856182e-05, + "loss": 1.2131, + "step": 96300 + }, + { + "epoch": 1.34, + "learning_rate": 2.328786971119687e-05, + "loss": 1.2304, + "step": 96400 + }, + { + "epoch": 1.34, + "learning_rate": 2.328090388553755e-05, + "loss": 1.1881, + "step": 96500 + }, + { + "epoch": 1.35, + "learning_rate": 2.3273938059878238e-05, + "loss": 1.2102, + "step": 96600 + }, + { + "epoch": 1.35, + "learning_rate": 2.326697223421892e-05, + "loss": 1.1755, + "step": 96700 + }, + { + "epoch": 1.35, + "learning_rate": 2.3260006408559607e-05, + "loss": 1.2057, + "step": 96800 + }, + { + "epoch": 1.35, + "learning_rate": 2.325304058290029e-05, + "loss": 1.2049, + "step": 96900 + }, + { + "epoch": 1.35, + "learning_rate": 2.3246074757240977e-05, + "loss": 1.2509, + "step": 97000 + }, + { + "epoch": 1.35, + "learning_rate": 2.323910893158166e-05, + "loss": 1.1966, + "step": 97100 + }, + { + "epoch": 1.35, + "learning_rate": 2.3232143105922346e-05, + "loss": 1.1735, + "step": 97200 + }, + { + "epoch": 1.36, + "learning_rate": 2.322517728026303e-05, + "loss": 1.1989, + "step": 97300 + }, + { + "epoch": 1.36, + "learning_rate": 2.321828111286031e-05, + "loss": 1.2423, + "step": 97400 + }, + { + "epoch": 1.36, + "learning_rate": 2.321131528720099e-05, + "loss": 1.2197, + "step": 97500 + }, + { + "epoch": 1.36, + "learning_rate": 2.3204349461541678e-05, + "loss": 1.2015, + "step": 97600 + }, + { + "epoch": 1.36, + "learning_rate": 2.319738363588236e-05, + "loss": 1.2008, + "step": 97700 + }, + { + "epoch": 1.36, + "learning_rate": 2.3190417810223047e-05, + "loss": 1.1893, + "step": 97800 + }, + { + "epoch": 1.36, + "learning_rate": 2.318345198456373e-05, + "loss": 1.2326, + "step": 97900 + }, + { + "epoch": 1.37, + "learning_rate": 2.3176486158904417e-05, + "loss": 1.2319, + "step": 98000 + }, + { + "epoch": 1.37, + "learning_rate": 2.31695203332451e-05, + "loss": 1.2047, + "step": 98100 + }, + { + "epoch": 1.37, + "learning_rate": 2.3162554507585786e-05, + "loss": 1.21, + "step": 98200 + }, + { + "epoch": 1.37, + "learning_rate": 2.315558868192647e-05, + "loss": 1.1928, + "step": 98300 + }, + { + "epoch": 1.37, + "learning_rate": 2.3148622856267156e-05, + "loss": 1.2059, + "step": 98400 + }, + { + "epoch": 1.37, + "learning_rate": 2.314165703060784e-05, + "loss": 1.2117, + "step": 98500 + }, + { + "epoch": 1.37, + "learning_rate": 2.313469120494852e-05, + "loss": 1.2217, + "step": 98600 + }, + { + "epoch": 1.38, + "learning_rate": 2.3127725379289208e-05, + "loss": 1.191, + "step": 98700 + }, + { + "epoch": 1.38, + "learning_rate": 2.312075955362989e-05, + "loss": 1.2245, + "step": 98800 + }, + { + "epoch": 1.38, + "learning_rate": 2.3113793727970577e-05, + "loss": 1.2086, + "step": 98900 + }, + { + "epoch": 1.38, + "learning_rate": 2.310682790231126e-05, + "loss": 1.1783, + "step": 99000 + }, + { + "epoch": 1.38, + "learning_rate": 2.3099862076651947e-05, + "loss": 1.2024, + "step": 99100 + }, + { + "epoch": 1.38, + "learning_rate": 2.309289625099263e-05, + "loss": 1.1831, + "step": 99200 + }, + { + "epoch": 1.38, + "learning_rate": 2.3085930425333316e-05, + "loss": 1.2045, + "step": 99300 + }, + { + "epoch": 1.38, + "learning_rate": 2.3078964599674e-05, + "loss": 1.1953, + "step": 99400 + }, + { + "epoch": 1.39, + "learning_rate": 2.3071998774014685e-05, + "loss": 1.1615, + "step": 99500 + }, + { + "epoch": 1.39, + "learning_rate": 2.306503294835537e-05, + "loss": 1.2064, + "step": 99600 + }, + { + "epoch": 1.39, + "learning_rate": 2.3058067122696055e-05, + "loss": 1.1868, + "step": 99700 + }, + { + "epoch": 1.39, + "learning_rate": 2.305117095529333e-05, + "loss": 1.1918, + "step": 99800 + }, + { + "epoch": 1.39, + "learning_rate": 2.3044205129634018e-05, + "loss": 1.2122, + "step": 99900 + }, + { + "epoch": 1.39, + "learning_rate": 2.30372393039747e-05, + "loss": 1.1998, + "step": 100000 + }, + { + "epoch": 1.39, + "learning_rate": 2.3030273478315387e-05, + "loss": 1.2122, + "step": 100100 + }, + { + "epoch": 1.4, + "learning_rate": 2.302330765265607e-05, + "loss": 1.1924, + "step": 100200 + }, + { + "epoch": 1.4, + "learning_rate": 2.3016341826996756e-05, + "loss": 1.2294, + "step": 100300 + }, + { + "epoch": 1.4, + "learning_rate": 2.300937600133744e-05, + "loss": 1.198, + "step": 100400 + }, + { + "epoch": 1.4, + "learning_rate": 2.3002410175678126e-05, + "loss": 1.2307, + "step": 100500 + }, + { + "epoch": 1.4, + "learning_rate": 2.299544435001881e-05, + "loss": 1.1966, + "step": 100600 + }, + { + "epoch": 1.4, + "learning_rate": 2.2988478524359495e-05, + "loss": 1.2221, + "step": 100700 + }, + { + "epoch": 1.4, + "learning_rate": 2.2981512698700178e-05, + "loss": 1.2524, + "step": 100800 + }, + { + "epoch": 1.41, + "learning_rate": 2.2974546873040864e-05, + "loss": 1.2349, + "step": 100900 + }, + { + "epoch": 1.41, + "learning_rate": 2.2967581047381544e-05, + "loss": 1.185, + "step": 101000 + }, + { + "epoch": 1.41, + "learning_rate": 2.296061522172223e-05, + "loss": 1.1917, + "step": 101100 + }, + { + "epoch": 1.41, + "learning_rate": 2.2953649396062913e-05, + "loss": 1.1746, + "step": 101200 + }, + { + "epoch": 1.41, + "learning_rate": 2.29466835704036e-05, + "loss": 1.1778, + "step": 101300 + }, + { + "epoch": 1.41, + "learning_rate": 2.2939717744744283e-05, + "loss": 1.1927, + "step": 101400 + }, + { + "epoch": 1.41, + "learning_rate": 2.293275191908497e-05, + "loss": 1.2341, + "step": 101500 + }, + { + "epoch": 1.42, + "learning_rate": 2.2925786093425652e-05, + "loss": 1.209, + "step": 101600 + }, + { + "epoch": 1.42, + "learning_rate": 2.291882026776634e-05, + "loss": 1.2499, + "step": 101700 + }, + { + "epoch": 1.42, + "learning_rate": 2.291185444210702e-05, + "loss": 1.189, + "step": 101800 + }, + { + "epoch": 1.42, + "learning_rate": 2.2904888616447708e-05, + "loss": 1.1962, + "step": 101900 + }, + { + "epoch": 1.42, + "learning_rate": 2.2897922790788394e-05, + "loss": 1.2561, + "step": 102000 + }, + { + "epoch": 1.42, + "learning_rate": 2.2890956965129077e-05, + "loss": 1.2059, + "step": 102100 + }, + { + "epoch": 1.42, + "learning_rate": 2.2883991139469764e-05, + "loss": 1.1972, + "step": 102200 + }, + { + "epoch": 1.43, + "learning_rate": 2.2877025313810447e-05, + "loss": 1.1873, + "step": 102300 + }, + { + "epoch": 1.43, + "learning_rate": 2.2870059488151133e-05, + "loss": 1.2591, + "step": 102400 + }, + { + "epoch": 1.43, + "learning_rate": 2.2863093662491816e-05, + "loss": 1.2266, + "step": 102500 + }, + { + "epoch": 1.43, + "learning_rate": 2.2856127836832502e-05, + "loss": 1.2194, + "step": 102600 + }, + { + "epoch": 1.43, + "learning_rate": 2.2849162011173185e-05, + "loss": 1.2086, + "step": 102700 + }, + { + "epoch": 1.43, + "learning_rate": 2.284219618551387e-05, + "loss": 1.2317, + "step": 102800 + }, + { + "epoch": 1.43, + "learning_rate": 2.2835230359854555e-05, + "loss": 1.2056, + "step": 102900 + }, + { + "epoch": 1.43, + "learning_rate": 2.282826453419524e-05, + "loss": 1.2364, + "step": 103000 + }, + { + "epoch": 1.44, + "learning_rate": 2.2821298708535924e-05, + "loss": 1.1771, + "step": 103100 + }, + { + "epoch": 1.44, + "learning_rate": 2.281433288287661e-05, + "loss": 1.1979, + "step": 103200 + }, + { + "epoch": 1.44, + "learning_rate": 2.280736705721729e-05, + "loss": 1.2382, + "step": 103300 + }, + { + "epoch": 1.44, + "learning_rate": 2.2800401231557976e-05, + "loss": 1.1685, + "step": 103400 + }, + { + "epoch": 1.44, + "learning_rate": 2.279343540589866e-05, + "loss": 1.2082, + "step": 103500 + }, + { + "epoch": 1.44, + "learning_rate": 2.2786469580239346e-05, + "loss": 1.2266, + "step": 103600 + }, + { + "epoch": 1.44, + "learning_rate": 2.277950375458003e-05, + "loss": 1.2132, + "step": 103700 + }, + { + "epoch": 1.45, + "learning_rate": 2.2772537928920715e-05, + "loss": 1.1876, + "step": 103800 + }, + { + "epoch": 1.45, + "learning_rate": 2.2765572103261398e-05, + "loss": 1.222, + "step": 103900 + }, + { + "epoch": 1.45, + "learning_rate": 2.2758606277602084e-05, + "loss": 1.2023, + "step": 104000 + }, + { + "epoch": 1.45, + "learning_rate": 2.2751640451942767e-05, + "loss": 1.1668, + "step": 104100 + }, + { + "epoch": 1.45, + "learning_rate": 2.2744674626283454e-05, + "loss": 1.194, + "step": 104200 + }, + { + "epoch": 1.45, + "learning_rate": 2.2737708800624137e-05, + "loss": 1.2058, + "step": 104300 + }, + { + "epoch": 1.45, + "learning_rate": 2.2730742974964823e-05, + "loss": 1.185, + "step": 104400 + }, + { + "epoch": 1.46, + "learning_rate": 2.27238468075621e-05, + "loss": 1.1681, + "step": 104500 + }, + { + "epoch": 1.46, + "learning_rate": 2.2716880981902786e-05, + "loss": 1.2321, + "step": 104600 + }, + { + "epoch": 1.46, + "learning_rate": 2.270991515624347e-05, + "loss": 1.2104, + "step": 104700 + }, + { + "epoch": 1.46, + "learning_rate": 2.2702949330584155e-05, + "loss": 1.1615, + "step": 104800 + }, + { + "epoch": 1.46, + "learning_rate": 2.269598350492484e-05, + "loss": 1.2305, + "step": 104900 + }, + { + "epoch": 1.46, + "learning_rate": 2.2689017679265525e-05, + "loss": 1.2212, + "step": 105000 + }, + { + "epoch": 1.46, + "learning_rate": 2.2682051853606208e-05, + "loss": 1.1814, + "step": 105100 + }, + { + "epoch": 1.47, + "learning_rate": 2.2675086027946894e-05, + "loss": 1.2132, + "step": 105200 + }, + { + "epoch": 1.47, + "learning_rate": 2.266812020228758e-05, + "loss": 1.1871, + "step": 105300 + }, + { + "epoch": 1.47, + "learning_rate": 2.2661224034884857e-05, + "loss": 1.2022, + "step": 105400 + }, + { + "epoch": 1.47, + "learning_rate": 2.265425820922554e-05, + "loss": 1.2129, + "step": 105500 + }, + { + "epoch": 1.47, + "learning_rate": 2.2647292383566226e-05, + "loss": 1.2058, + "step": 105600 + }, + { + "epoch": 1.47, + "learning_rate": 2.264032655790691e-05, + "loss": 1.2106, + "step": 105700 + }, + { + "epoch": 1.47, + "learning_rate": 2.2633360732247596e-05, + "loss": 1.1971, + "step": 105800 + }, + { + "epoch": 1.48, + "learning_rate": 2.2626394906588275e-05, + "loss": 1.1881, + "step": 105900 + }, + { + "epoch": 1.48, + "learning_rate": 2.261942908092896e-05, + "loss": 1.1922, + "step": 106000 + }, + { + "epoch": 1.48, + "learning_rate": 2.2612463255269648e-05, + "loss": 1.2067, + "step": 106100 + }, + { + "epoch": 1.48, + "learning_rate": 2.260549742961033e-05, + "loss": 1.2249, + "step": 106200 + }, + { + "epoch": 1.48, + "learning_rate": 2.2598531603951017e-05, + "loss": 1.1895, + "step": 106300 + }, + { + "epoch": 1.48, + "learning_rate": 2.25915657782917e-05, + "loss": 1.1837, + "step": 106400 + }, + { + "epoch": 1.48, + "learning_rate": 2.2584599952632387e-05, + "loss": 1.2043, + "step": 106500 + }, + { + "epoch": 1.49, + "learning_rate": 2.257763412697307e-05, + "loss": 1.2104, + "step": 106600 + }, + { + "epoch": 1.49, + "learning_rate": 2.2570668301313756e-05, + "loss": 1.2394, + "step": 106700 + }, + { + "epoch": 1.49, + "learning_rate": 2.256370247565444e-05, + "loss": 1.1965, + "step": 106800 + }, + { + "epoch": 1.49, + "learning_rate": 2.2556736649995125e-05, + "loss": 1.2418, + "step": 106900 + }, + { + "epoch": 1.49, + "learning_rate": 2.254977082433581e-05, + "loss": 1.2125, + "step": 107000 + }, + { + "epoch": 1.49, + "learning_rate": 2.2542804998676495e-05, + "loss": 1.2428, + "step": 107100 + }, + { + "epoch": 1.49, + "learning_rate": 2.2535839173017178e-05, + "loss": 1.2245, + "step": 107200 + }, + { + "epoch": 1.49, + "learning_rate": 2.2528873347357864e-05, + "loss": 1.1913, + "step": 107300 + }, + { + "epoch": 1.5, + "learning_rate": 2.2521907521698547e-05, + "loss": 1.1843, + "step": 107400 + }, + { + "epoch": 1.5, + "learning_rate": 2.2514941696039234e-05, + "loss": 1.2339, + "step": 107500 + }, + { + "epoch": 1.5, + "learning_rate": 2.2507975870379917e-05, + "loss": 1.1878, + "step": 107600 + }, + { + "epoch": 1.5, + "learning_rate": 2.2501010044720603e-05, + "loss": 1.2216, + "step": 107700 + }, + { + "epoch": 1.5, + "learning_rate": 2.2494044219061286e-05, + "loss": 1.1915, + "step": 107800 + }, + { + "epoch": 1.5, + "learning_rate": 2.2487078393401972e-05, + "loss": 1.1568, + "step": 107900 + }, + { + "epoch": 1.5, + "learning_rate": 2.2480112567742655e-05, + "loss": 1.2237, + "step": 108000 + }, + { + "epoch": 1.51, + "learning_rate": 2.2473146742083338e-05, + "loss": 1.1775, + "step": 108100 + }, + { + "epoch": 1.51, + "learning_rate": 2.246618091642402e-05, + "loss": 1.2169, + "step": 108200 + }, + { + "epoch": 1.51, + "learning_rate": 2.2459215090764708e-05, + "loss": 1.1817, + "step": 108300 + }, + { + "epoch": 1.51, + "learning_rate": 2.2452249265105394e-05, + "loss": 1.2129, + "step": 108400 + }, + { + "epoch": 1.51, + "learning_rate": 2.2445283439446077e-05, + "loss": 1.2335, + "step": 108500 + }, + { + "epoch": 1.51, + "learning_rate": 2.2438317613786763e-05, + "loss": 1.2294, + "step": 108600 + }, + { + "epoch": 1.51, + "learning_rate": 2.2431351788127446e-05, + "loss": 1.1848, + "step": 108700 + }, + { + "epoch": 1.52, + "learning_rate": 2.2424385962468133e-05, + "loss": 1.1672, + "step": 108800 + }, + { + "epoch": 1.52, + "learning_rate": 2.2417420136808816e-05, + "loss": 1.194, + "step": 108900 + }, + { + "epoch": 1.52, + "learning_rate": 2.2410454311149502e-05, + "loss": 1.2135, + "step": 109000 + }, + { + "epoch": 1.52, + "learning_rate": 2.2403488485490185e-05, + "loss": 1.2033, + "step": 109100 + }, + { + "epoch": 1.52, + "learning_rate": 2.239659231808746e-05, + "loss": 1.2111, + "step": 109200 + }, + { + "epoch": 1.52, + "learning_rate": 2.2389626492428148e-05, + "loss": 1.216, + "step": 109300 + }, + { + "epoch": 1.52, + "learning_rate": 2.2382660666768834e-05, + "loss": 1.2526, + "step": 109400 + }, + { + "epoch": 1.53, + "learning_rate": 2.2375694841109517e-05, + "loss": 1.172, + "step": 109500 + }, + { + "epoch": 1.53, + "learning_rate": 2.2368729015450204e-05, + "loss": 1.1855, + "step": 109600 + }, + { + "epoch": 1.53, + "learning_rate": 2.2361763189790887e-05, + "loss": 1.2075, + "step": 109700 + }, + { + "epoch": 1.53, + "learning_rate": 2.2354797364131573e-05, + "loss": 1.1811, + "step": 109800 + }, + { + "epoch": 1.53, + "learning_rate": 2.2347831538472256e-05, + "loss": 1.1902, + "step": 109900 + }, + { + "epoch": 1.53, + "learning_rate": 2.2340865712812942e-05, + "loss": 1.2013, + "step": 110000 + }, + { + "epoch": 1.53, + "learning_rate": 2.2333899887153625e-05, + "loss": 1.1856, + "step": 110100 + }, + { + "epoch": 1.54, + "learning_rate": 2.2326934061494312e-05, + "loss": 1.1709, + "step": 110200 + }, + { + "epoch": 1.54, + "learning_rate": 2.2319968235834995e-05, + "loss": 1.1757, + "step": 110300 + }, + { + "epoch": 1.54, + "learning_rate": 2.231300241017568e-05, + "loss": 1.1821, + "step": 110400 + }, + { + "epoch": 1.54, + "learning_rate": 2.230603658451636e-05, + "loss": 1.2171, + "step": 110500 + }, + { + "epoch": 1.54, + "learning_rate": 2.2299070758857047e-05, + "loss": 1.1986, + "step": 110600 + }, + { + "epoch": 1.54, + "learning_rate": 2.229210493319773e-05, + "loss": 1.206, + "step": 110700 + }, + { + "epoch": 1.54, + "learning_rate": 2.2285139107538416e-05, + "loss": 1.2128, + "step": 110800 + }, + { + "epoch": 1.55, + "learning_rate": 2.22781732818791e-05, + "loss": 1.1754, + "step": 110900 + }, + { + "epoch": 1.55, + "learning_rate": 2.2271207456219786e-05, + "loss": 1.172, + "step": 111000 + }, + { + "epoch": 1.55, + "learning_rate": 2.226424163056047e-05, + "loss": 1.1641, + "step": 111100 + }, + { + "epoch": 1.55, + "learning_rate": 2.2257275804901155e-05, + "loss": 1.2322, + "step": 111200 + }, + { + "epoch": 1.55, + "learning_rate": 2.2250309979241838e-05, + "loss": 1.215, + "step": 111300 + }, + { + "epoch": 1.55, + "learning_rate": 2.2243344153582525e-05, + "loss": 1.2181, + "step": 111400 + }, + { + "epoch": 1.55, + "learning_rate": 2.2236378327923208e-05, + "loss": 1.2111, + "step": 111500 + }, + { + "epoch": 1.55, + "learning_rate": 2.2229412502263894e-05, + "loss": 1.2139, + "step": 111600 + }, + { + "epoch": 1.56, + "learning_rate": 2.222244667660458e-05, + "loss": 1.2265, + "step": 111700 + }, + { + "epoch": 1.56, + "learning_rate": 2.2215480850945263e-05, + "loss": 1.1916, + "step": 111800 + }, + { + "epoch": 1.56, + "learning_rate": 2.220851502528595e-05, + "loss": 1.2285, + "step": 111900 + }, + { + "epoch": 1.56, + "learning_rate": 2.2201549199626633e-05, + "loss": 1.2045, + "step": 112000 + }, + { + "epoch": 1.56, + "learning_rate": 2.219458337396732e-05, + "loss": 1.1994, + "step": 112100 + }, + { + "epoch": 1.56, + "learning_rate": 2.2187617548308002e-05, + "loss": 1.2142, + "step": 112200 + }, + { + "epoch": 1.56, + "learning_rate": 2.218065172264869e-05, + "loss": 1.1919, + "step": 112300 + }, + { + "epoch": 1.57, + "learning_rate": 2.217368589698937e-05, + "loss": 1.2523, + "step": 112400 + }, + { + "epoch": 1.57, + "learning_rate": 2.2166720071330058e-05, + "loss": 1.2212, + "step": 112500 + }, + { + "epoch": 1.57, + "learning_rate": 2.215975424567074e-05, + "loss": 1.2299, + "step": 112600 + }, + { + "epoch": 1.57, + "learning_rate": 2.2152788420011427e-05, + "loss": 1.2114, + "step": 112700 + }, + { + "epoch": 1.57, + "learning_rate": 2.2145822594352107e-05, + "loss": 1.2329, + "step": 112800 + }, + { + "epoch": 1.57, + "learning_rate": 2.2138856768692793e-05, + "loss": 1.2369, + "step": 112900 + }, + { + "epoch": 1.57, + "learning_rate": 2.2131890943033476e-05, + "loss": 1.2144, + "step": 113000 + }, + { + "epoch": 1.58, + "learning_rate": 2.2124925117374162e-05, + "loss": 1.2427, + "step": 113100 + }, + { + "epoch": 1.58, + "learning_rate": 2.2117959291714845e-05, + "loss": 1.2138, + "step": 113200 + }, + { + "epoch": 1.58, + "learning_rate": 2.2110993466055532e-05, + "loss": 1.1789, + "step": 113300 + }, + { + "epoch": 1.58, + "learning_rate": 2.2104027640396215e-05, + "loss": 1.2021, + "step": 113400 + }, + { + "epoch": 1.58, + "learning_rate": 2.20970618147369e-05, + "loss": 1.1692, + "step": 113500 + }, + { + "epoch": 1.58, + "learning_rate": 2.2090165647334178e-05, + "loss": 1.1913, + "step": 113600 + }, + { + "epoch": 1.58, + "learning_rate": 2.2083199821674864e-05, + "loss": 1.2328, + "step": 113700 + }, + { + "epoch": 1.59, + "learning_rate": 2.2076233996015547e-05, + "loss": 1.2098, + "step": 113800 + }, + { + "epoch": 1.59, + "learning_rate": 2.2069268170356233e-05, + "loss": 1.2152, + "step": 113900 + }, + { + "epoch": 1.59, + "learning_rate": 2.2062302344696916e-05, + "loss": 1.2237, + "step": 114000 + }, + { + "epoch": 1.59, + "learning_rate": 2.2055336519037603e-05, + "loss": 1.1828, + "step": 114100 + }, + { + "epoch": 1.59, + "learning_rate": 2.2048370693378286e-05, + "loss": 1.1819, + "step": 114200 + }, + { + "epoch": 1.59, + "learning_rate": 2.2041474525975566e-05, + "loss": 1.2185, + "step": 114300 + }, + { + "epoch": 1.59, + "learning_rate": 2.203450870031625e-05, + "loss": 1.1798, + "step": 114400 + }, + { + "epoch": 1.6, + "learning_rate": 2.2027542874656935e-05, + "loss": 1.2069, + "step": 114500 + }, + { + "epoch": 1.6, + "learning_rate": 2.2020577048997618e-05, + "loss": 1.1889, + "step": 114600 + }, + { + "epoch": 1.6, + "learning_rate": 2.2013611223338304e-05, + "loss": 1.2132, + "step": 114700 + }, + { + "epoch": 1.6, + "learning_rate": 2.2006645397678987e-05, + "loss": 1.2078, + "step": 114800 + }, + { + "epoch": 1.6, + "learning_rate": 2.1999679572019674e-05, + "loss": 1.1855, + "step": 114900 + }, + { + "epoch": 1.6, + "learning_rate": 2.1992713746360357e-05, + "loss": 1.181, + "step": 115000 + }, + { + "epoch": 1.6, + "learning_rate": 2.1985747920701043e-05, + "loss": 1.1746, + "step": 115100 + }, + { + "epoch": 1.6, + "learning_rate": 2.1978782095041726e-05, + "loss": 1.1918, + "step": 115200 + }, + { + "epoch": 1.61, + "learning_rate": 2.1971816269382412e-05, + "loss": 1.1932, + "step": 115300 + }, + { + "epoch": 1.61, + "learning_rate": 2.1964850443723092e-05, + "loss": 1.2226, + "step": 115400 + }, + { + "epoch": 1.61, + "learning_rate": 2.195788461806378e-05, + "loss": 1.2463, + "step": 115500 + }, + { + "epoch": 1.61, + "learning_rate": 2.195091879240446e-05, + "loss": 1.1943, + "step": 115600 + }, + { + "epoch": 1.61, + "learning_rate": 2.1943952966745148e-05, + "loss": 1.1542, + "step": 115700 + }, + { + "epoch": 1.61, + "learning_rate": 2.1936987141085834e-05, + "loss": 1.2038, + "step": 115800 + }, + { + "epoch": 1.61, + "learning_rate": 2.1930021315426517e-05, + "loss": 1.1774, + "step": 115900 + }, + { + "epoch": 1.62, + "learning_rate": 2.1923055489767203e-05, + "loss": 1.2212, + "step": 116000 + }, + { + "epoch": 1.62, + "learning_rate": 2.1916089664107886e-05, + "loss": 1.1795, + "step": 116100 + }, + { + "epoch": 1.62, + "learning_rate": 2.1909123838448573e-05, + "loss": 1.2041, + "step": 116200 + }, + { + "epoch": 1.62, + "learning_rate": 2.1902158012789256e-05, + "loss": 1.1721, + "step": 116300 + }, + { + "epoch": 1.62, + "learning_rate": 2.1895192187129942e-05, + "loss": 1.2156, + "step": 116400 + }, + { + "epoch": 1.62, + "learning_rate": 2.1888226361470625e-05, + "loss": 1.1982, + "step": 116500 + }, + { + "epoch": 1.62, + "learning_rate": 2.188126053581131e-05, + "loss": 1.2099, + "step": 116600 + }, + { + "epoch": 1.63, + "learning_rate": 2.1874294710151995e-05, + "loss": 1.1936, + "step": 116700 + }, + { + "epoch": 1.63, + "learning_rate": 2.1867398542749274e-05, + "loss": 1.1853, + "step": 116800 + }, + { + "epoch": 1.63, + "learning_rate": 2.1860432717089957e-05, + "loss": 1.2059, + "step": 116900 + }, + { + "epoch": 1.63, + "learning_rate": 2.1853466891430644e-05, + "loss": 1.1683, + "step": 117000 + }, + { + "epoch": 1.63, + "learning_rate": 2.1846501065771327e-05, + "loss": 1.1649, + "step": 117100 + }, + { + "epoch": 1.63, + "learning_rate": 2.1839535240112013e-05, + "loss": 1.1969, + "step": 117200 + }, + { + "epoch": 1.63, + "learning_rate": 2.1832569414452696e-05, + "loss": 1.1924, + "step": 117300 + }, + { + "epoch": 1.64, + "learning_rate": 2.1825603588793382e-05, + "loss": 1.2366, + "step": 117400 + }, + { + "epoch": 1.64, + "learning_rate": 2.1818637763134065e-05, + "loss": 1.1937, + "step": 117500 + }, + { + "epoch": 1.64, + "learning_rate": 2.1811671937474752e-05, + "loss": 1.1794, + "step": 117600 + }, + { + "epoch": 1.64, + "learning_rate": 2.1804706111815435e-05, + "loss": 1.2114, + "step": 117700 + }, + { + "epoch": 1.64, + "learning_rate": 2.1797740286156118e-05, + "loss": 1.2059, + "step": 117800 + }, + { + "epoch": 1.64, + "learning_rate": 2.17907744604968e-05, + "loss": 1.1833, + "step": 117900 + }, + { + "epoch": 1.64, + "learning_rate": 2.1783808634837487e-05, + "loss": 1.1492, + "step": 118000 + }, + { + "epoch": 1.65, + "learning_rate": 2.177684280917817e-05, + "loss": 1.1716, + "step": 118100 + }, + { + "epoch": 1.65, + "learning_rate": 2.1769876983518857e-05, + "loss": 1.156, + "step": 118200 + }, + { + "epoch": 1.65, + "learning_rate": 2.176291115785954e-05, + "loss": 1.1807, + "step": 118300 + }, + { + "epoch": 1.65, + "learning_rate": 2.1755945332200226e-05, + "loss": 1.1942, + "step": 118400 + }, + { + "epoch": 1.65, + "learning_rate": 2.174897950654091e-05, + "loss": 1.2004, + "step": 118500 + }, + { + "epoch": 1.65, + "learning_rate": 2.1742013680881595e-05, + "loss": 1.227, + "step": 118600 + }, + { + "epoch": 1.65, + "learning_rate": 2.1735047855222278e-05, + "loss": 1.2213, + "step": 118700 + }, + { + "epoch": 1.66, + "learning_rate": 2.1728082029562965e-05, + "loss": 1.2016, + "step": 118800 + }, + { + "epoch": 1.66, + "learning_rate": 2.1721116203903648e-05, + "loss": 1.2137, + "step": 118900 + }, + { + "epoch": 1.66, + "learning_rate": 2.1714150378244334e-05, + "loss": 1.1818, + "step": 119000 + }, + { + "epoch": 1.66, + "learning_rate": 2.170718455258502e-05, + "loss": 1.182, + "step": 119100 + }, + { + "epoch": 1.66, + "learning_rate": 2.1700218726925703e-05, + "loss": 1.22, + "step": 119200 + }, + { + "epoch": 1.66, + "learning_rate": 2.169325290126639e-05, + "loss": 1.1563, + "step": 119300 + }, + { + "epoch": 1.66, + "learning_rate": 2.1686287075607073e-05, + "loss": 1.2058, + "step": 119400 + }, + { + "epoch": 1.66, + "learning_rate": 2.167932124994776e-05, + "loss": 1.2657, + "step": 119500 + }, + { + "epoch": 1.67, + "learning_rate": 2.1672355424288442e-05, + "loss": 1.182, + "step": 119600 + }, + { + "epoch": 1.67, + "learning_rate": 2.166538959862913e-05, + "loss": 1.1985, + "step": 119700 + }, + { + "epoch": 1.67, + "learning_rate": 2.165842377296981e-05, + "loss": 1.1724, + "step": 119800 + }, + { + "epoch": 1.67, + "learning_rate": 2.1651457947310498e-05, + "loss": 1.1759, + "step": 119900 + }, + { + "epoch": 1.67, + "learning_rate": 2.164449212165118e-05, + "loss": 1.208, + "step": 120000 + }, + { + "epoch": 1.67, + "learning_rate": 2.1637526295991864e-05, + "loss": 1.2373, + "step": 120100 + }, + { + "epoch": 1.67, + "learning_rate": 2.1630560470332547e-05, + "loss": 1.1772, + "step": 120200 + }, + { + "epoch": 1.68, + "learning_rate": 2.1623594644673233e-05, + "loss": 1.1855, + "step": 120300 + }, + { + "epoch": 1.68, + "learning_rate": 2.1616628819013916e-05, + "loss": 1.2035, + "step": 120400 + }, + { + "epoch": 1.68, + "learning_rate": 2.1609662993354603e-05, + "loss": 1.1757, + "step": 120500 + }, + { + "epoch": 1.68, + "learning_rate": 2.1602697167695286e-05, + "loss": 1.2168, + "step": 120600 + }, + { + "epoch": 1.68, + "learning_rate": 2.1595731342035972e-05, + "loss": 1.1868, + "step": 120700 + }, + { + "epoch": 1.68, + "learning_rate": 2.1588765516376655e-05, + "loss": 1.1752, + "step": 120800 + }, + { + "epoch": 1.68, + "learning_rate": 2.158179969071734e-05, + "loss": 1.1586, + "step": 120900 + }, + { + "epoch": 1.69, + "learning_rate": 2.1574903523314618e-05, + "loss": 1.1997, + "step": 121000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1567937697655304e-05, + "loss": 1.1929, + "step": 121100 + }, + { + "epoch": 1.69, + "learning_rate": 2.1560971871995987e-05, + "loss": 1.213, + "step": 121200 + }, + { + "epoch": 1.69, + "learning_rate": 2.1554006046336673e-05, + "loss": 1.2179, + "step": 121300 + }, + { + "epoch": 1.69, + "learning_rate": 2.1547040220677356e-05, + "loss": 1.1815, + "step": 121400 + }, + { + "epoch": 1.69, + "learning_rate": 2.1540074395018043e-05, + "loss": 1.2464, + "step": 121500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1533108569358726e-05, + "loss": 1.2206, + "step": 121600 + }, + { + "epoch": 1.7, + "learning_rate": 2.1526142743699412e-05, + "loss": 1.2226, + "step": 121700 + }, + { + "epoch": 1.7, + "learning_rate": 2.1519176918040095e-05, + "loss": 1.2398, + "step": 121800 + }, + { + "epoch": 1.7, + "learning_rate": 2.151221109238078e-05, + "loss": 1.1899, + "step": 121900 + }, + { + "epoch": 1.7, + "learning_rate": 2.1505245266721465e-05, + "loss": 1.1707, + "step": 122000 + }, + { + "epoch": 1.7, + "learning_rate": 2.149827944106215e-05, + "loss": 1.206, + "step": 122100 + }, + { + "epoch": 1.7, + "learning_rate": 2.1491313615402834e-05, + "loss": 1.1963, + "step": 122200 + }, + { + "epoch": 1.7, + "learning_rate": 2.148434778974352e-05, + "loss": 1.1995, + "step": 122300 + }, + { + "epoch": 1.71, + "learning_rate": 2.1477451622340797e-05, + "loss": 1.1909, + "step": 122400 + }, + { + "epoch": 1.71, + "learning_rate": 2.1470485796681483e-05, + "loss": 1.1914, + "step": 122500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1463519971022166e-05, + "loss": 1.2262, + "step": 122600 + }, + { + "epoch": 1.71, + "learning_rate": 2.145655414536285e-05, + "loss": 1.1817, + "step": 122700 + }, + { + "epoch": 1.71, + "learning_rate": 2.1449588319703532e-05, + "loss": 1.1918, + "step": 122800 + }, + { + "epoch": 1.71, + "learning_rate": 2.144262249404422e-05, + "loss": 1.1855, + "step": 122900 + }, + { + "epoch": 1.71, + "learning_rate": 2.14356566683849e-05, + "loss": 1.202, + "step": 123000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1428690842725588e-05, + "loss": 1.2127, + "step": 123100 + }, + { + "epoch": 1.72, + "learning_rate": 2.1421725017066274e-05, + "loss": 1.1939, + "step": 123200 + }, + { + "epoch": 1.72, + "learning_rate": 2.1414759191406957e-05, + "loss": 1.1893, + "step": 123300 + }, + { + "epoch": 1.72, + "learning_rate": 2.1407793365747644e-05, + "loss": 1.1822, + "step": 123400 + }, + { + "epoch": 1.72, + "learning_rate": 2.1400827540088327e-05, + "loss": 1.203, + "step": 123500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1393861714429013e-05, + "loss": 1.2094, + "step": 123600 + }, + { + "epoch": 1.72, + "learning_rate": 2.1386895888769696e-05, + "loss": 1.1569, + "step": 123700 + }, + { + "epoch": 1.72, + "learning_rate": 2.1379930063110382e-05, + "loss": 1.1824, + "step": 123800 + }, + { + "epoch": 1.73, + "learning_rate": 2.1372964237451065e-05, + "loss": 1.2077, + "step": 123900 + }, + { + "epoch": 1.73, + "learning_rate": 2.136599841179175e-05, + "loss": 1.241, + "step": 124000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1359032586132435e-05, + "loss": 1.2163, + "step": 124100 + }, + { + "epoch": 1.73, + "learning_rate": 2.135206676047312e-05, + "loss": 1.1717, + "step": 124200 + }, + { + "epoch": 1.73, + "learning_rate": 2.1345100934813804e-05, + "loss": 1.1579, + "step": 124300 + }, + { + "epoch": 1.73, + "learning_rate": 2.133813510915449e-05, + "loss": 1.1677, + "step": 124400 + }, + { + "epoch": 1.73, + "learning_rate": 2.1331169283495173e-05, + "loss": 1.17, + "step": 124500 + }, + { + "epoch": 1.74, + "learning_rate": 2.132420345783586e-05, + "loss": 1.2317, + "step": 124600 + }, + { + "epoch": 1.74, + "learning_rate": 2.1317237632176543e-05, + "loss": 1.2105, + "step": 124700 + }, + { + "epoch": 1.74, + "learning_rate": 2.131027180651723e-05, + "loss": 1.2142, + "step": 124800 + }, + { + "epoch": 1.74, + "learning_rate": 2.130330598085791e-05, + "loss": 1.2362, + "step": 124900 + }, + { + "epoch": 1.74, + "learning_rate": 2.1296340155198595e-05, + "loss": 1.2074, + "step": 125000 + }, + { + "epoch": 1.74, + "learning_rate": 2.1289374329539278e-05, + "loss": 1.2149, + "step": 125100 + }, + { + "epoch": 1.74, + "learning_rate": 2.1282408503879965e-05, + "loss": 1.1955, + "step": 125200 + }, + { + "epoch": 1.75, + "learning_rate": 2.1275442678220647e-05, + "loss": 1.1841, + "step": 125300 + }, + { + "epoch": 1.75, + "learning_rate": 2.1268476852561334e-05, + "loss": 1.1668, + "step": 125400 + }, + { + "epoch": 1.75, + "learning_rate": 2.1261511026902017e-05, + "loss": 1.2053, + "step": 125500 + }, + { + "epoch": 1.75, + "learning_rate": 2.1254545201242703e-05, + "loss": 1.2002, + "step": 125600 + }, + { + "epoch": 1.75, + "learning_rate": 2.124764903383998e-05, + "loss": 1.1777, + "step": 125700 + }, + { + "epoch": 1.75, + "learning_rate": 2.1240683208180666e-05, + "loss": 1.207, + "step": 125800 + }, + { + "epoch": 1.75, + "learning_rate": 2.123371738252135e-05, + "loss": 1.1941, + "step": 125900 + }, + { + "epoch": 1.76, + "learning_rate": 2.1226751556862035e-05, + "loss": 1.2201, + "step": 126000 + }, + { + "epoch": 1.76, + "learning_rate": 2.121978573120272e-05, + "loss": 1.1776, + "step": 126100 + }, + { + "epoch": 1.76, + "learning_rate": 2.1212819905543405e-05, + "loss": 1.2108, + "step": 126200 + }, + { + "epoch": 1.76, + "learning_rate": 2.1205854079884088e-05, + "loss": 1.203, + "step": 126300 + }, + { + "epoch": 1.76, + "learning_rate": 2.1198888254224774e-05, + "loss": 1.1849, + "step": 126400 + }, + { + "epoch": 1.76, + "learning_rate": 2.119192242856546e-05, + "loss": 1.2132, + "step": 126500 + }, + { + "epoch": 1.76, + "learning_rate": 2.1184956602906144e-05, + "loss": 1.1879, + "step": 126600 + }, + { + "epoch": 1.77, + "learning_rate": 2.117799077724683e-05, + "loss": 1.2181, + "step": 126700 + }, + { + "epoch": 1.77, + "learning_rate": 2.1171024951587513e-05, + "loss": 1.2248, + "step": 126800 + }, + { + "epoch": 1.77, + "learning_rate": 2.11640591259282e-05, + "loss": 1.184, + "step": 126900 + }, + { + "epoch": 1.77, + "learning_rate": 2.1157093300268882e-05, + "loss": 1.1824, + "step": 127000 + }, + { + "epoch": 1.77, + "learning_rate": 2.115012747460957e-05, + "loss": 1.207, + "step": 127100 + }, + { + "epoch": 1.77, + "learning_rate": 2.114316164895025e-05, + "loss": 1.1663, + "step": 127200 + }, + { + "epoch": 1.77, + "learning_rate": 2.1136195823290935e-05, + "loss": 1.1765, + "step": 127300 + }, + { + "epoch": 1.77, + "learning_rate": 2.1129229997631618e-05, + "loss": 1.2084, + "step": 127400 + }, + { + "epoch": 1.78, + "learning_rate": 2.1122264171972304e-05, + "loss": 1.1856, + "step": 127500 + }, + { + "epoch": 1.78, + "learning_rate": 2.1115298346312987e-05, + "loss": 1.2146, + "step": 127600 + }, + { + "epoch": 1.78, + "learning_rate": 2.1108332520653673e-05, + "loss": 1.2269, + "step": 127700 + }, + { + "epoch": 1.78, + "learning_rate": 2.110143635325095e-05, + "loss": 1.1899, + "step": 127800 + }, + { + "epoch": 1.78, + "learning_rate": 2.1094470527591636e-05, + "loss": 1.212, + "step": 127900 + }, + { + "epoch": 1.78, + "learning_rate": 2.108750470193232e-05, + "loss": 1.178, + "step": 128000 + }, + { + "epoch": 1.78, + "learning_rate": 2.1080538876273006e-05, + "loss": 1.2044, + "step": 128100 + }, + { + "epoch": 1.79, + "learning_rate": 2.107357305061369e-05, + "loss": 1.1994, + "step": 128200 + }, + { + "epoch": 1.79, + "learning_rate": 2.1066607224954375e-05, + "loss": 1.16, + "step": 128300 + }, + { + "epoch": 1.79, + "learning_rate": 2.1059641399295058e-05, + "loss": 1.2212, + "step": 128400 + }, + { + "epoch": 1.79, + "learning_rate": 2.1052675573635744e-05, + "loss": 1.1828, + "step": 128500 + }, + { + "epoch": 1.79, + "learning_rate": 2.1045709747976427e-05, + "loss": 1.185, + "step": 128600 + }, + { + "epoch": 1.79, + "learning_rate": 2.1038743922317114e-05, + "loss": 1.2089, + "step": 128700 + }, + { + "epoch": 1.79, + "learning_rate": 2.1031778096657797e-05, + "loss": 1.2048, + "step": 128800 + }, + { + "epoch": 1.8, + "learning_rate": 2.1024812270998483e-05, + "loss": 1.2108, + "step": 128900 + }, + { + "epoch": 1.8, + "learning_rate": 2.1017846445339166e-05, + "loss": 1.1907, + "step": 129000 + }, + { + "epoch": 1.8, + "learning_rate": 2.1010880619679852e-05, + "loss": 1.1854, + "step": 129100 + }, + { + "epoch": 1.8, + "learning_rate": 2.1003914794020535e-05, + "loss": 1.1757, + "step": 129200 + }, + { + "epoch": 1.8, + "learning_rate": 2.0996948968361222e-05, + "loss": 1.1574, + "step": 129300 + }, + { + "epoch": 1.8, + "learning_rate": 2.0989983142701905e-05, + "loss": 1.2082, + "step": 129400 + }, + { + "epoch": 1.8, + "learning_rate": 2.098301731704259e-05, + "loss": 1.1877, + "step": 129500 + }, + { + "epoch": 1.81, + "learning_rate": 2.0976051491383274e-05, + "loss": 1.1789, + "step": 129600 + }, + { + "epoch": 1.81, + "learning_rate": 2.0969085665723957e-05, + "loss": 1.1804, + "step": 129700 + }, + { + "epoch": 1.81, + "learning_rate": 2.0962119840064643e-05, + "loss": 1.212, + "step": 129800 + }, + { + "epoch": 1.81, + "learning_rate": 2.0955154014405326e-05, + "loss": 1.2151, + "step": 129900 + }, + { + "epoch": 1.81, + "learning_rate": 2.0948188188746013e-05, + "loss": 1.1798, + "step": 130000 + }, + { + "epoch": 1.81, + "learning_rate": 2.0941222363086696e-05, + "loss": 1.1934, + "step": 130100 + }, + { + "epoch": 1.81, + "learning_rate": 2.0934256537427382e-05, + "loss": 1.1635, + "step": 130200 + }, + { + "epoch": 1.82, + "learning_rate": 2.0927290711768065e-05, + "loss": 1.2152, + "step": 130300 + }, + { + "epoch": 1.82, + "learning_rate": 2.092032488610875e-05, + "loss": 1.1814, + "step": 130400 + }, + { + "epoch": 1.82, + "learning_rate": 2.0913359060449435e-05, + "loss": 1.2153, + "step": 130500 + }, + { + "epoch": 1.82, + "learning_rate": 2.090639323479012e-05, + "loss": 1.1722, + "step": 130600 + }, + { + "epoch": 1.82, + "learning_rate": 2.0899427409130804e-05, + "loss": 1.1957, + "step": 130700 + }, + { + "epoch": 1.82, + "learning_rate": 2.089246158347149e-05, + "loss": 1.1688, + "step": 130800 + }, + { + "epoch": 1.82, + "learning_rate": 2.0885495757812173e-05, + "loss": 1.2204, + "step": 130900 + }, + { + "epoch": 1.83, + "learning_rate": 2.087852993215286e-05, + "loss": 1.1939, + "step": 131000 + }, + { + "epoch": 1.83, + "learning_rate": 2.0871564106493543e-05, + "loss": 1.1904, + "step": 131100 + }, + { + "epoch": 1.83, + "learning_rate": 2.0864667939090822e-05, + "loss": 1.187, + "step": 131200 + }, + { + "epoch": 1.83, + "learning_rate": 2.0857702113431505e-05, + "loss": 1.1394, + "step": 131300 + }, + { + "epoch": 1.83, + "learning_rate": 2.0850736287772192e-05, + "loss": 1.1678, + "step": 131400 + }, + { + "epoch": 1.83, + "learning_rate": 2.0843770462112875e-05, + "loss": 1.2065, + "step": 131500 + }, + { + "epoch": 1.83, + "learning_rate": 2.083680463645356e-05, + "loss": 1.2385, + "step": 131600 + }, + { + "epoch": 1.83, + "learning_rate": 2.0829838810794244e-05, + "loss": 1.1825, + "step": 131700 + }, + { + "epoch": 1.84, + "learning_rate": 2.082287298513493e-05, + "loss": 1.2232, + "step": 131800 + }, + { + "epoch": 1.84, + "learning_rate": 2.0815907159475614e-05, + "loss": 1.2067, + "step": 131900 + }, + { + "epoch": 1.84, + "learning_rate": 2.08089413338163e-05, + "loss": 1.2003, + "step": 132000 + }, + { + "epoch": 1.84, + "learning_rate": 2.0801975508156983e-05, + "loss": 1.1924, + "step": 132100 + }, + { + "epoch": 1.84, + "learning_rate": 2.0795009682497666e-05, + "loss": 1.1814, + "step": 132200 + }, + { + "epoch": 1.84, + "learning_rate": 2.078804385683835e-05, + "loss": 1.2183, + "step": 132300 + }, + { + "epoch": 1.84, + "learning_rate": 2.0781078031179035e-05, + "loss": 1.1836, + "step": 132400 + }, + { + "epoch": 1.85, + "learning_rate": 2.0774112205519718e-05, + "loss": 1.2281, + "step": 132500 + }, + { + "epoch": 1.85, + "learning_rate": 2.0767146379860405e-05, + "loss": 1.2025, + "step": 132600 + }, + { + "epoch": 1.85, + "learning_rate": 2.0760180554201088e-05, + "loss": 1.2013, + "step": 132700 + }, + { + "epoch": 1.85, + "learning_rate": 2.0753214728541774e-05, + "loss": 1.2316, + "step": 132800 + }, + { + "epoch": 1.85, + "learning_rate": 2.074624890288246e-05, + "loss": 1.2001, + "step": 132900 + }, + { + "epoch": 1.85, + "learning_rate": 2.0739283077223143e-05, + "loss": 1.2178, + "step": 133000 + }, + { + "epoch": 1.85, + "learning_rate": 2.073231725156383e-05, + "loss": 1.1835, + "step": 133100 + }, + { + "epoch": 1.86, + "learning_rate": 2.0725351425904513e-05, + "loss": 1.2126, + "step": 133200 + }, + { + "epoch": 1.86, + "learning_rate": 2.07183856002452e-05, + "loss": 1.1815, + "step": 133300 + }, + { + "epoch": 1.86, + "learning_rate": 2.0711419774585882e-05, + "loss": 1.1677, + "step": 133400 + }, + { + "epoch": 1.86, + "learning_rate": 2.070445394892657e-05, + "loss": 1.1902, + "step": 133500 + }, + { + "epoch": 1.86, + "learning_rate": 2.069748812326725e-05, + "loss": 1.1912, + "step": 133600 + }, + { + "epoch": 1.86, + "learning_rate": 2.0690522297607938e-05, + "loss": 1.2141, + "step": 133700 + }, + { + "epoch": 1.86, + "learning_rate": 2.068355647194862e-05, + "loss": 1.229, + "step": 133800 + }, + { + "epoch": 1.87, + "learning_rate": 2.0676590646289307e-05, + "loss": 1.1643, + "step": 133900 + }, + { + "epoch": 1.87, + "learning_rate": 2.066962482062999e-05, + "loss": 1.2217, + "step": 134000 + }, + { + "epoch": 1.87, + "learning_rate": 2.0662658994970677e-05, + "loss": 1.164, + "step": 134100 + }, + { + "epoch": 1.87, + "learning_rate": 2.065569316931136e-05, + "loss": 1.2169, + "step": 134200 + }, + { + "epoch": 1.87, + "learning_rate": 2.0648727343652046e-05, + "loss": 1.2057, + "step": 134300 + }, + { + "epoch": 1.87, + "learning_rate": 2.0641761517992726e-05, + "loss": 1.1871, + "step": 134400 + }, + { + "epoch": 1.87, + "learning_rate": 2.063486535059001e-05, + "loss": 1.2043, + "step": 134500 + }, + { + "epoch": 1.88, + "learning_rate": 2.062789952493069e-05, + "loss": 1.2072, + "step": 134600 + }, + { + "epoch": 1.88, + "learning_rate": 2.0620933699271375e-05, + "loss": 1.2292, + "step": 134700 + }, + { + "epoch": 1.88, + "learning_rate": 2.0613967873612058e-05, + "loss": 1.2426, + "step": 134800 + }, + { + "epoch": 1.88, + "learning_rate": 2.0607002047952744e-05, + "loss": 1.1745, + "step": 134900 + }, + { + "epoch": 1.88, + "learning_rate": 2.0600036222293427e-05, + "loss": 1.2162, + "step": 135000 + }, + { + "epoch": 1.88, + "learning_rate": 2.0593070396634113e-05, + "loss": 1.1857, + "step": 135100 + }, + { + "epoch": 1.88, + "learning_rate": 2.0586104570974796e-05, + "loss": 1.2112, + "step": 135200 + }, + { + "epoch": 1.88, + "learning_rate": 2.0579138745315483e-05, + "loss": 1.1718, + "step": 135300 + }, + { + "epoch": 1.89, + "learning_rate": 2.0572172919656166e-05, + "loss": 1.189, + "step": 135400 + }, + { + "epoch": 1.89, + "learning_rate": 2.0565207093996852e-05, + "loss": 1.1931, + "step": 135500 + }, + { + "epoch": 1.89, + "learning_rate": 2.0558241268337535e-05, + "loss": 1.1882, + "step": 135600 + }, + { + "epoch": 1.89, + "learning_rate": 2.055127544267822e-05, + "loss": 1.1711, + "step": 135700 + }, + { + "epoch": 1.89, + "learning_rate": 2.0544309617018905e-05, + "loss": 1.1907, + "step": 135800 + }, + { + "epoch": 1.89, + "learning_rate": 2.053734379135959e-05, + "loss": 1.2113, + "step": 135900 + }, + { + "epoch": 1.89, + "learning_rate": 2.0530377965700274e-05, + "loss": 1.2093, + "step": 136000 + }, + { + "epoch": 1.9, + "learning_rate": 2.052341214004096e-05, + "loss": 1.1927, + "step": 136100 + }, + { + "epoch": 1.9, + "learning_rate": 2.0516446314381647e-05, + "loss": 1.2145, + "step": 136200 + }, + { + "epoch": 1.9, + "learning_rate": 2.050948048872233e-05, + "loss": 1.1858, + "step": 136300 + }, + { + "epoch": 1.9, + "learning_rate": 2.0502514663063016e-05, + "loss": 1.1903, + "step": 136400 + }, + { + "epoch": 1.9, + "learning_rate": 2.04955488374037e-05, + "loss": 1.1671, + "step": 136500 + }, + { + "epoch": 1.9, + "learning_rate": 2.0488583011744385e-05, + "loss": 1.1913, + "step": 136600 + }, + { + "epoch": 1.9, + "learning_rate": 2.048161718608507e-05, + "loss": 1.2293, + "step": 136700 + }, + { + "epoch": 1.91, + "learning_rate": 2.047465136042575e-05, + "loss": 1.1974, + "step": 136800 + }, + { + "epoch": 1.91, + "learning_rate": 2.0467685534766434e-05, + "loss": 1.2024, + "step": 136900 + }, + { + "epoch": 1.91, + "learning_rate": 2.046071970910712e-05, + "loss": 1.2, + "step": 137000 + }, + { + "epoch": 1.91, + "learning_rate": 2.0453753883447804e-05, + "loss": 1.1975, + "step": 137100 + }, + { + "epoch": 1.91, + "learning_rate": 2.044678805778849e-05, + "loss": 1.2117, + "step": 137200 + }, + { + "epoch": 1.91, + "learning_rate": 2.0439822232129173e-05, + "loss": 1.1786, + "step": 137300 + }, + { + "epoch": 1.91, + "learning_rate": 2.043285640646986e-05, + "loss": 1.193, + "step": 137400 + }, + { + "epoch": 1.92, + "learning_rate": 2.0425890580810542e-05, + "loss": 1.2061, + "step": 137500 + }, + { + "epoch": 1.92, + "learning_rate": 2.041892475515123e-05, + "loss": 1.1696, + "step": 137600 + }, + { + "epoch": 1.92, + "learning_rate": 2.0411958929491912e-05, + "loss": 1.1869, + "step": 137700 + }, + { + "epoch": 1.92, + "learning_rate": 2.0404993103832598e-05, + "loss": 1.2024, + "step": 137800 + }, + { + "epoch": 1.92, + "learning_rate": 2.039802727817328e-05, + "loss": 1.1441, + "step": 137900 + }, + { + "epoch": 1.92, + "learning_rate": 2.0391061452513968e-05, + "loss": 1.1947, + "step": 138000 + }, + { + "epoch": 1.92, + "learning_rate": 2.038409562685465e-05, + "loss": 1.1952, + "step": 138100 + }, + { + "epoch": 1.93, + "learning_rate": 2.0377129801195337e-05, + "loss": 1.2063, + "step": 138200 + }, + { + "epoch": 1.93, + "learning_rate": 2.037016397553602e-05, + "loss": 1.1833, + "step": 138300 + }, + { + "epoch": 1.93, + "learning_rate": 2.0363198149876706e-05, + "loss": 1.199, + "step": 138400 + }, + { + "epoch": 1.93, + "learning_rate": 2.035623232421739e-05, + "loss": 1.176, + "step": 138500 + }, + { + "epoch": 1.93, + "learning_rate": 2.0349266498558076e-05, + "loss": 1.1947, + "step": 138600 + }, + { + "epoch": 1.93, + "learning_rate": 2.0342300672898762e-05, + "loss": 1.1985, + "step": 138700 + }, + { + "epoch": 1.93, + "learning_rate": 2.0335334847239445e-05, + "loss": 1.19, + "step": 138800 + }, + { + "epoch": 1.94, + "learning_rate": 2.032836902158013e-05, + "loss": 1.1761, + "step": 138900 + }, + { + "epoch": 1.94, + "learning_rate": 2.0321472854177408e-05, + "loss": 1.1637, + "step": 139000 + }, + { + "epoch": 1.94, + "learning_rate": 2.031450702851809e-05, + "loss": 1.1896, + "step": 139100 + }, + { + "epoch": 1.94, + "learning_rate": 2.0307541202858777e-05, + "loss": 1.1838, + "step": 139200 + }, + { + "epoch": 1.94, + "learning_rate": 2.0300575377199457e-05, + "loss": 1.2248, + "step": 139300 + }, + { + "epoch": 1.94, + "learning_rate": 2.0293609551540143e-05, + "loss": 1.2136, + "step": 139400 + }, + { + "epoch": 1.94, + "learning_rate": 2.028664372588083e-05, + "loss": 1.1767, + "step": 139500 + }, + { + "epoch": 1.94, + "learning_rate": 2.0279677900221513e-05, + "loss": 1.2188, + "step": 139600 + }, + { + "epoch": 1.95, + "learning_rate": 2.02727120745622e-05, + "loss": 1.1943, + "step": 139700 + }, + { + "epoch": 1.95, + "learning_rate": 2.0265746248902882e-05, + "loss": 1.1964, + "step": 139800 + }, + { + "epoch": 1.95, + "learning_rate": 2.0258780423243568e-05, + "loss": 1.1937, + "step": 139900 + }, + { + "epoch": 1.95, + "learning_rate": 2.025181459758425e-05, + "loss": 1.1766, + "step": 140000 + }, + { + "epoch": 1.95, + "learning_rate": 2.0244848771924938e-05, + "loss": 1.2146, + "step": 140100 + }, + { + "epoch": 1.95, + "learning_rate": 2.023788294626562e-05, + "loss": 1.2048, + "step": 140200 + }, + { + "epoch": 1.95, + "learning_rate": 2.0230917120606307e-05, + "loss": 1.1679, + "step": 140300 + }, + { + "epoch": 1.96, + "learning_rate": 2.022395129494699e-05, + "loss": 1.2035, + "step": 140400 + }, + { + "epoch": 1.96, + "learning_rate": 2.0216985469287676e-05, + "loss": 1.1797, + "step": 140500 + }, + { + "epoch": 1.96, + "learning_rate": 2.021001964362836e-05, + "loss": 1.1798, + "step": 140600 + }, + { + "epoch": 1.96, + "learning_rate": 2.0203053817969046e-05, + "loss": 1.1807, + "step": 140700 + }, + { + "epoch": 1.96, + "learning_rate": 2.0196157650566322e-05, + "loss": 1.2107, + "step": 140800 + }, + { + "epoch": 1.96, + "learning_rate": 2.018919182490701e-05, + "loss": 1.1618, + "step": 140900 + }, + { + "epoch": 1.96, + "learning_rate": 2.018222599924769e-05, + "loss": 1.18, + "step": 141000 + }, + { + "epoch": 1.97, + "learning_rate": 2.0175260173588378e-05, + "loss": 1.257, + "step": 141100 + }, + { + "epoch": 1.97, + "learning_rate": 2.016829434792906e-05, + "loss": 1.1782, + "step": 141200 + }, + { + "epoch": 1.97, + "learning_rate": 2.0161328522269747e-05, + "loss": 1.2087, + "step": 141300 + }, + { + "epoch": 1.97, + "learning_rate": 2.015436269661043e-05, + "loss": 1.1758, + "step": 141400 + }, + { + "epoch": 1.97, + "learning_rate": 2.0147396870951117e-05, + "loss": 1.1777, + "step": 141500 + }, + { + "epoch": 1.97, + "learning_rate": 2.01404310452918e-05, + "loss": 1.1925, + "step": 141600 + }, + { + "epoch": 1.97, + "learning_rate": 2.0133465219632483e-05, + "loss": 1.1688, + "step": 141700 + }, + { + "epoch": 1.98, + "learning_rate": 2.0126499393973166e-05, + "loss": 1.1683, + "step": 141800 + }, + { + "epoch": 1.98, + "learning_rate": 2.0119533568313852e-05, + "loss": 1.156, + "step": 141900 + }, + { + "epoch": 1.98, + "learning_rate": 2.0112567742654535e-05, + "loss": 1.1716, + "step": 142000 + }, + { + "epoch": 1.98, + "learning_rate": 2.010560191699522e-05, + "loss": 1.1522, + "step": 142100 + }, + { + "epoch": 1.98, + "learning_rate": 2.0098636091335904e-05, + "loss": 1.184, + "step": 142200 + }, + { + "epoch": 1.98, + "learning_rate": 2.009167026567659e-05, + "loss": 1.1877, + "step": 142300 + }, + { + "epoch": 1.98, + "learning_rate": 2.0084704440017274e-05, + "loss": 1.1699, + "step": 142400 + }, + { + "epoch": 1.99, + "learning_rate": 2.007773861435796e-05, + "loss": 1.1616, + "step": 142500 + }, + { + "epoch": 1.99, + "learning_rate": 2.0070772788698643e-05, + "loss": 1.1984, + "step": 142600 + }, + { + "epoch": 1.99, + "learning_rate": 2.006380696303933e-05, + "loss": 1.1819, + "step": 142700 + }, + { + "epoch": 1.99, + "learning_rate": 2.0056841137380016e-05, + "loss": 1.2229, + "step": 142800 + }, + { + "epoch": 1.99, + "learning_rate": 2.00498753117207e-05, + "loss": 1.1845, + "step": 142900 + }, + { + "epoch": 1.99, + "learning_rate": 2.0042909486061385e-05, + "loss": 1.1822, + "step": 143000 + }, + { + "epoch": 1.99, + "learning_rate": 2.003601331865866e-05, + "loss": 1.2037, + "step": 143100 + }, + { + "epoch": 2.0, + "learning_rate": 2.0029047492999345e-05, + "loss": 1.1949, + "step": 143200 + }, + { + "epoch": 2.0, + "learning_rate": 2.0022151325596624e-05, + "loss": 1.1887, + "step": 143300 + }, + { + "epoch": 2.0, + "learning_rate": 2.00152551581939e-05, + "loss": 1.1854, + "step": 143400 + }, + { + "epoch": 2.0, + "learning_rate": 2.0008289332534587e-05, + "loss": 1.191, + "step": 143500 + }, + { + "epoch": 2.0, + "eval_gen_len": 20.0, + "eval_loss": 1.1614574193954468, + "eval_rouge1": 11.8484, + "eval_rouge2": 3.363, + "eval_rougeL": 11.4175, + "eval_rougeLsum": 11.5037, + "eval_runtime": 1532.2202, + "eval_samples_per_second": 8.725, + "eval_steps_per_second": 2.181, + "step": 143558 + }, + { + "epoch": 2.0, + "learning_rate": 2.000132350687527e-05, + "loss": 1.1515, + "step": 143600 + }, + { + "epoch": 2.0, + "learning_rate": 1.9994357681215957e-05, + "loss": 1.0722, + "step": 143700 + }, + { + "epoch": 2.0, + "learning_rate": 1.998739185555664e-05, + "loss": 1.0699, + "step": 143800 + }, + { + "epoch": 2.0, + "learning_rate": 1.9980426029897326e-05, + "loss": 1.0917, + "step": 143900 + }, + { + "epoch": 2.01, + "learning_rate": 1.997346020423801e-05, + "loss": 1.0699, + "step": 144000 + }, + { + "epoch": 2.01, + "learning_rate": 1.9966494378578695e-05, + "loss": 1.0534, + "step": 144100 + }, + { + "epoch": 2.01, + "learning_rate": 1.995952855291938e-05, + "loss": 1.0903, + "step": 144200 + }, + { + "epoch": 2.01, + "learning_rate": 1.9952562727260065e-05, + "loss": 1.0532, + "step": 144300 + }, + { + "epoch": 2.01, + "learning_rate": 1.9945596901600748e-05, + "loss": 1.0783, + "step": 144400 + }, + { + "epoch": 2.01, + "learning_rate": 1.993863107594143e-05, + "loss": 1.0558, + "step": 144500 + }, + { + "epoch": 2.01, + "learning_rate": 1.9931665250282114e-05, + "loss": 1.0627, + "step": 144600 + }, + { + "epoch": 2.02, + "learning_rate": 1.99246994246228e-05, + "loss": 1.1152, + "step": 144700 + }, + { + "epoch": 2.02, + "learning_rate": 1.9917733598963483e-05, + "loss": 1.0542, + "step": 144800 + }, + { + "epoch": 2.02, + "learning_rate": 1.991076777330417e-05, + "loss": 1.0506, + "step": 144900 + }, + { + "epoch": 2.02, + "learning_rate": 1.9903801947644852e-05, + "loss": 1.0801, + "step": 145000 + }, + { + "epoch": 2.02, + "learning_rate": 1.989683612198554e-05, + "loss": 1.0663, + "step": 145100 + }, + { + "epoch": 2.02, + "learning_rate": 1.9889870296326225e-05, + "loss": 1.1189, + "step": 145200 + }, + { + "epoch": 2.02, + "learning_rate": 1.9882904470666908e-05, + "loss": 1.0802, + "step": 145300 + }, + { + "epoch": 2.03, + "learning_rate": 1.9875938645007595e-05, + "loss": 1.0668, + "step": 145400 + }, + { + "epoch": 2.03, + "learning_rate": 1.9868972819348278e-05, + "loss": 1.0909, + "step": 145500 + }, + { + "epoch": 2.03, + "learning_rate": 1.9862006993688964e-05, + "loss": 1.0837, + "step": 145600 + }, + { + "epoch": 2.03, + "learning_rate": 1.9855041168029647e-05, + "loss": 1.1061, + "step": 145700 + }, + { + "epoch": 2.03, + "learning_rate": 1.9848075342370333e-05, + "loss": 1.0728, + "step": 145800 + }, + { + "epoch": 2.03, + "learning_rate": 1.9841109516711016e-05, + "loss": 1.0877, + "step": 145900 + }, + { + "epoch": 2.03, + "learning_rate": 1.9834143691051703e-05, + "loss": 1.0797, + "step": 146000 + }, + { + "epoch": 2.04, + "learning_rate": 1.9827177865392386e-05, + "loss": 1.0839, + "step": 146100 + }, + { + "epoch": 2.04, + "learning_rate": 1.9820212039733072e-05, + "loss": 1.0552, + "step": 146200 + }, + { + "epoch": 2.04, + "learning_rate": 1.9813246214073755e-05, + "loss": 1.1046, + "step": 146300 + }, + { + "epoch": 2.04, + "learning_rate": 1.980628038841444e-05, + "loss": 1.066, + "step": 146400 + }, + { + "epoch": 2.04, + "learning_rate": 1.9799314562755124e-05, + "loss": 1.0863, + "step": 146500 + }, + { + "epoch": 2.04, + "learning_rate": 1.979234873709581e-05, + "loss": 1.0575, + "step": 146600 + }, + { + "epoch": 2.04, + "learning_rate": 1.978538291143649e-05, + "loss": 1.0753, + "step": 146700 + }, + { + "epoch": 2.05, + "learning_rate": 1.9778417085777177e-05, + "loss": 1.0724, + "step": 146800 + }, + { + "epoch": 2.05, + "learning_rate": 1.977145126011786e-05, + "loss": 1.0711, + "step": 146900 + }, + { + "epoch": 2.05, + "learning_rate": 1.9764485434458546e-05, + "loss": 1.0756, + "step": 147000 + }, + { + "epoch": 2.05, + "learning_rate": 1.975751960879923e-05, + "loss": 1.0942, + "step": 147100 + }, + { + "epoch": 2.05, + "learning_rate": 1.9750553783139915e-05, + "loss": 1.0993, + "step": 147200 + }, + { + "epoch": 2.05, + "learning_rate": 1.97435879574806e-05, + "loss": 1.0571, + "step": 147300 + }, + { + "epoch": 2.05, + "learning_rate": 1.9736622131821285e-05, + "loss": 1.0705, + "step": 147400 + }, + { + "epoch": 2.05, + "learning_rate": 1.9729656306161968e-05, + "loss": 1.0765, + "step": 147500 + }, + { + "epoch": 2.06, + "learning_rate": 1.9722690480502654e-05, + "loss": 1.0937, + "step": 147600 + }, + { + "epoch": 2.06, + "learning_rate": 1.971572465484334e-05, + "loss": 1.0741, + "step": 147700 + }, + { + "epoch": 2.06, + "learning_rate": 1.9708758829184024e-05, + "loss": 1.08, + "step": 147800 + }, + { + "epoch": 2.06, + "learning_rate": 1.970179300352471e-05, + "loss": 1.0438, + "step": 147900 + }, + { + "epoch": 2.06, + "learning_rate": 1.9694827177865393e-05, + "loss": 1.086, + "step": 148000 + }, + { + "epoch": 2.06, + "learning_rate": 1.968786135220608e-05, + "loss": 1.0679, + "step": 148100 + }, + { + "epoch": 2.06, + "learning_rate": 1.9680895526546762e-05, + "loss": 1.0575, + "step": 148200 + }, + { + "epoch": 2.07, + "learning_rate": 1.967392970088745e-05, + "loss": 1.0817, + "step": 148300 + }, + { + "epoch": 2.07, + "learning_rate": 1.966696387522813e-05, + "loss": 1.0588, + "step": 148400 + }, + { + "epoch": 2.07, + "learning_rate": 1.9659998049568818e-05, + "loss": 1.0579, + "step": 148500 + }, + { + "epoch": 2.07, + "learning_rate": 1.96530322239095e-05, + "loss": 1.1229, + "step": 148600 + }, + { + "epoch": 2.07, + "learning_rate": 1.9646066398250187e-05, + "loss": 1.0805, + "step": 148700 + }, + { + "epoch": 2.07, + "learning_rate": 1.963910057259087e-05, + "loss": 1.0663, + "step": 148800 + }, + { + "epoch": 2.07, + "learning_rate": 1.9632134746931553e-05, + "loss": 1.0779, + "step": 148900 + }, + { + "epoch": 2.08, + "learning_rate": 1.9625168921272236e-05, + "loss": 1.0883, + "step": 149000 + }, + { + "epoch": 2.08, + "learning_rate": 1.9618203095612923e-05, + "loss": 1.0812, + "step": 149100 + }, + { + "epoch": 2.08, + "learning_rate": 1.9611237269953606e-05, + "loss": 1.0774, + "step": 149200 + }, + { + "epoch": 2.08, + "learning_rate": 1.9604271444294292e-05, + "loss": 1.0757, + "step": 149300 + }, + { + "epoch": 2.08, + "learning_rate": 1.9597305618634975e-05, + "loss": 1.0626, + "step": 149400 + }, + { + "epoch": 2.08, + "learning_rate": 1.959033979297566e-05, + "loss": 1.0566, + "step": 149500 + }, + { + "epoch": 2.08, + "learning_rate": 1.9583373967316344e-05, + "loss": 1.0388, + "step": 149600 + }, + { + "epoch": 2.09, + "learning_rate": 1.957640814165703e-05, + "loss": 1.0847, + "step": 149700 + }, + { + "epoch": 2.09, + "learning_rate": 1.9569442315997714e-05, + "loss": 1.0748, + "step": 149800 + }, + { + "epoch": 2.09, + "learning_rate": 1.95624764903384e-05, + "loss": 1.0418, + "step": 149900 + }, + { + "epoch": 2.09, + "learning_rate": 1.9555580322935677e-05, + "loss": 1.0878, + "step": 150000 + }, + { + "epoch": 2.09, + "learning_rate": 1.9548614497276363e-05, + "loss": 1.0756, + "step": 150100 + }, + { + "epoch": 2.09, + "learning_rate": 1.9541648671617046e-05, + "loss": 1.11, + "step": 150200 + }, + { + "epoch": 2.09, + "learning_rate": 1.9534682845957732e-05, + "loss": 1.0875, + "step": 150300 + }, + { + "epoch": 2.1, + "learning_rate": 1.9527717020298415e-05, + "loss": 1.1018, + "step": 150400 + }, + { + "epoch": 2.1, + "learning_rate": 1.9520751194639102e-05, + "loss": 1.0643, + "step": 150500 + }, + { + "epoch": 2.1, + "learning_rate": 1.9513855027236378e-05, + "loss": 1.0752, + "step": 150600 + }, + { + "epoch": 2.1, + "learning_rate": 1.9506889201577065e-05, + "loss": 1.0931, + "step": 150700 + }, + { + "epoch": 2.1, + "learning_rate": 1.9499923375917748e-05, + "loss": 1.0546, + "step": 150800 + }, + { + "epoch": 2.1, + "learning_rate": 1.9492957550258434e-05, + "loss": 1.0705, + "step": 150900 + }, + { + "epoch": 2.1, + "learning_rate": 1.9485991724599117e-05, + "loss": 1.1041, + "step": 151000 + }, + { + "epoch": 2.11, + "learning_rate": 1.9479025898939803e-05, + "loss": 1.052, + "step": 151100 + }, + { + "epoch": 2.11, + "learning_rate": 1.9472060073280486e-05, + "loss": 1.0799, + "step": 151200 + }, + { + "epoch": 2.11, + "learning_rate": 1.9465094247621173e-05, + "loss": 1.0159, + "step": 151300 + }, + { + "epoch": 2.11, + "learning_rate": 1.9458128421961856e-05, + "loss": 1.0645, + "step": 151400 + }, + { + "epoch": 2.11, + "learning_rate": 1.945116259630254e-05, + "loss": 1.0853, + "step": 151500 + }, + { + "epoch": 2.11, + "learning_rate": 1.9444196770643225e-05, + "loss": 1.0862, + "step": 151600 + }, + { + "epoch": 2.11, + "learning_rate": 1.9437230944983908e-05, + "loss": 1.0347, + "step": 151700 + }, + { + "epoch": 2.11, + "learning_rate": 1.9430265119324594e-05, + "loss": 1.0947, + "step": 151800 + }, + { + "epoch": 2.12, + "learning_rate": 1.9423299293665277e-05, + "loss": 1.0781, + "step": 151900 + }, + { + "epoch": 2.12, + "learning_rate": 1.9416333468005964e-05, + "loss": 1.0664, + "step": 152000 + }, + { + "epoch": 2.12, + "learning_rate": 1.9409367642346647e-05, + "loss": 1.0936, + "step": 152100 + }, + { + "epoch": 2.12, + "learning_rate": 1.9402401816687333e-05, + "loss": 1.0828, + "step": 152200 + }, + { + "epoch": 2.12, + "learning_rate": 1.9395435991028016e-05, + "loss": 1.0902, + "step": 152300 + }, + { + "epoch": 2.12, + "learning_rate": 1.9388470165368702e-05, + "loss": 1.0435, + "step": 152400 + }, + { + "epoch": 2.12, + "learning_rate": 1.9381504339709385e-05, + "loss": 1.07, + "step": 152500 + }, + { + "epoch": 2.13, + "learning_rate": 1.9374538514050072e-05, + "loss": 1.0898, + "step": 152600 + }, + { + "epoch": 2.13, + "learning_rate": 1.9367572688390755e-05, + "loss": 1.0498, + "step": 152700 + }, + { + "epoch": 2.13, + "learning_rate": 1.936060686273144e-05, + "loss": 1.0636, + "step": 152800 + }, + { + "epoch": 2.13, + "learning_rate": 1.9353641037072124e-05, + "loss": 1.0898, + "step": 152900 + }, + { + "epoch": 2.13, + "learning_rate": 1.934667521141281e-05, + "loss": 1.0735, + "step": 153000 + }, + { + "epoch": 2.13, + "learning_rate": 1.9339709385753494e-05, + "loss": 1.0621, + "step": 153100 + }, + { + "epoch": 2.13, + "learning_rate": 1.933274356009418e-05, + "loss": 1.0774, + "step": 153200 + }, + { + "epoch": 2.14, + "learning_rate": 1.9325777734434863e-05, + "loss": 1.0564, + "step": 153300 + }, + { + "epoch": 2.14, + "learning_rate": 1.931881190877555e-05, + "loss": 1.1064, + "step": 153400 + }, + { + "epoch": 2.14, + "learning_rate": 1.9311846083116232e-05, + "loss": 1.0568, + "step": 153500 + }, + { + "epoch": 2.14, + "learning_rate": 1.930488025745692e-05, + "loss": 1.0978, + "step": 153600 + }, + { + "epoch": 2.14, + "learning_rate": 1.92979144317976e-05, + "loss": 1.0948, + "step": 153700 + }, + { + "epoch": 2.14, + "learning_rate": 1.9290948606138285e-05, + "loss": 1.0576, + "step": 153800 + }, + { + "epoch": 2.14, + "learning_rate": 1.9283982780478968e-05, + "loss": 1.0557, + "step": 153900 + }, + { + "epoch": 2.15, + "learning_rate": 1.9277016954819654e-05, + "loss": 1.0683, + "step": 154000 + }, + { + "epoch": 2.15, + "learning_rate": 1.927005112916034e-05, + "loss": 1.1081, + "step": 154100 + }, + { + "epoch": 2.15, + "learning_rate": 1.9263085303501023e-05, + "loss": 1.0581, + "step": 154200 + }, + { + "epoch": 2.15, + "learning_rate": 1.925611947784171e-05, + "loss": 1.0605, + "step": 154300 + }, + { + "epoch": 2.15, + "learning_rate": 1.9249153652182393e-05, + "loss": 1.0912, + "step": 154400 + }, + { + "epoch": 2.15, + "learning_rate": 1.924218782652308e-05, + "loss": 1.0877, + "step": 154500 + }, + { + "epoch": 2.15, + "learning_rate": 1.9235291659120356e-05, + "loss": 1.0878, + "step": 154600 + }, + { + "epoch": 2.16, + "learning_rate": 1.922832583346104e-05, + "loss": 1.1009, + "step": 154700 + }, + { + "epoch": 2.16, + "learning_rate": 1.9221360007801725e-05, + "loss": 1.0887, + "step": 154800 + }, + { + "epoch": 2.16, + "learning_rate": 1.921439418214241e-05, + "loss": 1.0782, + "step": 154900 + }, + { + "epoch": 2.16, + "learning_rate": 1.9207428356483094e-05, + "loss": 1.0584, + "step": 155000 + }, + { + "epoch": 2.16, + "learning_rate": 1.920046253082378e-05, + "loss": 1.0652, + "step": 155100 + }, + { + "epoch": 2.16, + "learning_rate": 1.9193496705164464e-05, + "loss": 1.0993, + "step": 155200 + }, + { + "epoch": 2.16, + "learning_rate": 1.918653087950515e-05, + "loss": 1.1073, + "step": 155300 + }, + { + "epoch": 2.16, + "learning_rate": 1.9179565053845833e-05, + "loss": 1.0592, + "step": 155400 + }, + { + "epoch": 2.17, + "learning_rate": 1.917259922818652e-05, + "loss": 1.0819, + "step": 155500 + }, + { + "epoch": 2.17, + "learning_rate": 1.9165633402527202e-05, + "loss": 1.0764, + "step": 155600 + }, + { + "epoch": 2.17, + "learning_rate": 1.915866757686789e-05, + "loss": 1.103, + "step": 155700 + }, + { + "epoch": 2.17, + "learning_rate": 1.9151701751208572e-05, + "loss": 1.0686, + "step": 155800 + }, + { + "epoch": 2.17, + "learning_rate": 1.9144735925549258e-05, + "loss": 1.106, + "step": 155900 + }, + { + "epoch": 2.17, + "learning_rate": 1.913777009988994e-05, + "loss": 1.0904, + "step": 156000 + }, + { + "epoch": 2.17, + "learning_rate": 1.9130804274230627e-05, + "loss": 1.0756, + "step": 156100 + }, + { + "epoch": 2.18, + "learning_rate": 1.9123838448571307e-05, + "loss": 1.0832, + "step": 156200 + }, + { + "epoch": 2.18, + "learning_rate": 1.9116872622911993e-05, + "loss": 1.0617, + "step": 156300 + }, + { + "epoch": 2.18, + "learning_rate": 1.9109906797252676e-05, + "loss": 1.0715, + "step": 156400 + }, + { + "epoch": 2.18, + "learning_rate": 1.9102940971593363e-05, + "loss": 1.0961, + "step": 156500 + }, + { + "epoch": 2.18, + "learning_rate": 1.9095975145934046e-05, + "loss": 1.0632, + "step": 156600 + }, + { + "epoch": 2.18, + "learning_rate": 1.9089009320274732e-05, + "loss": 1.0478, + "step": 156700 + }, + { + "epoch": 2.18, + "learning_rate": 1.9082043494615415e-05, + "loss": 1.0861, + "step": 156800 + }, + { + "epoch": 2.19, + "learning_rate": 1.90750776689561e-05, + "loss": 1.0971, + "step": 156900 + }, + { + "epoch": 2.19, + "learning_rate": 1.9068181501553378e-05, + "loss": 1.0603, + "step": 157000 + }, + { + "epoch": 2.19, + "learning_rate": 1.9061215675894064e-05, + "loss": 1.0885, + "step": 157100 + }, + { + "epoch": 2.19, + "learning_rate": 1.9054249850234747e-05, + "loss": 1.0893, + "step": 157200 + }, + { + "epoch": 2.19, + "learning_rate": 1.9047284024575434e-05, + "loss": 1.085, + "step": 157300 + }, + { + "epoch": 2.19, + "learning_rate": 1.9040318198916117e-05, + "loss": 1.0892, + "step": 157400 + }, + { + "epoch": 2.19, + "learning_rate": 1.9033352373256803e-05, + "loss": 1.0486, + "step": 157500 + }, + { + "epoch": 2.2, + "learning_rate": 1.9026386547597486e-05, + "loss": 1.1074, + "step": 157600 + }, + { + "epoch": 2.2, + "learning_rate": 1.9019420721938172e-05, + "loss": 1.0732, + "step": 157700 + }, + { + "epoch": 2.2, + "learning_rate": 1.9012454896278855e-05, + "loss": 1.0925, + "step": 157800 + }, + { + "epoch": 2.2, + "learning_rate": 1.9005489070619542e-05, + "loss": 1.0564, + "step": 157900 + }, + { + "epoch": 2.2, + "learning_rate": 1.8998523244960225e-05, + "loss": 1.0608, + "step": 158000 + }, + { + "epoch": 2.2, + "learning_rate": 1.899155741930091e-05, + "loss": 1.0669, + "step": 158100 + }, + { + "epoch": 2.2, + "learning_rate": 1.8984591593641598e-05, + "loss": 1.066, + "step": 158200 + }, + { + "epoch": 2.21, + "learning_rate": 1.897762576798228e-05, + "loss": 1.08, + "step": 158300 + }, + { + "epoch": 2.21, + "learning_rate": 1.8970659942322967e-05, + "loss": 1.0728, + "step": 158400 + }, + { + "epoch": 2.21, + "learning_rate": 1.896369411666365e-05, + "loss": 1.0898, + "step": 158500 + }, + { + "epoch": 2.21, + "learning_rate": 1.8956728291004333e-05, + "loss": 1.0327, + "step": 158600 + }, + { + "epoch": 2.21, + "learning_rate": 1.8949762465345016e-05, + "loss": 1.1012, + "step": 158700 + }, + { + "epoch": 2.21, + "learning_rate": 1.8942796639685702e-05, + "loss": 1.1121, + "step": 158800 + }, + { + "epoch": 2.21, + "learning_rate": 1.8935830814026385e-05, + "loss": 1.0478, + "step": 158900 + }, + { + "epoch": 2.22, + "learning_rate": 1.892886498836707e-05, + "loss": 1.0447, + "step": 159000 + }, + { + "epoch": 2.22, + "learning_rate": 1.8921899162707755e-05, + "loss": 1.0683, + "step": 159100 + }, + { + "epoch": 2.22, + "learning_rate": 1.891493333704844e-05, + "loss": 1.0817, + "step": 159200 + }, + { + "epoch": 2.22, + "learning_rate": 1.8907967511389124e-05, + "loss": 1.0836, + "step": 159300 + }, + { + "epoch": 2.22, + "learning_rate": 1.8901071343986404e-05, + "loss": 1.1046, + "step": 159400 + }, + { + "epoch": 2.22, + "learning_rate": 1.8894105518327087e-05, + "loss": 1.1115, + "step": 159500 + }, + { + "epoch": 2.22, + "learning_rate": 1.8887139692667773e-05, + "loss": 1.1063, + "step": 159600 + }, + { + "epoch": 2.22, + "learning_rate": 1.8880173867008456e-05, + "loss": 1.1138, + "step": 159700 + }, + { + "epoch": 2.23, + "learning_rate": 1.8873208041349143e-05, + "loss": 1.1093, + "step": 159800 + }, + { + "epoch": 2.23, + "learning_rate": 1.8866242215689826e-05, + "loss": 1.0654, + "step": 159900 + }, + { + "epoch": 2.23, + "learning_rate": 1.8859276390030512e-05, + "loss": 1.0395, + "step": 160000 + }, + { + "epoch": 2.23, + "learning_rate": 1.8852310564371195e-05, + "loss": 1.0777, + "step": 160100 + }, + { + "epoch": 2.23, + "learning_rate": 1.884534473871188e-05, + "loss": 1.0917, + "step": 160200 + }, + { + "epoch": 2.23, + "learning_rate": 1.8838378913052564e-05, + "loss": 1.096, + "step": 160300 + }, + { + "epoch": 2.23, + "learning_rate": 1.883141308739325e-05, + "loss": 1.0607, + "step": 160400 + }, + { + "epoch": 2.24, + "learning_rate": 1.8824447261733934e-05, + "loss": 1.0552, + "step": 160500 + }, + { + "epoch": 2.24, + "learning_rate": 1.881748143607462e-05, + "loss": 1.0705, + "step": 160600 + }, + { + "epoch": 2.24, + "learning_rate": 1.8810515610415303e-05, + "loss": 1.0704, + "step": 160700 + }, + { + "epoch": 2.24, + "learning_rate": 1.880354978475599e-05, + "loss": 1.0819, + "step": 160800 + }, + { + "epoch": 2.24, + "learning_rate": 1.8796583959096672e-05, + "loss": 1.0863, + "step": 160900 + }, + { + "epoch": 2.24, + "learning_rate": 1.878961813343736e-05, + "loss": 1.0816, + "step": 161000 + }, + { + "epoch": 2.24, + "learning_rate": 1.878265230777804e-05, + "loss": 1.0417, + "step": 161100 + }, + { + "epoch": 2.25, + "learning_rate": 1.8775686482118725e-05, + "loss": 1.08, + "step": 161200 + }, + { + "epoch": 2.25, + "learning_rate": 1.8768720656459408e-05, + "loss": 1.0572, + "step": 161300 + }, + { + "epoch": 2.25, + "learning_rate": 1.8761754830800094e-05, + "loss": 1.0864, + "step": 161400 + }, + { + "epoch": 2.25, + "learning_rate": 1.875478900514078e-05, + "loss": 1.0694, + "step": 161500 + }, + { + "epoch": 2.25, + "learning_rate": 1.8747892837738057e-05, + "loss": 1.0714, + "step": 161600 + }, + { + "epoch": 2.25, + "learning_rate": 1.8740996670335333e-05, + "loss": 1.0979, + "step": 161700 + }, + { + "epoch": 2.25, + "learning_rate": 1.873403084467602e-05, + "loss": 1.0908, + "step": 161800 + }, + { + "epoch": 2.26, + "learning_rate": 1.8727065019016703e-05, + "loss": 1.0693, + "step": 161900 + }, + { + "epoch": 2.26, + "learning_rate": 1.872009919335739e-05, + "loss": 1.0842, + "step": 162000 + }, + { + "epoch": 2.26, + "learning_rate": 1.8713133367698072e-05, + "loss": 1.1071, + "step": 162100 + }, + { + "epoch": 2.26, + "learning_rate": 1.870616754203876e-05, + "loss": 1.0485, + "step": 162200 + }, + { + "epoch": 2.26, + "learning_rate": 1.869920171637944e-05, + "loss": 1.0638, + "step": 162300 + }, + { + "epoch": 2.26, + "learning_rate": 1.8692235890720128e-05, + "loss": 1.0964, + "step": 162400 + }, + { + "epoch": 2.26, + "learning_rate": 1.868527006506081e-05, + "loss": 1.0537, + "step": 162500 + }, + { + "epoch": 2.27, + "learning_rate": 1.8678304239401497e-05, + "loss": 1.0453, + "step": 162600 + }, + { + "epoch": 2.27, + "learning_rate": 1.867133841374218e-05, + "loss": 1.0647, + "step": 162700 + }, + { + "epoch": 2.27, + "learning_rate": 1.8664372588082867e-05, + "loss": 1.1006, + "step": 162800 + }, + { + "epoch": 2.27, + "learning_rate": 1.865740676242355e-05, + "loss": 1.0989, + "step": 162900 + }, + { + "epoch": 2.27, + "learning_rate": 1.8650440936764236e-05, + "loss": 1.0766, + "step": 163000 + }, + { + "epoch": 2.27, + "learning_rate": 1.8643475111104922e-05, + "loss": 1.0662, + "step": 163100 + }, + { + "epoch": 2.27, + "learning_rate": 1.8636509285445605e-05, + "loss": 1.1073, + "step": 163200 + }, + { + "epoch": 2.28, + "learning_rate": 1.862954345978629e-05, + "loss": 1.0682, + "step": 163300 + }, + { + "epoch": 2.28, + "learning_rate": 1.8622577634126975e-05, + "loss": 1.0908, + "step": 163400 + }, + { + "epoch": 2.28, + "learning_rate": 1.861561180846766e-05, + "loss": 1.0464, + "step": 163500 + }, + { + "epoch": 2.28, + "learning_rate": 1.8608645982808344e-05, + "loss": 1.0401, + "step": 163600 + }, + { + "epoch": 2.28, + "learning_rate": 1.8601680157149027e-05, + "loss": 1.1031, + "step": 163700 + }, + { + "epoch": 2.28, + "learning_rate": 1.859471433148971e-05, + "loss": 1.0626, + "step": 163800 + }, + { + "epoch": 2.28, + "learning_rate": 1.8587748505830396e-05, + "loss": 1.1199, + "step": 163900 + }, + { + "epoch": 2.28, + "learning_rate": 1.858078268017108e-05, + "loss": 1.0946, + "step": 164000 + }, + { + "epoch": 2.29, + "learning_rate": 1.8573816854511766e-05, + "loss": 1.0756, + "step": 164100 + }, + { + "epoch": 2.29, + "learning_rate": 1.856685102885245e-05, + "loss": 1.0578, + "step": 164200 + }, + { + "epoch": 2.29, + "learning_rate": 1.8559885203193135e-05, + "loss": 1.0652, + "step": 164300 + }, + { + "epoch": 2.29, + "learning_rate": 1.8552919377533818e-05, + "loss": 1.0721, + "step": 164400 + }, + { + "epoch": 2.29, + "learning_rate": 1.8546092868387688e-05, + "loss": 1.0962, + "step": 164500 + }, + { + "epoch": 2.29, + "learning_rate": 1.8539127042728374e-05, + "loss": 1.0605, + "step": 164600 + }, + { + "epoch": 2.29, + "learning_rate": 1.853216121706906e-05, + "loss": 1.0535, + "step": 164700 + }, + { + "epoch": 2.3, + "learning_rate": 1.8525195391409744e-05, + "loss": 1.0563, + "step": 164800 + }, + { + "epoch": 2.3, + "learning_rate": 1.851822956575043e-05, + "loss": 1.1039, + "step": 164900 + }, + { + "epoch": 2.3, + "learning_rate": 1.8511263740091113e-05, + "loss": 1.0619, + "step": 165000 + }, + { + "epoch": 2.3, + "learning_rate": 1.85042979144318e-05, + "loss": 1.0629, + "step": 165100 + }, + { + "epoch": 2.3, + "learning_rate": 1.8497332088772482e-05, + "loss": 1.0738, + "step": 165200 + }, + { + "epoch": 2.3, + "learning_rate": 1.849036626311317e-05, + "loss": 1.0515, + "step": 165300 + }, + { + "epoch": 2.3, + "learning_rate": 1.8483400437453852e-05, + "loss": 1.0813, + "step": 165400 + }, + { + "epoch": 2.31, + "learning_rate": 1.8476434611794538e-05, + "loss": 1.0457, + "step": 165500 + }, + { + "epoch": 2.31, + "learning_rate": 1.846946878613522e-05, + "loss": 1.0754, + "step": 165600 + }, + { + "epoch": 2.31, + "learning_rate": 1.8462502960475908e-05, + "loss": 1.1107, + "step": 165700 + }, + { + "epoch": 2.31, + "learning_rate": 1.845553713481659e-05, + "loss": 1.0671, + "step": 165800 + }, + { + "epoch": 2.31, + "learning_rate": 1.8448571309157277e-05, + "loss": 1.1161, + "step": 165900 + }, + { + "epoch": 2.31, + "learning_rate": 1.844160548349796e-05, + "loss": 1.0534, + "step": 166000 + }, + { + "epoch": 2.31, + "learning_rate": 1.8434639657838646e-05, + "loss": 1.0737, + "step": 166100 + }, + { + "epoch": 2.32, + "learning_rate": 1.842767383217933e-05, + "loss": 1.0896, + "step": 166200 + }, + { + "epoch": 2.32, + "learning_rate": 1.8420708006520012e-05, + "loss": 1.0861, + "step": 166300 + }, + { + "epoch": 2.32, + "learning_rate": 1.8413742180860695e-05, + "loss": 1.0763, + "step": 166400 + }, + { + "epoch": 2.32, + "learning_rate": 1.840677635520138e-05, + "loss": 1.0291, + "step": 166500 + }, + { + "epoch": 2.32, + "learning_rate": 1.8399810529542065e-05, + "loss": 1.0815, + "step": 166600 + }, + { + "epoch": 2.32, + "learning_rate": 1.839284470388275e-05, + "loss": 1.0771, + "step": 166700 + }, + { + "epoch": 2.32, + "learning_rate": 1.8385878878223434e-05, + "loss": 1.0989, + "step": 166800 + }, + { + "epoch": 2.33, + "learning_rate": 1.837891305256412e-05, + "loss": 1.0677, + "step": 166900 + }, + { + "epoch": 2.33, + "learning_rate": 1.8371947226904803e-05, + "loss": 1.0529, + "step": 167000 + }, + { + "epoch": 2.33, + "learning_rate": 1.836498140124549e-05, + "loss": 1.0796, + "step": 167100 + }, + { + "epoch": 2.33, + "learning_rate": 1.8358015575586176e-05, + "loss": 1.0991, + "step": 167200 + }, + { + "epoch": 2.33, + "learning_rate": 1.835104974992686e-05, + "loss": 1.0839, + "step": 167300 + }, + { + "epoch": 2.33, + "learning_rate": 1.8344083924267545e-05, + "loss": 1.1401, + "step": 167400 + }, + { + "epoch": 2.33, + "learning_rate": 1.833711809860823e-05, + "loss": 1.0324, + "step": 167500 + }, + { + "epoch": 2.33, + "learning_rate": 1.8330152272948915e-05, + "loss": 1.0832, + "step": 167600 + }, + { + "epoch": 2.34, + "learning_rate": 1.8323186447289598e-05, + "loss": 1.0761, + "step": 167700 + }, + { + "epoch": 2.34, + "learning_rate": 1.8316220621630284e-05, + "loss": 1.0775, + "step": 167800 + }, + { + "epoch": 2.34, + "learning_rate": 1.8309254795970967e-05, + "loss": 1.1074, + "step": 167900 + }, + { + "epoch": 2.34, + "learning_rate": 1.8302288970311654e-05, + "loss": 1.0927, + "step": 168000 + }, + { + "epoch": 2.34, + "learning_rate": 1.8295323144652337e-05, + "loss": 1.0714, + "step": 168100 + }, + { + "epoch": 2.34, + "learning_rate": 1.8288357318993023e-05, + "loss": 1.0387, + "step": 168200 + }, + { + "epoch": 2.34, + "learning_rate": 1.8281391493333706e-05, + "loss": 1.096, + "step": 168300 + }, + { + "epoch": 2.35, + "learning_rate": 1.8274425667674392e-05, + "loss": 1.0691, + "step": 168400 + }, + { + "epoch": 2.35, + "learning_rate": 1.8267459842015072e-05, + "loss": 1.0612, + "step": 168500 + }, + { + "epoch": 2.35, + "learning_rate": 1.8260494016355758e-05, + "loss": 1.0746, + "step": 168600 + }, + { + "epoch": 2.35, + "learning_rate": 1.825352819069644e-05, + "loss": 1.0709, + "step": 168700 + }, + { + "epoch": 2.35, + "learning_rate": 1.8246562365037128e-05, + "loss": 1.0541, + "step": 168800 + }, + { + "epoch": 2.35, + "learning_rate": 1.823959653937781e-05, + "loss": 1.0693, + "step": 168900 + }, + { + "epoch": 2.35, + "learning_rate": 1.8232630713718497e-05, + "loss": 1.0334, + "step": 169000 + }, + { + "epoch": 2.36, + "learning_rate": 1.8225734546315773e-05, + "loss": 1.0684, + "step": 169100 + }, + { + "epoch": 2.36, + "learning_rate": 1.821876872065646e-05, + "loss": 1.1119, + "step": 169200 + }, + { + "epoch": 2.36, + "learning_rate": 1.8211802894997143e-05, + "loss": 1.0625, + "step": 169300 + }, + { + "epoch": 2.36, + "learning_rate": 1.820483706933783e-05, + "loss": 1.078, + "step": 169400 + }, + { + "epoch": 2.36, + "learning_rate": 1.8197871243678512e-05, + "loss": 1.0941, + "step": 169500 + }, + { + "epoch": 2.36, + "learning_rate": 1.81909054180192e-05, + "loss": 1.1107, + "step": 169600 + }, + { + "epoch": 2.36, + "learning_rate": 1.818393959235988e-05, + "loss": 1.0978, + "step": 169700 + }, + { + "epoch": 2.37, + "learning_rate": 1.8176973766700568e-05, + "loss": 1.0845, + "step": 169800 + }, + { + "epoch": 2.37, + "learning_rate": 1.817000794104125e-05, + "loss": 1.058, + "step": 169900 + }, + { + "epoch": 2.37, + "learning_rate": 1.8163042115381937e-05, + "loss": 1.0849, + "step": 170000 + }, + { + "epoch": 2.37, + "learning_rate": 1.815607628972262e-05, + "loss": 1.1009, + "step": 170100 + }, + { + "epoch": 2.37, + "learning_rate": 1.8149110464063307e-05, + "loss": 1.1185, + "step": 170200 + }, + { + "epoch": 2.37, + "learning_rate": 1.814214463840399e-05, + "loss": 1.0886, + "step": 170300 + }, + { + "epoch": 2.37, + "learning_rate": 1.8135178812744676e-05, + "loss": 1.0958, + "step": 170400 + }, + { + "epoch": 2.38, + "learning_rate": 1.8128212987085362e-05, + "loss": 1.0732, + "step": 170500 + }, + { + "epoch": 2.38, + "learning_rate": 1.8121247161426045e-05, + "loss": 1.0757, + "step": 170600 + }, + { + "epoch": 2.38, + "learning_rate": 1.8114281335766732e-05, + "loss": 1.0901, + "step": 170700 + }, + { + "epoch": 2.38, + "learning_rate": 1.8107315510107415e-05, + "loss": 1.1048, + "step": 170800 + }, + { + "epoch": 2.38, + "learning_rate": 1.8100349684448098e-05, + "loss": 1.0625, + "step": 170900 + }, + { + "epoch": 2.38, + "learning_rate": 1.809338385878878e-05, + "loss": 1.0933, + "step": 171000 + }, + { + "epoch": 2.38, + "learning_rate": 1.8086418033129467e-05, + "loss": 1.0984, + "step": 171100 + }, + { + "epoch": 2.39, + "learning_rate": 1.807945220747015e-05, + "loss": 1.0805, + "step": 171200 + }, + { + "epoch": 2.39, + "learning_rate": 1.8072486381810836e-05, + "loss": 1.0746, + "step": 171300 + }, + { + "epoch": 2.39, + "learning_rate": 1.806552055615152e-05, + "loss": 1.0634, + "step": 171400 + }, + { + "epoch": 2.39, + "learning_rate": 1.8058554730492206e-05, + "loss": 1.074, + "step": 171500 + }, + { + "epoch": 2.39, + "learning_rate": 1.805158890483289e-05, + "loss": 1.0932, + "step": 171600 + }, + { + "epoch": 2.39, + "learning_rate": 1.8044623079173575e-05, + "loss": 1.0733, + "step": 171700 + }, + { + "epoch": 2.39, + "learning_rate": 1.8037657253514258e-05, + "loss": 1.0979, + "step": 171800 + }, + { + "epoch": 2.39, + "learning_rate": 1.8030691427854945e-05, + "loss": 1.1366, + "step": 171900 + }, + { + "epoch": 2.4, + "learning_rate": 1.8023725602195628e-05, + "loss": 1.0854, + "step": 172000 + }, + { + "epoch": 2.4, + "learning_rate": 1.8016759776536314e-05, + "loss": 1.0839, + "step": 172100 + }, + { + "epoch": 2.4, + "learning_rate": 1.8009793950876997e-05, + "loss": 1.0771, + "step": 172200 + }, + { + "epoch": 2.4, + "learning_rate": 1.8002828125217683e-05, + "loss": 1.0912, + "step": 172300 + }, + { + "epoch": 2.4, + "learning_rate": 1.7995862299558366e-05, + "loss": 1.0832, + "step": 172400 + }, + { + "epoch": 2.4, + "learning_rate": 1.7988896473899053e-05, + "loss": 1.0872, + "step": 172500 + }, + { + "epoch": 2.4, + "learning_rate": 1.7981930648239736e-05, + "loss": 1.1261, + "step": 172600 + }, + { + "epoch": 2.41, + "learning_rate": 1.7974964822580422e-05, + "loss": 1.0622, + "step": 172700 + }, + { + "epoch": 2.41, + "learning_rate": 1.7967998996921105e-05, + "loss": 1.0816, + "step": 172800 + }, + { + "epoch": 2.41, + "learning_rate": 1.796103317126179e-05, + "loss": 1.0919, + "step": 172900 + }, + { + "epoch": 2.41, + "learning_rate": 1.7954067345602478e-05, + "loss": 1.0751, + "step": 173000 + }, + { + "epoch": 2.41, + "learning_rate": 1.794710151994316e-05, + "loss": 1.0811, + "step": 173100 + }, + { + "epoch": 2.41, + "learning_rate": 1.7940135694283844e-05, + "loss": 1.1039, + "step": 173200 + }, + { + "epoch": 2.41, + "learning_rate": 1.7933169868624527e-05, + "loss": 1.079, + "step": 173300 + }, + { + "epoch": 2.42, + "learning_rate": 1.7926204042965213e-05, + "loss": 1.074, + "step": 173400 + }, + { + "epoch": 2.42, + "learning_rate": 1.7919238217305896e-05, + "loss": 1.1371, + "step": 173500 + }, + { + "epoch": 2.42, + "learning_rate": 1.7912272391646583e-05, + "loss": 1.0723, + "step": 173600 + }, + { + "epoch": 2.42, + "learning_rate": 1.7905306565987266e-05, + "loss": 1.1076, + "step": 173700 + }, + { + "epoch": 2.42, + "learning_rate": 1.7898340740327952e-05, + "loss": 1.0711, + "step": 173800 + }, + { + "epoch": 2.42, + "learning_rate": 1.7891374914668635e-05, + "loss": 1.0799, + "step": 173900 + }, + { + "epoch": 2.42, + "learning_rate": 1.788440908900932e-05, + "loss": 1.0692, + "step": 174000 + }, + { + "epoch": 2.43, + "learning_rate": 1.7877443263350004e-05, + "loss": 1.0965, + "step": 174100 + }, + { + "epoch": 2.43, + "learning_rate": 1.7870547095947284e-05, + "loss": 1.061, + "step": 174200 + }, + { + "epoch": 2.43, + "learning_rate": 1.7863581270287967e-05, + "loss": 1.0913, + "step": 174300 + }, + { + "epoch": 2.43, + "learning_rate": 1.7856615444628653e-05, + "loss": 1.0876, + "step": 174400 + }, + { + "epoch": 2.43, + "learning_rate": 1.7849649618969336e-05, + "loss": 1.0911, + "step": 174500 + }, + { + "epoch": 2.43, + "learning_rate": 1.7842683793310023e-05, + "loss": 1.0707, + "step": 174600 + }, + { + "epoch": 2.43, + "learning_rate": 1.7835717967650706e-05, + "loss": 1.0763, + "step": 174700 + }, + { + "epoch": 2.44, + "learning_rate": 1.7828752141991392e-05, + "loss": 1.1131, + "step": 174800 + }, + { + "epoch": 2.44, + "learning_rate": 1.7821786316332075e-05, + "loss": 1.0556, + "step": 174900 + }, + { + "epoch": 2.44, + "learning_rate": 1.781482049067276e-05, + "loss": 1.0817, + "step": 175000 + }, + { + "epoch": 2.44, + "learning_rate": 1.7807854665013445e-05, + "loss": 1.052, + "step": 175100 + }, + { + "epoch": 2.44, + "learning_rate": 1.780088883935413e-05, + "loss": 1.0742, + "step": 175200 + }, + { + "epoch": 2.44, + "learning_rate": 1.7793923013694814e-05, + "loss": 1.1105, + "step": 175300 + }, + { + "epoch": 2.44, + "learning_rate": 1.77869571880355e-05, + "loss": 1.0677, + "step": 175400 + }, + { + "epoch": 2.45, + "learning_rate": 1.7779991362376183e-05, + "loss": 1.0662, + "step": 175500 + }, + { + "epoch": 2.45, + "learning_rate": 1.7773025536716866e-05, + "loss": 1.0562, + "step": 175600 + }, + { + "epoch": 2.45, + "learning_rate": 1.776605971105755e-05, + "loss": 1.0825, + "step": 175700 + }, + { + "epoch": 2.45, + "learning_rate": 1.7759093885398236e-05, + "loss": 1.0602, + "step": 175800 + }, + { + "epoch": 2.45, + "learning_rate": 1.775212805973892e-05, + "loss": 1.0643, + "step": 175900 + }, + { + "epoch": 2.45, + "learning_rate": 1.7745162234079605e-05, + "loss": 1.099, + "step": 176000 + }, + { + "epoch": 2.45, + "learning_rate": 1.773819640842029e-05, + "loss": 1.0983, + "step": 176100 + }, + { + "epoch": 2.45, + "learning_rate": 1.7731230582760974e-05, + "loss": 1.0967, + "step": 176200 + }, + { + "epoch": 2.46, + "learning_rate": 1.772426475710166e-05, + "loss": 1.0932, + "step": 176300 + }, + { + "epoch": 2.46, + "learning_rate": 1.7717298931442344e-05, + "loss": 1.1169, + "step": 176400 + }, + { + "epoch": 2.46, + "learning_rate": 1.771033310578303e-05, + "loss": 1.0628, + "step": 176500 + }, + { + "epoch": 2.46, + "learning_rate": 1.7703367280123713e-05, + "loss": 1.0565, + "step": 176600 + }, + { + "epoch": 2.46, + "learning_rate": 1.76964014544644e-05, + "loss": 1.0957, + "step": 176700 + }, + { + "epoch": 2.46, + "learning_rate": 1.7689435628805082e-05, + "loss": 1.102, + "step": 176800 + }, + { + "epoch": 2.46, + "learning_rate": 1.768246980314577e-05, + "loss": 1.0591, + "step": 176900 + }, + { + "epoch": 2.47, + "learning_rate": 1.7675503977486452e-05, + "loss": 1.0839, + "step": 177000 + }, + { + "epoch": 2.47, + "learning_rate": 1.7668538151827138e-05, + "loss": 1.0878, + "step": 177100 + }, + { + "epoch": 2.47, + "learning_rate": 1.766157232616782e-05, + "loss": 1.126, + "step": 177200 + }, + { + "epoch": 2.47, + "learning_rate": 1.7654606500508508e-05, + "loss": 1.0994, + "step": 177300 + }, + { + "epoch": 2.47, + "learning_rate": 1.764764067484919e-05, + "loss": 1.0516, + "step": 177400 + }, + { + "epoch": 2.47, + "learning_rate": 1.7640674849189877e-05, + "loss": 1.0712, + "step": 177500 + }, + { + "epoch": 2.47, + "learning_rate": 1.763370902353056e-05, + "loss": 1.0929, + "step": 177600 + }, + { + "epoch": 2.48, + "learning_rate": 1.7626743197871246e-05, + "loss": 1.0808, + "step": 177700 + }, + { + "epoch": 2.48, + "learning_rate": 1.7619777372211926e-05, + "loss": 1.0607, + "step": 177800 + }, + { + "epoch": 2.48, + "learning_rate": 1.7612811546552612e-05, + "loss": 1.1114, + "step": 177900 + }, + { + "epoch": 2.48, + "learning_rate": 1.7605845720893295e-05, + "loss": 1.0941, + "step": 178000 + }, + { + "epoch": 2.48, + "learning_rate": 1.759887989523398e-05, + "loss": 1.0774, + "step": 178100 + }, + { + "epoch": 2.48, + "learning_rate": 1.7591914069574665e-05, + "loss": 1.0861, + "step": 178200 + }, + { + "epoch": 2.48, + "learning_rate": 1.758494824391535e-05, + "loss": 1.0952, + "step": 178300 + }, + { + "epoch": 2.49, + "learning_rate": 1.7577982418256034e-05, + "loss": 1.081, + "step": 178400 + }, + { + "epoch": 2.49, + "learning_rate": 1.7571086250853314e-05, + "loss": 1.0808, + "step": 178500 + }, + { + "epoch": 2.49, + "learning_rate": 1.7564120425193997e-05, + "loss": 1.069, + "step": 178600 + }, + { + "epoch": 2.49, + "learning_rate": 1.7557154599534683e-05, + "loss": 1.0716, + "step": 178700 + }, + { + "epoch": 2.49, + "learning_rate": 1.7550188773875366e-05, + "loss": 1.0677, + "step": 178800 + }, + { + "epoch": 2.49, + "learning_rate": 1.7543222948216053e-05, + "loss": 1.0636, + "step": 178900 + }, + { + "epoch": 2.49, + "learning_rate": 1.753632678081333e-05, + "loss": 1.074, + "step": 179000 + }, + { + "epoch": 2.5, + "learning_rate": 1.7529360955154015e-05, + "loss": 1.0737, + "step": 179100 + }, + { + "epoch": 2.5, + "learning_rate": 1.75223951294947e-05, + "loss": 1.0935, + "step": 179200 + }, + { + "epoch": 2.5, + "learning_rate": 1.7515429303835385e-05, + "loss": 1.0565, + "step": 179300 + }, + { + "epoch": 2.5, + "learning_rate": 1.7508463478176068e-05, + "loss": 1.0837, + "step": 179400 + }, + { + "epoch": 2.5, + "learning_rate": 1.7501497652516754e-05, + "loss": 1.0872, + "step": 179500 + }, + { + "epoch": 2.5, + "learning_rate": 1.7494531826857437e-05, + "loss": 1.0564, + "step": 179600 + }, + { + "epoch": 2.5, + "learning_rate": 1.7487566001198123e-05, + "loss": 1.0401, + "step": 179700 + }, + { + "epoch": 2.5, + "learning_rate": 1.7480600175538806e-05, + "loss": 1.0782, + "step": 179800 + }, + { + "epoch": 2.51, + "learning_rate": 1.7473634349879493e-05, + "loss": 1.0679, + "step": 179900 + }, + { + "epoch": 2.51, + "learning_rate": 1.7466668524220176e-05, + "loss": 1.07, + "step": 180000 + }, + { + "epoch": 2.51, + "learning_rate": 1.7459702698560862e-05, + "loss": 1.0541, + "step": 180100 + }, + { + "epoch": 2.51, + "learning_rate": 1.745273687290155e-05, + "loss": 1.0779, + "step": 180200 + }, + { + "epoch": 2.51, + "learning_rate": 1.744577104724223e-05, + "loss": 1.0653, + "step": 180300 + }, + { + "epoch": 2.51, + "learning_rate": 1.7438805221582915e-05, + "loss": 1.0546, + "step": 180400 + }, + { + "epoch": 2.51, + "learning_rate": 1.7431839395923598e-05, + "loss": 1.1075, + "step": 180500 + }, + { + "epoch": 2.52, + "learning_rate": 1.7424873570264284e-05, + "loss": 1.0602, + "step": 180600 + }, + { + "epoch": 2.52, + "learning_rate": 1.7417907744604967e-05, + "loss": 1.0899, + "step": 180700 + }, + { + "epoch": 2.52, + "learning_rate": 1.7410941918945653e-05, + "loss": 1.0677, + "step": 180800 + }, + { + "epoch": 2.52, + "learning_rate": 1.7403976093286336e-05, + "loss": 1.1195, + "step": 180900 + }, + { + "epoch": 2.52, + "learning_rate": 1.7397010267627023e-05, + "loss": 1.0993, + "step": 181000 + }, + { + "epoch": 2.52, + "learning_rate": 1.7390044441967706e-05, + "loss": 1.0639, + "step": 181100 + }, + { + "epoch": 2.52, + "learning_rate": 1.7383078616308392e-05, + "loss": 1.0561, + "step": 181200 + }, + { + "epoch": 2.53, + "learning_rate": 1.7376112790649075e-05, + "loss": 1.0863, + "step": 181300 + }, + { + "epoch": 2.53, + "learning_rate": 1.736914696498976e-05, + "loss": 1.1163, + "step": 181400 + }, + { + "epoch": 2.53, + "learning_rate": 1.7362181139330444e-05, + "loss": 1.1151, + "step": 181500 + }, + { + "epoch": 2.53, + "learning_rate": 1.735521531367113e-05, + "loss": 1.093, + "step": 181600 + }, + { + "epoch": 2.53, + "learning_rate": 1.7348249488011814e-05, + "loss": 1.0777, + "step": 181700 + }, + { + "epoch": 2.53, + "learning_rate": 1.73412836623525e-05, + "loss": 1.0636, + "step": 181800 + }, + { + "epoch": 2.53, + "learning_rate": 1.7334317836693183e-05, + "loss": 1.0995, + "step": 181900 + }, + { + "epoch": 2.54, + "learning_rate": 1.732735201103387e-05, + "loss": 1.1191, + "step": 182000 + }, + { + "epoch": 2.54, + "learning_rate": 1.7320386185374552e-05, + "loss": 1.0695, + "step": 182100 + }, + { + "epoch": 2.54, + "learning_rate": 1.731342035971524e-05, + "loss": 1.078, + "step": 182200 + }, + { + "epoch": 2.54, + "learning_rate": 1.7306524192312515e-05, + "loss": 1.0821, + "step": 182300 + }, + { + "epoch": 2.54, + "learning_rate": 1.72995583666532e-05, + "loss": 1.103, + "step": 182400 + }, + { + "epoch": 2.54, + "learning_rate": 1.7292592540993885e-05, + "loss": 1.0857, + "step": 182500 + }, + { + "epoch": 2.54, + "learning_rate": 1.728562671533457e-05, + "loss": 1.0897, + "step": 182600 + }, + { + "epoch": 2.55, + "learning_rate": 1.7278660889675254e-05, + "loss": 1.082, + "step": 182700 + }, + { + "epoch": 2.55, + "learning_rate": 1.727169506401594e-05, + "loss": 1.0776, + "step": 182800 + }, + { + "epoch": 2.55, + "learning_rate": 1.726472923835662e-05, + "loss": 1.0197, + "step": 182900 + }, + { + "epoch": 2.55, + "learning_rate": 1.7257763412697306e-05, + "loss": 1.0843, + "step": 183000 + }, + { + "epoch": 2.55, + "learning_rate": 1.725079758703799e-05, + "loss": 1.0958, + "step": 183100 + }, + { + "epoch": 2.55, + "learning_rate": 1.7243831761378676e-05, + "loss": 1.1242, + "step": 183200 + }, + { + "epoch": 2.55, + "learning_rate": 1.723686593571936e-05, + "loss": 1.0766, + "step": 183300 + }, + { + "epoch": 2.56, + "learning_rate": 1.7229900110060045e-05, + "loss": 1.0642, + "step": 183400 + }, + { + "epoch": 2.56, + "learning_rate": 1.722293428440073e-05, + "loss": 1.0896, + "step": 183500 + }, + { + "epoch": 2.56, + "learning_rate": 1.7215968458741414e-05, + "loss": 1.0786, + "step": 183600 + }, + { + "epoch": 2.56, + "learning_rate": 1.72090026330821e-05, + "loss": 1.1146, + "step": 183700 + }, + { + "epoch": 2.56, + "learning_rate": 1.7202036807422784e-05, + "loss": 1.1031, + "step": 183800 + }, + { + "epoch": 2.56, + "learning_rate": 1.719507098176347e-05, + "loss": 1.0745, + "step": 183900 + }, + { + "epoch": 2.56, + "learning_rate": 1.7188105156104153e-05, + "loss": 1.0727, + "step": 184000 + }, + { + "epoch": 2.56, + "learning_rate": 1.718113933044484e-05, + "loss": 1.0804, + "step": 184100 + }, + { + "epoch": 2.57, + "learning_rate": 1.7174173504785523e-05, + "loss": 1.0792, + "step": 184200 + }, + { + "epoch": 2.57, + "learning_rate": 1.716720767912621e-05, + "loss": 1.0806, + "step": 184300 + }, + { + "epoch": 2.57, + "learning_rate": 1.7160241853466892e-05, + "loss": 1.0611, + "step": 184400 + }, + { + "epoch": 2.57, + "learning_rate": 1.7153276027807578e-05, + "loss": 1.0855, + "step": 184500 + }, + { + "epoch": 2.57, + "learning_rate": 1.7146379860404855e-05, + "loss": 1.0496, + "step": 184600 + }, + { + "epoch": 2.57, + "learning_rate": 1.713941403474554e-05, + "loss": 1.0807, + "step": 184700 + }, + { + "epoch": 2.57, + "learning_rate": 1.7132448209086224e-05, + "loss": 1.0464, + "step": 184800 + }, + { + "epoch": 2.58, + "learning_rate": 1.712548238342691e-05, + "loss": 1.0566, + "step": 184900 + }, + { + "epoch": 2.58, + "learning_rate": 1.7118516557767593e-05, + "loss": 1.07, + "step": 185000 + }, + { + "epoch": 2.58, + "learning_rate": 1.711155073210828e-05, + "loss": 1.0464, + "step": 185100 + }, + { + "epoch": 2.58, + "learning_rate": 1.7104584906448963e-05, + "loss": 1.0723, + "step": 185200 + }, + { + "epoch": 2.58, + "learning_rate": 1.7097619080789646e-05, + "loss": 1.0604, + "step": 185300 + }, + { + "epoch": 2.58, + "learning_rate": 1.709065325513033e-05, + "loss": 1.0921, + "step": 185400 + }, + { + "epoch": 2.58, + "learning_rate": 1.7083687429471015e-05, + "loss": 1.0819, + "step": 185500 + }, + { + "epoch": 2.59, + "learning_rate": 1.7076721603811698e-05, + "loss": 1.0824, + "step": 185600 + }, + { + "epoch": 2.59, + "learning_rate": 1.7069755778152385e-05, + "loss": 1.0815, + "step": 185700 + }, + { + "epoch": 2.59, + "learning_rate": 1.7062789952493068e-05, + "loss": 1.0748, + "step": 185800 + }, + { + "epoch": 2.59, + "learning_rate": 1.7055824126833754e-05, + "loss": 1.0746, + "step": 185900 + }, + { + "epoch": 2.59, + "learning_rate": 1.7048858301174437e-05, + "loss": 1.1041, + "step": 186000 + }, + { + "epoch": 2.59, + "learning_rate": 1.7041892475515123e-05, + "loss": 1.056, + "step": 186100 + }, + { + "epoch": 2.59, + "learning_rate": 1.7034926649855806e-05, + "loss": 1.0595, + "step": 186200 + }, + { + "epoch": 2.6, + "learning_rate": 1.7027960824196493e-05, + "loss": 1.1017, + "step": 186300 + }, + { + "epoch": 2.6, + "learning_rate": 1.7020994998537176e-05, + "loss": 1.0718, + "step": 186400 + }, + { + "epoch": 2.6, + "learning_rate": 1.7014029172877862e-05, + "loss": 1.0939, + "step": 186500 + }, + { + "epoch": 2.6, + "learning_rate": 1.7007063347218545e-05, + "loss": 1.0789, + "step": 186600 + }, + { + "epoch": 2.6, + "learning_rate": 1.700009752155923e-05, + "loss": 1.0848, + "step": 186700 + }, + { + "epoch": 2.6, + "learning_rate": 1.6993201354156508e-05, + "loss": 1.1197, + "step": 186800 + }, + { + "epoch": 2.6, + "learning_rate": 1.6986235528497194e-05, + "loss": 1.0867, + "step": 186900 + }, + { + "epoch": 2.61, + "learning_rate": 1.6979269702837877e-05, + "loss": 1.1139, + "step": 187000 + }, + { + "epoch": 2.61, + "learning_rate": 1.6972303877178564e-05, + "loss": 1.1051, + "step": 187100 + }, + { + "epoch": 2.61, + "learning_rate": 1.6965338051519247e-05, + "loss": 1.1226, + "step": 187200 + }, + { + "epoch": 2.61, + "learning_rate": 1.6958372225859933e-05, + "loss": 1.0646, + "step": 187300 + }, + { + "epoch": 2.61, + "learning_rate": 1.6951406400200616e-05, + "loss": 1.079, + "step": 187400 + }, + { + "epoch": 2.61, + "learning_rate": 1.6944440574541302e-05, + "loss": 1.1046, + "step": 187500 + }, + { + "epoch": 2.61, + "learning_rate": 1.693747474888199e-05, + "loss": 1.0883, + "step": 187600 + }, + { + "epoch": 2.61, + "learning_rate": 1.6930508923222668e-05, + "loss": 1.0878, + "step": 187700 + }, + { + "epoch": 2.62, + "learning_rate": 1.6923543097563355e-05, + "loss": 1.045, + "step": 187800 + }, + { + "epoch": 2.62, + "learning_rate": 1.6916577271904038e-05, + "loss": 1.1132, + "step": 187900 + }, + { + "epoch": 2.62, + "learning_rate": 1.6909611446244724e-05, + "loss": 1.0981, + "step": 188000 + }, + { + "epoch": 2.62, + "learning_rate": 1.6902645620585407e-05, + "loss": 1.0895, + "step": 188100 + }, + { + "epoch": 2.62, + "learning_rate": 1.6895679794926093e-05, + "loss": 1.0495, + "step": 188200 + }, + { + "epoch": 2.62, + "learning_rate": 1.6888713969266776e-05, + "loss": 1.0456, + "step": 188300 + }, + { + "epoch": 2.62, + "learning_rate": 1.6881748143607463e-05, + "loss": 1.1049, + "step": 188400 + }, + { + "epoch": 2.63, + "learning_rate": 1.6874782317948146e-05, + "loss": 1.0673, + "step": 188500 + }, + { + "epoch": 2.63, + "learning_rate": 1.6867816492288832e-05, + "loss": 1.0426, + "step": 188600 + }, + { + "epoch": 2.63, + "learning_rate": 1.6860850666629515e-05, + "loss": 1.1219, + "step": 188700 + }, + { + "epoch": 2.63, + "learning_rate": 1.68538848409702e-05, + "loss": 1.1085, + "step": 188800 + }, + { + "epoch": 2.63, + "learning_rate": 1.6846919015310884e-05, + "loss": 1.1014, + "step": 188900 + }, + { + "epoch": 2.63, + "learning_rate": 1.683995318965157e-05, + "loss": 1.1089, + "step": 189000 + }, + { + "epoch": 2.63, + "learning_rate": 1.6832987363992254e-05, + "loss": 1.0614, + "step": 189100 + }, + { + "epoch": 2.64, + "learning_rate": 1.682602153833294e-05, + "loss": 1.0551, + "step": 189200 + }, + { + "epoch": 2.64, + "learning_rate": 1.6819055712673623e-05, + "loss": 1.0755, + "step": 189300 + }, + { + "epoch": 2.64, + "learning_rate": 1.681208988701431e-05, + "loss": 1.062, + "step": 189400 + }, + { + "epoch": 2.64, + "learning_rate": 1.6805124061354993e-05, + "loss": 1.0667, + "step": 189500 + }, + { + "epoch": 2.64, + "learning_rate": 1.679815823569568e-05, + "loss": 1.0606, + "step": 189600 + }, + { + "epoch": 2.64, + "learning_rate": 1.6791192410036362e-05, + "loss": 1.0815, + "step": 189700 + }, + { + "epoch": 2.64, + "learning_rate": 1.6784226584377048e-05, + "loss": 1.0908, + "step": 189800 + }, + { + "epoch": 2.65, + "learning_rate": 1.6777260758717728e-05, + "loss": 1.0862, + "step": 189900 + }, + { + "epoch": 2.65, + "learning_rate": 1.6770294933058414e-05, + "loss": 1.0912, + "step": 190000 + }, + { + "epoch": 2.65, + "learning_rate": 1.676339876565569e-05, + "loss": 1.1108, + "step": 190100 + }, + { + "epoch": 2.65, + "learning_rate": 1.6756432939996377e-05, + "loss": 1.0989, + "step": 190200 + }, + { + "epoch": 2.65, + "learning_rate": 1.674946711433706e-05, + "loss": 1.0668, + "step": 190300 + }, + { + "epoch": 2.65, + "learning_rate": 1.6742501288677746e-05, + "loss": 1.1185, + "step": 190400 + }, + { + "epoch": 2.65, + "learning_rate": 1.673553546301843e-05, + "loss": 1.0967, + "step": 190500 + }, + { + "epoch": 2.66, + "learning_rate": 1.6728569637359116e-05, + "loss": 1.0713, + "step": 190600 + }, + { + "epoch": 2.66, + "learning_rate": 1.67216038116998e-05, + "loss": 1.072, + "step": 190700 + }, + { + "epoch": 2.66, + "learning_rate": 1.6714637986040485e-05, + "loss": 1.0556, + "step": 190800 + }, + { + "epoch": 2.66, + "learning_rate": 1.670767216038117e-05, + "loss": 1.1068, + "step": 190900 + }, + { + "epoch": 2.66, + "learning_rate": 1.6700706334721855e-05, + "loss": 1.0761, + "step": 191000 + }, + { + "epoch": 2.66, + "learning_rate": 1.669374050906254e-05, + "loss": 1.08, + "step": 191100 + }, + { + "epoch": 2.66, + "learning_rate": 1.6686774683403224e-05, + "loss": 1.0995, + "step": 191200 + }, + { + "epoch": 2.67, + "learning_rate": 1.667980885774391e-05, + "loss": 1.1142, + "step": 191300 + }, + { + "epoch": 2.67, + "learning_rate": 1.6672843032084593e-05, + "loss": 1.0918, + "step": 191400 + }, + { + "epoch": 2.67, + "learning_rate": 1.666587720642528e-05, + "loss": 1.0756, + "step": 191500 + }, + { + "epoch": 2.67, + "learning_rate": 1.6658911380765963e-05, + "loss": 1.1105, + "step": 191600 + }, + { + "epoch": 2.67, + "learning_rate": 1.665194555510665e-05, + "loss": 1.0649, + "step": 191700 + }, + { + "epoch": 2.67, + "learning_rate": 1.6644979729447332e-05, + "loss": 1.0424, + "step": 191800 + }, + { + "epoch": 2.67, + "learning_rate": 1.663801390378802e-05, + "loss": 1.0836, + "step": 191900 + }, + { + "epoch": 2.67, + "learning_rate": 1.66310480781287e-05, + "loss": 1.0363, + "step": 192000 + }, + { + "epoch": 2.68, + "learning_rate": 1.6624082252469388e-05, + "loss": 1.085, + "step": 192100 + }, + { + "epoch": 2.68, + "learning_rate": 1.661711642681007e-05, + "loss": 1.1017, + "step": 192200 + }, + { + "epoch": 2.68, + "learning_rate": 1.6610150601150757e-05, + "loss": 1.0552, + "step": 192300 + }, + { + "epoch": 2.68, + "learning_rate": 1.6603184775491437e-05, + "loss": 1.0586, + "step": 192400 + }, + { + "epoch": 2.68, + "learning_rate": 1.6596218949832123e-05, + "loss": 1.0569, + "step": 192500 + }, + { + "epoch": 2.68, + "learning_rate": 1.6589253124172806e-05, + "loss": 1.0751, + "step": 192600 + }, + { + "epoch": 2.68, + "learning_rate": 1.6582287298513492e-05, + "loss": 1.0993, + "step": 192700 + }, + { + "epoch": 2.69, + "learning_rate": 1.6575321472854175e-05, + "loss": 1.0761, + "step": 192800 + }, + { + "epoch": 2.69, + "learning_rate": 1.6568355647194862e-05, + "loss": 1.0457, + "step": 192900 + }, + { + "epoch": 2.69, + "learning_rate": 1.6561389821535545e-05, + "loss": 1.0316, + "step": 193000 + }, + { + "epoch": 2.69, + "learning_rate": 1.655442399587623e-05, + "loss": 1.0627, + "step": 193100 + }, + { + "epoch": 2.69, + "learning_rate": 1.6547458170216914e-05, + "loss": 1.0716, + "step": 193200 + }, + { + "epoch": 2.69, + "learning_rate": 1.65404923445576e-05, + "loss": 1.1109, + "step": 193300 + }, + { + "epoch": 2.69, + "learning_rate": 1.6533526518898287e-05, + "loss": 1.1173, + "step": 193400 + }, + { + "epoch": 2.7, + "learning_rate": 1.652656069323897e-05, + "loss": 1.0714, + "step": 193500 + }, + { + "epoch": 2.7, + "learning_rate": 1.6519594867579656e-05, + "loss": 1.0919, + "step": 193600 + }, + { + "epoch": 2.7, + "learning_rate": 1.651262904192034e-05, + "loss": 1.0783, + "step": 193700 + }, + { + "epoch": 2.7, + "learning_rate": 1.6505663216261026e-05, + "loss": 1.1043, + "step": 193800 + }, + { + "epoch": 2.7, + "learning_rate": 1.649869739060171e-05, + "loss": 1.0848, + "step": 193900 + }, + { + "epoch": 2.7, + "learning_rate": 1.6491731564942395e-05, + "loss": 1.1154, + "step": 194000 + }, + { + "epoch": 2.7, + "learning_rate": 1.6484765739283078e-05, + "loss": 1.077, + "step": 194100 + }, + { + "epoch": 2.71, + "learning_rate": 1.6477799913623764e-05, + "loss": 1.126, + "step": 194200 + }, + { + "epoch": 2.71, + "learning_rate": 1.647090374622104e-05, + "loss": 1.1002, + "step": 194300 + }, + { + "epoch": 2.71, + "learning_rate": 1.6463937920561727e-05, + "loss": 1.075, + "step": 194400 + }, + { + "epoch": 2.71, + "learning_rate": 1.645697209490241e-05, + "loss": 1.0668, + "step": 194500 + }, + { + "epoch": 2.71, + "learning_rate": 1.6450075927499687e-05, + "loss": 1.0743, + "step": 194600 + }, + { + "epoch": 2.71, + "learning_rate": 1.6443110101840373e-05, + "loss": 1.0968, + "step": 194700 + }, + { + "epoch": 2.71, + "learning_rate": 1.6436144276181056e-05, + "loss": 1.0653, + "step": 194800 + }, + { + "epoch": 2.72, + "learning_rate": 1.6429178450521742e-05, + "loss": 1.0899, + "step": 194900 + }, + { + "epoch": 2.72, + "learning_rate": 1.6422212624862425e-05, + "loss": 1.0995, + "step": 195000 + }, + { + "epoch": 2.72, + "learning_rate": 1.641524679920311e-05, + "loss": 1.0823, + "step": 195100 + }, + { + "epoch": 2.72, + "learning_rate": 1.6408280973543795e-05, + "loss": 1.0645, + "step": 195200 + }, + { + "epoch": 2.72, + "learning_rate": 1.6401315147884478e-05, + "loss": 1.0751, + "step": 195300 + }, + { + "epoch": 2.72, + "learning_rate": 1.6394349322225164e-05, + "loss": 1.0748, + "step": 195400 + }, + { + "epoch": 2.72, + "learning_rate": 1.6387383496565847e-05, + "loss": 1.1133, + "step": 195500 + }, + { + "epoch": 2.73, + "learning_rate": 1.6380417670906533e-05, + "loss": 1.086, + "step": 195600 + }, + { + "epoch": 2.73, + "learning_rate": 1.6373451845247216e-05, + "loss": 1.0626, + "step": 195700 + }, + { + "epoch": 2.73, + "learning_rate": 1.6366486019587903e-05, + "loss": 1.0763, + "step": 195800 + }, + { + "epoch": 2.73, + "learning_rate": 1.6359520193928586e-05, + "loss": 1.0454, + "step": 195900 + }, + { + "epoch": 2.73, + "learning_rate": 1.6352554368269272e-05, + "loss": 1.1118, + "step": 196000 + }, + { + "epoch": 2.73, + "learning_rate": 1.6345588542609955e-05, + "loss": 1.0515, + "step": 196100 + }, + { + "epoch": 2.73, + "learning_rate": 1.633862271695064e-05, + "loss": 1.0801, + "step": 196200 + }, + { + "epoch": 2.73, + "learning_rate": 1.6331656891291325e-05, + "loss": 1.0742, + "step": 196300 + }, + { + "epoch": 2.74, + "learning_rate": 1.632469106563201e-05, + "loss": 1.0821, + "step": 196400 + }, + { + "epoch": 2.74, + "learning_rate": 1.6317725239972694e-05, + "loss": 1.1027, + "step": 196500 + }, + { + "epoch": 2.74, + "learning_rate": 1.631075941431338e-05, + "loss": 1.0542, + "step": 196600 + }, + { + "epoch": 2.74, + "learning_rate": 1.6303793588654063e-05, + "loss": 1.092, + "step": 196700 + }, + { + "epoch": 2.74, + "learning_rate": 1.629682776299475e-05, + "loss": 1.1127, + "step": 196800 + }, + { + "epoch": 2.74, + "learning_rate": 1.6289861937335433e-05, + "loss": 1.0915, + "step": 196900 + }, + { + "epoch": 2.74, + "learning_rate": 1.628289611167612e-05, + "loss": 1.0801, + "step": 197000 + }, + { + "epoch": 2.75, + "learning_rate": 1.6275930286016802e-05, + "loss": 1.0764, + "step": 197100 + }, + { + "epoch": 2.75, + "learning_rate": 1.6268964460357485e-05, + "loss": 1.073, + "step": 197200 + }, + { + "epoch": 2.75, + "learning_rate": 1.626199863469817e-05, + "loss": 1.1099, + "step": 197300 + }, + { + "epoch": 2.75, + "learning_rate": 1.6255032809038854e-05, + "loss": 1.087, + "step": 197400 + }, + { + "epoch": 2.75, + "learning_rate": 1.624806698337954e-05, + "loss": 1.0516, + "step": 197500 + }, + { + "epoch": 2.75, + "learning_rate": 1.6241101157720224e-05, + "loss": 1.0765, + "step": 197600 + }, + { + "epoch": 2.75, + "learning_rate": 1.62342049903175e-05, + "loss": 1.0956, + "step": 197700 + }, + { + "epoch": 2.76, + "learning_rate": 1.6227239164658187e-05, + "loss": 1.0769, + "step": 197800 + }, + { + "epoch": 2.76, + "learning_rate": 1.622027333899887e-05, + "loss": 1.0753, + "step": 197900 + }, + { + "epoch": 2.76, + "learning_rate": 1.6213307513339556e-05, + "loss": 1.0535, + "step": 198000 + }, + { + "epoch": 2.76, + "learning_rate": 1.6206341687680242e-05, + "loss": 1.0612, + "step": 198100 + }, + { + "epoch": 2.76, + "learning_rate": 1.6199375862020925e-05, + "loss": 1.0586, + "step": 198200 + }, + { + "epoch": 2.76, + "learning_rate": 1.619241003636161e-05, + "loss": 1.0834, + "step": 198300 + }, + { + "epoch": 2.76, + "learning_rate": 1.6185444210702295e-05, + "loss": 1.1046, + "step": 198400 + }, + { + "epoch": 2.77, + "learning_rate": 1.617847838504298e-05, + "loss": 1.0751, + "step": 198500 + }, + { + "epoch": 2.77, + "learning_rate": 1.6171512559383664e-05, + "loss": 1.0657, + "step": 198600 + }, + { + "epoch": 2.77, + "learning_rate": 1.616454673372435e-05, + "loss": 1.0797, + "step": 198700 + }, + { + "epoch": 2.77, + "learning_rate": 1.6157580908065033e-05, + "loss": 1.0562, + "step": 198800 + }, + { + "epoch": 2.77, + "learning_rate": 1.615061508240572e-05, + "loss": 1.0602, + "step": 198900 + }, + { + "epoch": 2.77, + "learning_rate": 1.6143649256746403e-05, + "loss": 1.0542, + "step": 199000 + }, + { + "epoch": 2.77, + "learning_rate": 1.6136753089343683e-05, + "loss": 1.0838, + "step": 199100 + }, + { + "epoch": 2.78, + "learning_rate": 1.6129787263684366e-05, + "loss": 1.0975, + "step": 199200 + }, + { + "epoch": 2.78, + "learning_rate": 1.6122821438025052e-05, + "loss": 1.0557, + "step": 199300 + }, + { + "epoch": 2.78, + "learning_rate": 1.6115855612365735e-05, + "loss": 1.0981, + "step": 199400 + }, + { + "epoch": 2.78, + "learning_rate": 1.610888978670642e-05, + "loss": 1.1035, + "step": 199500 + }, + { + "epoch": 2.78, + "learning_rate": 1.6101923961047104e-05, + "loss": 1.0754, + "step": 199600 + }, + { + "epoch": 2.78, + "learning_rate": 1.609495813538779e-05, + "loss": 1.0956, + "step": 199700 + }, + { + "epoch": 2.78, + "learning_rate": 1.608799230972847e-05, + "loss": 1.0734, + "step": 199800 + }, + { + "epoch": 2.78, + "learning_rate": 1.6081026484069157e-05, + "loss": 1.0515, + "step": 199900 + }, + { + "epoch": 2.79, + "learning_rate": 1.607406065840984e-05, + "loss": 1.0866, + "step": 200000 + }, + { + "epoch": 2.79, + "learning_rate": 1.6067094832750526e-05, + "loss": 1.0826, + "step": 200100 + }, + { + "epoch": 2.79, + "learning_rate": 1.606012900709121e-05, + "loss": 1.0871, + "step": 200200 + }, + { + "epoch": 2.79, + "learning_rate": 1.6053163181431895e-05, + "loss": 1.0809, + "step": 200300 + }, + { + "epoch": 2.79, + "learning_rate": 1.604619735577258e-05, + "loss": 1.1262, + "step": 200400 + }, + { + "epoch": 2.79, + "learning_rate": 1.6039231530113265e-05, + "loss": 1.1024, + "step": 200500 + }, + { + "epoch": 2.79, + "learning_rate": 1.6032265704453948e-05, + "loss": 1.0786, + "step": 200600 + }, + { + "epoch": 2.8, + "learning_rate": 1.6025299878794634e-05, + "loss": 1.0486, + "step": 200700 + }, + { + "epoch": 2.8, + "learning_rate": 1.6018334053135317e-05, + "loss": 1.0435, + "step": 200800 + }, + { + "epoch": 2.8, + "learning_rate": 1.6011368227476003e-05, + "loss": 1.1039, + "step": 200900 + }, + { + "epoch": 2.8, + "learning_rate": 1.6004402401816686e-05, + "loss": 1.0653, + "step": 201000 + }, + { + "epoch": 2.8, + "learning_rate": 1.5997436576157373e-05, + "loss": 1.0518, + "step": 201100 + }, + { + "epoch": 2.8, + "learning_rate": 1.5990470750498056e-05, + "loss": 1.104, + "step": 201200 + }, + { + "epoch": 2.8, + "learning_rate": 1.5983504924838742e-05, + "loss": 1.0759, + "step": 201300 + }, + { + "epoch": 2.81, + "learning_rate": 1.597653909917943e-05, + "loss": 1.0754, + "step": 201400 + }, + { + "epoch": 2.81, + "learning_rate": 1.5969642931776705e-05, + "loss": 1.0443, + "step": 201500 + }, + { + "epoch": 2.81, + "learning_rate": 1.5962677106117388e-05, + "loss": 1.0767, + "step": 201600 + }, + { + "epoch": 2.81, + "learning_rate": 1.5955711280458074e-05, + "loss": 1.0629, + "step": 201700 + }, + { + "epoch": 2.81, + "learning_rate": 1.5948745454798757e-05, + "loss": 1.0433, + "step": 201800 + }, + { + "epoch": 2.81, + "learning_rate": 1.5941779629139444e-05, + "loss": 1.1352, + "step": 201900 + }, + { + "epoch": 2.81, + "learning_rate": 1.5934813803480127e-05, + "loss": 1.0735, + "step": 202000 + }, + { + "epoch": 2.82, + "learning_rate": 1.5927847977820813e-05, + "loss": 1.0932, + "step": 202100 + }, + { + "epoch": 2.82, + "learning_rate": 1.5920882152161496e-05, + "loss": 1.0893, + "step": 202200 + }, + { + "epoch": 2.82, + "learning_rate": 1.591391632650218e-05, + "loss": 1.0984, + "step": 202300 + }, + { + "epoch": 2.82, + "learning_rate": 1.5906950500842865e-05, + "loss": 1.0618, + "step": 202400 + }, + { + "epoch": 2.82, + "learning_rate": 1.589998467518355e-05, + "loss": 1.0906, + "step": 202500 + }, + { + "epoch": 2.82, + "learning_rate": 1.5893018849524235e-05, + "loss": 1.078, + "step": 202600 + }, + { + "epoch": 2.82, + "learning_rate": 1.5886053023864918e-05, + "loss": 1.1111, + "step": 202700 + }, + { + "epoch": 2.83, + "learning_rate": 1.5879087198205604e-05, + "loss": 1.0833, + "step": 202800 + }, + { + "epoch": 2.83, + "learning_rate": 1.5872121372546287e-05, + "loss": 1.0899, + "step": 202900 + }, + { + "epoch": 2.83, + "learning_rate": 1.5865155546886974e-05, + "loss": 1.0476, + "step": 203000 + }, + { + "epoch": 2.83, + "learning_rate": 1.5858189721227657e-05, + "loss": 1.0827, + "step": 203100 + }, + { + "epoch": 2.83, + "learning_rate": 1.5851223895568343e-05, + "loss": 1.0933, + "step": 203200 + }, + { + "epoch": 2.83, + "learning_rate": 1.5844258069909026e-05, + "loss": 1.136, + "step": 203300 + }, + { + "epoch": 2.83, + "learning_rate": 1.5837292244249712e-05, + "loss": 1.1106, + "step": 203400 + }, + { + "epoch": 2.84, + "learning_rate": 1.5830326418590395e-05, + "loss": 1.0957, + "step": 203500 + }, + { + "epoch": 2.84, + "learning_rate": 1.582336059293108e-05, + "loss": 1.1121, + "step": 203600 + }, + { + "epoch": 2.84, + "learning_rate": 1.5816394767271765e-05, + "loss": 1.0751, + "step": 203700 + }, + { + "epoch": 2.84, + "learning_rate": 1.580942894161245e-05, + "loss": 1.0797, + "step": 203800 + }, + { + "epoch": 2.84, + "learning_rate": 1.5802463115953134e-05, + "loss": 1.0954, + "step": 203900 + }, + { + "epoch": 2.84, + "learning_rate": 1.579549729029382e-05, + "loss": 1.0925, + "step": 204000 + }, + { + "epoch": 2.84, + "learning_rate": 1.5788531464634503e-05, + "loss": 1.0731, + "step": 204100 + }, + { + "epoch": 2.84, + "learning_rate": 1.578156563897519e-05, + "loss": 1.0628, + "step": 204200 + }, + { + "epoch": 2.85, + "learning_rate": 1.5774599813315873e-05, + "loss": 1.095, + "step": 204300 + }, + { + "epoch": 2.85, + "learning_rate": 1.576763398765656e-05, + "loss": 1.074, + "step": 204400 + }, + { + "epoch": 2.85, + "learning_rate": 1.576066816199724e-05, + "loss": 1.0821, + "step": 204500 + }, + { + "epoch": 2.85, + "learning_rate": 1.5753702336337925e-05, + "loss": 1.122, + "step": 204600 + }, + { + "epoch": 2.85, + "learning_rate": 1.574673651067861e-05, + "loss": 1.0234, + "step": 204700 + }, + { + "epoch": 2.85, + "learning_rate": 1.5739770685019294e-05, + "loss": 1.0927, + "step": 204800 + }, + { + "epoch": 2.85, + "learning_rate": 1.573280485935998e-05, + "loss": 1.0797, + "step": 204900 + }, + { + "epoch": 2.86, + "learning_rate": 1.5725839033700664e-05, + "loss": 1.0855, + "step": 205000 + }, + { + "epoch": 2.86, + "learning_rate": 1.571887320804135e-05, + "loss": 1.0915, + "step": 205100 + }, + { + "epoch": 2.86, + "learning_rate": 1.5711907382382033e-05, + "loss": 1.0885, + "step": 205200 + }, + { + "epoch": 2.86, + "learning_rate": 1.570494155672272e-05, + "loss": 1.0944, + "step": 205300 + }, + { + "epoch": 2.86, + "learning_rate": 1.5697975731063403e-05, + "loss": 1.1138, + "step": 205400 + }, + { + "epoch": 2.86, + "learning_rate": 1.569100990540409e-05, + "loss": 1.0337, + "step": 205500 + }, + { + "epoch": 2.86, + "learning_rate": 1.5684044079744772e-05, + "loss": 1.0726, + "step": 205600 + }, + { + "epoch": 2.87, + "learning_rate": 1.567707825408546e-05, + "loss": 1.0861, + "step": 205700 + }, + { + "epoch": 2.87, + "learning_rate": 1.567011242842614e-05, + "loss": 1.0717, + "step": 205800 + }, + { + "epoch": 2.87, + "learning_rate": 1.5663146602766828e-05, + "loss": 1.1152, + "step": 205900 + }, + { + "epoch": 2.87, + "learning_rate": 1.565618077710751e-05, + "loss": 1.0978, + "step": 206000 + }, + { + "epoch": 2.87, + "learning_rate": 1.5649214951448197e-05, + "loss": 1.0851, + "step": 206100 + }, + { + "epoch": 2.87, + "learning_rate": 1.564224912578888e-05, + "loss": 1.0665, + "step": 206200 + }, + { + "epoch": 2.87, + "learning_rate": 1.5635283300129566e-05, + "loss": 1.0686, + "step": 206300 + }, + { + "epoch": 2.88, + "learning_rate": 1.562831747447025e-05, + "loss": 1.0869, + "step": 206400 + }, + { + "epoch": 2.88, + "learning_rate": 1.5621351648810936e-05, + "loss": 1.1102, + "step": 206500 + }, + { + "epoch": 2.88, + "learning_rate": 1.561438582315162e-05, + "loss": 1.0685, + "step": 206600 + }, + { + "epoch": 2.88, + "learning_rate": 1.5607419997492302e-05, + "loss": 1.0487, + "step": 206700 + }, + { + "epoch": 2.88, + "learning_rate": 1.5600454171832985e-05, + "loss": 1.0682, + "step": 206800 + }, + { + "epoch": 2.88, + "learning_rate": 1.559348834617367e-05, + "loss": 1.0777, + "step": 206900 + }, + { + "epoch": 2.88, + "learning_rate": 1.5586522520514354e-05, + "loss": 1.0471, + "step": 207000 + }, + { + "epoch": 2.89, + "learning_rate": 1.557955669485504e-05, + "loss": 1.0578, + "step": 207100 + }, + { + "epoch": 2.89, + "learning_rate": 1.5572590869195727e-05, + "loss": 1.0603, + "step": 207200 + }, + { + "epoch": 2.89, + "learning_rate": 1.556562504353641e-05, + "loss": 1.0783, + "step": 207300 + }, + { + "epoch": 2.89, + "learning_rate": 1.5558659217877096e-05, + "loss": 1.1009, + "step": 207400 + }, + { + "epoch": 2.89, + "learning_rate": 1.555169339221778e-05, + "loss": 1.0468, + "step": 207500 + }, + { + "epoch": 2.89, + "learning_rate": 1.5544797224815056e-05, + "loss": 1.0694, + "step": 207600 + }, + { + "epoch": 2.89, + "learning_rate": 1.5537831399155742e-05, + "loss": 1.0634, + "step": 207700 + }, + { + "epoch": 2.89, + "learning_rate": 1.5530865573496425e-05, + "loss": 1.0974, + "step": 207800 + }, + { + "epoch": 2.9, + "learning_rate": 1.552389974783711e-05, + "loss": 1.1073, + "step": 207900 + }, + { + "epoch": 2.9, + "learning_rate": 1.5516933922177798e-05, + "loss": 1.1259, + "step": 208000 + }, + { + "epoch": 2.9, + "learning_rate": 1.550996809651848e-05, + "loss": 1.0786, + "step": 208100 + }, + { + "epoch": 2.9, + "learning_rate": 1.5503002270859167e-05, + "loss": 1.0566, + "step": 208200 + }, + { + "epoch": 2.9, + "learning_rate": 1.549603644519985e-05, + "loss": 1.1088, + "step": 208300 + }, + { + "epoch": 2.9, + "learning_rate": 1.5489070619540537e-05, + "loss": 1.0866, + "step": 208400 + }, + { + "epoch": 2.9, + "learning_rate": 1.548210479388122e-05, + "loss": 1.1046, + "step": 208500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5475138968221906e-05, + "loss": 1.0708, + "step": 208600 + }, + { + "epoch": 2.91, + "learning_rate": 1.546817314256259e-05, + "loss": 1.0877, + "step": 208700 + }, + { + "epoch": 2.91, + "learning_rate": 1.546127697515987e-05, + "loss": 1.0964, + "step": 208800 + }, + { + "epoch": 2.91, + "learning_rate": 1.545431114950055e-05, + "loss": 1.0895, + "step": 208900 + }, + { + "epoch": 2.91, + "learning_rate": 1.5447345323841238e-05, + "loss": 1.0904, + "step": 209000 + }, + { + "epoch": 2.91, + "learning_rate": 1.544037949818192e-05, + "loss": 1.1009, + "step": 209100 + }, + { + "epoch": 2.91, + "learning_rate": 1.5433413672522607e-05, + "loss": 1.0585, + "step": 209200 + }, + { + "epoch": 2.92, + "learning_rate": 1.5426447846863287e-05, + "loss": 1.0974, + "step": 209300 + }, + { + "epoch": 2.92, + "learning_rate": 1.5419482021203973e-05, + "loss": 1.1168, + "step": 209400 + }, + { + "epoch": 2.92, + "learning_rate": 1.5412516195544656e-05, + "loss": 1.0974, + "step": 209500 + }, + { + "epoch": 2.92, + "learning_rate": 1.5405550369885343e-05, + "loss": 1.0855, + "step": 209600 + }, + { + "epoch": 2.92, + "learning_rate": 1.5398584544226026e-05, + "loss": 1.0696, + "step": 209700 + }, + { + "epoch": 2.92, + "learning_rate": 1.5391618718566712e-05, + "loss": 1.0956, + "step": 209800 + }, + { + "epoch": 2.92, + "learning_rate": 1.5384652892907395e-05, + "loss": 1.0661, + "step": 209900 + }, + { + "epoch": 2.93, + "learning_rate": 1.537768706724808e-05, + "loss": 1.0785, + "step": 210000 + }, + { + "epoch": 2.93, + "learning_rate": 1.5370721241588764e-05, + "loss": 1.0751, + "step": 210100 + }, + { + "epoch": 2.93, + "learning_rate": 1.536375541592945e-05, + "loss": 1.1149, + "step": 210200 + }, + { + "epoch": 2.93, + "learning_rate": 1.5356789590270134e-05, + "loss": 1.1071, + "step": 210300 + }, + { + "epoch": 2.93, + "learning_rate": 1.534982376461082e-05, + "loss": 1.0892, + "step": 210400 + }, + { + "epoch": 2.93, + "learning_rate": 1.5342857938951503e-05, + "loss": 1.0664, + "step": 210500 + }, + { + "epoch": 2.93, + "learning_rate": 1.533589211329219e-05, + "loss": 1.0815, + "step": 210600 + }, + { + "epoch": 2.94, + "learning_rate": 1.5328926287632873e-05, + "loss": 1.07, + "step": 210700 + }, + { + "epoch": 2.94, + "learning_rate": 1.532196046197356e-05, + "loss": 1.0735, + "step": 210800 + }, + { + "epoch": 2.94, + "learning_rate": 1.5314994636314242e-05, + "loss": 1.0877, + "step": 210900 + }, + { + "epoch": 2.94, + "learning_rate": 1.530802881065493e-05, + "loss": 1.0944, + "step": 211000 + }, + { + "epoch": 2.94, + "learning_rate": 1.530106298499561e-05, + "loss": 1.0892, + "step": 211100 + }, + { + "epoch": 2.94, + "learning_rate": 1.5294097159336298e-05, + "loss": 1.0595, + "step": 211200 + }, + { + "epoch": 2.94, + "learning_rate": 1.5287131333676984e-05, + "loss": 1.0714, + "step": 211300 + }, + { + "epoch": 2.95, + "learning_rate": 1.5280165508017667e-05, + "loss": 1.1189, + "step": 211400 + }, + { + "epoch": 2.95, + "learning_rate": 1.5273199682358353e-05, + "loss": 1.0914, + "step": 211500 + }, + { + "epoch": 2.95, + "learning_rate": 1.5266233856699033e-05, + "loss": 1.0677, + "step": 211600 + }, + { + "epoch": 2.95, + "learning_rate": 1.525926803103972e-05, + "loss": 1.0775, + "step": 211700 + }, + { + "epoch": 2.95, + "learning_rate": 1.5252302205380402e-05, + "loss": 1.065, + "step": 211800 + }, + { + "epoch": 2.95, + "learning_rate": 1.524540603797768e-05, + "loss": 1.0778, + "step": 211900 + }, + { + "epoch": 2.95, + "learning_rate": 1.5238440212318365e-05, + "loss": 1.0582, + "step": 212000 + }, + { + "epoch": 2.95, + "learning_rate": 1.523147438665905e-05, + "loss": 1.0656, + "step": 212100 + }, + { + "epoch": 2.96, + "learning_rate": 1.5224508560999735e-05, + "loss": 1.0801, + "step": 212200 + }, + { + "epoch": 2.96, + "learning_rate": 1.521754273534042e-05, + "loss": 1.0756, + "step": 212300 + }, + { + "epoch": 2.96, + "learning_rate": 1.5210576909681104e-05, + "loss": 1.0706, + "step": 212400 + }, + { + "epoch": 2.96, + "learning_rate": 1.5203611084021789e-05, + "loss": 1.0565, + "step": 212500 + }, + { + "epoch": 2.96, + "learning_rate": 1.5196645258362473e-05, + "loss": 1.1021, + "step": 212600 + }, + { + "epoch": 2.96, + "learning_rate": 1.5189749090959751e-05, + "loss": 1.0911, + "step": 212700 + }, + { + "epoch": 2.96, + "learning_rate": 1.5182783265300436e-05, + "loss": 1.0772, + "step": 212800 + }, + { + "epoch": 2.97, + "learning_rate": 1.517581743964112e-05, + "loss": 1.0479, + "step": 212900 + }, + { + "epoch": 2.97, + "learning_rate": 1.5168851613981805e-05, + "loss": 1.0724, + "step": 213000 + }, + { + "epoch": 2.97, + "learning_rate": 1.516188578832249e-05, + "loss": 1.0897, + "step": 213100 + }, + { + "epoch": 2.97, + "learning_rate": 1.5154919962663175e-05, + "loss": 1.0473, + "step": 213200 + }, + { + "epoch": 2.97, + "learning_rate": 1.514795413700386e-05, + "loss": 1.0869, + "step": 213300 + }, + { + "epoch": 2.97, + "learning_rate": 1.5140988311344544e-05, + "loss": 1.0665, + "step": 213400 + }, + { + "epoch": 2.97, + "learning_rate": 1.5134022485685229e-05, + "loss": 1.076, + "step": 213500 + }, + { + "epoch": 2.98, + "learning_rate": 1.5127056660025914e-05, + "loss": 1.107, + "step": 213600 + }, + { + "epoch": 2.98, + "learning_rate": 1.5120090834366598e-05, + "loss": 1.0896, + "step": 213700 + }, + { + "epoch": 2.98, + "learning_rate": 1.5113125008707283e-05, + "loss": 1.0652, + "step": 213800 + }, + { + "epoch": 2.98, + "learning_rate": 1.5106159183047968e-05, + "loss": 1.0795, + "step": 213900 + }, + { + "epoch": 2.98, + "learning_rate": 1.5099193357388652e-05, + "loss": 1.0711, + "step": 214000 + }, + { + "epoch": 2.98, + "learning_rate": 1.5092227531729339e-05, + "loss": 1.0763, + "step": 214100 + }, + { + "epoch": 2.98, + "learning_rate": 1.508526170607002e-05, + "loss": 1.0809, + "step": 214200 + }, + { + "epoch": 2.99, + "learning_rate": 1.5078295880410705e-05, + "loss": 1.0848, + "step": 214300 + }, + { + "epoch": 2.99, + "learning_rate": 1.507133005475139e-05, + "loss": 1.1072, + "step": 214400 + }, + { + "epoch": 2.99, + "learning_rate": 1.5064364229092074e-05, + "loss": 1.0776, + "step": 214500 + }, + { + "epoch": 2.99, + "learning_rate": 1.5057398403432759e-05, + "loss": 1.0591, + "step": 214600 + }, + { + "epoch": 2.99, + "learning_rate": 1.5050432577773443e-05, + "loss": 1.0677, + "step": 214700 + }, + { + "epoch": 2.99, + "learning_rate": 1.5043466752114128e-05, + "loss": 1.0943, + "step": 214800 + }, + { + "epoch": 2.99, + "learning_rate": 1.5036500926454813e-05, + "loss": 1.1069, + "step": 214900 + }, + { + "epoch": 3.0, + "learning_rate": 1.5029535100795497e-05, + "loss": 1.0775, + "step": 215000 + }, + { + "epoch": 3.0, + "learning_rate": 1.5022569275136182e-05, + "loss": 1.0876, + "step": 215100 + }, + { + "epoch": 3.0, + "learning_rate": 1.5015603449476867e-05, + "loss": 1.108, + "step": 215200 + }, + { + "epoch": 3.0, + "learning_rate": 1.5008637623817552e-05, + "loss": 1.0907, + "step": 215300 + }, + { + "epoch": 3.0, + "eval_gen_len": 20.0, + "eval_loss": 1.145164132118225, + "eval_rouge1": 12.6221, + "eval_rouge2": 3.773, + "eval_rougeL": 12.1226, + "eval_rougeLsum": 12.2359, + "eval_runtime": 1533.982, + "eval_samples_per_second": 8.715, + "eval_steps_per_second": 2.179, + "step": 215337 + }, + { + "epoch": 3.0, + "learning_rate": 1.5001671798158236e-05, + "loss": 0.9923, + "step": 215400 + }, + { + "epoch": 3.0, + "learning_rate": 1.4994705972498921e-05, + "loss": 0.9906, + "step": 215500 + }, + { + "epoch": 3.0, + "learning_rate": 1.4987740146839606e-05, + "loss": 0.9636, + "step": 215600 + }, + { + "epoch": 3.01, + "learning_rate": 1.498077432118029e-05, + "loss": 0.9861, + "step": 215700 + }, + { + "epoch": 3.01, + "learning_rate": 1.4973808495520975e-05, + "loss": 0.9771, + "step": 215800 + }, + { + "epoch": 3.01, + "learning_rate": 1.4966842669861658e-05, + "loss": 0.9615, + "step": 215900 + }, + { + "epoch": 3.01, + "learning_rate": 1.4959876844202343e-05, + "loss": 0.9824, + "step": 216000 + }, + { + "epoch": 3.01, + "learning_rate": 1.4952911018543027e-05, + "loss": 0.9312, + "step": 216100 + }, + { + "epoch": 3.01, + "learning_rate": 1.4945945192883712e-05, + "loss": 0.9745, + "step": 216200 + }, + { + "epoch": 3.01, + "learning_rate": 1.4938979367224397e-05, + "loss": 0.9686, + "step": 216300 + }, + { + "epoch": 3.01, + "learning_rate": 1.4932013541565081e-05, + "loss": 0.9777, + "step": 216400 + }, + { + "epoch": 3.02, + "learning_rate": 1.4925047715905768e-05, + "loss": 0.9322, + "step": 216500 + }, + { + "epoch": 3.02, + "learning_rate": 1.4918081890246452e-05, + "loss": 0.9771, + "step": 216600 + }, + { + "epoch": 3.02, + "learning_rate": 1.4911116064587137e-05, + "loss": 0.9627, + "step": 216700 + }, + { + "epoch": 3.02, + "learning_rate": 1.4904150238927822e-05, + "loss": 0.9897, + "step": 216800 + }, + { + "epoch": 3.02, + "learning_rate": 1.4897184413268506e-05, + "loss": 0.9642, + "step": 216900 + }, + { + "epoch": 3.02, + "learning_rate": 1.489021858760919e-05, + "loss": 0.9726, + "step": 217000 + }, + { + "epoch": 3.02, + "learning_rate": 1.4883252761949874e-05, + "loss": 0.9797, + "step": 217100 + }, + { + "epoch": 3.03, + "learning_rate": 1.4876286936290559e-05, + "loss": 0.991, + "step": 217200 + }, + { + "epoch": 3.03, + "learning_rate": 1.4869321110631243e-05, + "loss": 0.9746, + "step": 217300 + }, + { + "epoch": 3.03, + "learning_rate": 1.4862355284971928e-05, + "loss": 0.9629, + "step": 217400 + }, + { + "epoch": 3.03, + "learning_rate": 1.4855389459312613e-05, + "loss": 0.967, + "step": 217500 + }, + { + "epoch": 3.03, + "learning_rate": 1.4848423633653298e-05, + "loss": 0.985, + "step": 217600 + }, + { + "epoch": 3.03, + "learning_rate": 1.4841457807993982e-05, + "loss": 0.9809, + "step": 217700 + }, + { + "epoch": 3.03, + "learning_rate": 1.4834491982334667e-05, + "loss": 0.9763, + "step": 217800 + }, + { + "epoch": 3.04, + "learning_rate": 1.4827526156675352e-05, + "loss": 0.9669, + "step": 217900 + }, + { + "epoch": 3.04, + "learning_rate": 1.4820560331016036e-05, + "loss": 0.9642, + "step": 218000 + }, + { + "epoch": 3.04, + "learning_rate": 1.481359450535672e-05, + "loss": 0.9634, + "step": 218100 + }, + { + "epoch": 3.04, + "learning_rate": 1.4806628679697404e-05, + "loss": 0.9793, + "step": 218200 + }, + { + "epoch": 3.04, + "learning_rate": 1.4799662854038089e-05, + "loss": 0.9267, + "step": 218300 + }, + { + "epoch": 3.04, + "learning_rate": 1.4792697028378773e-05, + "loss": 0.9831, + "step": 218400 + }, + { + "epoch": 3.04, + "learning_rate": 1.4785731202719458e-05, + "loss": 0.9944, + "step": 218500 + }, + { + "epoch": 3.05, + "learning_rate": 1.4778765377060143e-05, + "loss": 0.9952, + "step": 218600 + }, + { + "epoch": 3.05, + "learning_rate": 1.4771799551400827e-05, + "loss": 0.9637, + "step": 218700 + }, + { + "epoch": 3.05, + "learning_rate": 1.4764903383998105e-05, + "loss": 0.9783, + "step": 218800 + }, + { + "epoch": 3.05, + "learning_rate": 1.475793755833879e-05, + "loss": 0.9377, + "step": 218900 + }, + { + "epoch": 3.05, + "learning_rate": 1.4750971732679475e-05, + "loss": 0.9509, + "step": 219000 + }, + { + "epoch": 3.05, + "learning_rate": 1.474400590702016e-05, + "loss": 0.99, + "step": 219100 + }, + { + "epoch": 3.05, + "learning_rate": 1.4737040081360844e-05, + "loss": 0.9931, + "step": 219200 + }, + { + "epoch": 3.06, + "learning_rate": 1.4730074255701529e-05, + "loss": 1.02, + "step": 219300 + }, + { + "epoch": 3.06, + "learning_rate": 1.4723108430042212e-05, + "loss": 0.9756, + "step": 219400 + }, + { + "epoch": 3.06, + "learning_rate": 1.4716142604382897e-05, + "loss": 1.015, + "step": 219500 + }, + { + "epoch": 3.06, + "learning_rate": 1.4709176778723581e-05, + "loss": 0.9802, + "step": 219600 + }, + { + "epoch": 3.06, + "learning_rate": 1.4702210953064266e-05, + "loss": 1.0137, + "step": 219700 + }, + { + "epoch": 3.06, + "learning_rate": 1.4695314785661544e-05, + "loss": 0.9492, + "step": 219800 + }, + { + "epoch": 3.06, + "learning_rate": 1.4688348960002229e-05, + "loss": 0.9642, + "step": 219900 + }, + { + "epoch": 3.06, + "learning_rate": 1.4681383134342913e-05, + "loss": 0.9745, + "step": 220000 + }, + { + "epoch": 3.07, + "learning_rate": 1.4674417308683598e-05, + "loss": 0.9685, + "step": 220100 + }, + { + "epoch": 3.07, + "learning_rate": 1.4667451483024283e-05, + "loss": 0.9723, + "step": 220200 + }, + { + "epoch": 3.07, + "learning_rate": 1.4660485657364967e-05, + "loss": 1.0045, + "step": 220300 + }, + { + "epoch": 3.07, + "learning_rate": 1.4653519831705652e-05, + "loss": 0.9695, + "step": 220400 + }, + { + "epoch": 3.07, + "learning_rate": 1.4646554006046337e-05, + "loss": 0.9483, + "step": 220500 + }, + { + "epoch": 3.07, + "learning_rate": 1.4639588180387023e-05, + "loss": 0.9441, + "step": 220600 + }, + { + "epoch": 3.07, + "learning_rate": 1.4632622354727706e-05, + "loss": 0.9568, + "step": 220700 + }, + { + "epoch": 3.08, + "learning_rate": 1.4625656529068391e-05, + "loss": 0.9447, + "step": 220800 + }, + { + "epoch": 3.08, + "learning_rate": 1.4618690703409076e-05, + "loss": 0.9856, + "step": 220900 + }, + { + "epoch": 3.08, + "learning_rate": 1.461172487774976e-05, + "loss": 0.9654, + "step": 221000 + }, + { + "epoch": 3.08, + "learning_rate": 1.4604759052090445e-05, + "loss": 0.9757, + "step": 221100 + }, + { + "epoch": 3.08, + "learning_rate": 1.459779322643113e-05, + "loss": 0.9999, + "step": 221200 + }, + { + "epoch": 3.08, + "learning_rate": 1.4590827400771814e-05, + "loss": 0.9977, + "step": 221300 + }, + { + "epoch": 3.08, + "learning_rate": 1.4583861575112499e-05, + "loss": 0.9694, + "step": 221400 + }, + { + "epoch": 3.09, + "learning_rate": 1.4576895749453184e-05, + "loss": 0.9623, + "step": 221500 + }, + { + "epoch": 3.09, + "learning_rate": 1.4569929923793868e-05, + "loss": 0.9558, + "step": 221600 + }, + { + "epoch": 3.09, + "learning_rate": 1.4562964098134553e-05, + "loss": 0.9392, + "step": 221700 + }, + { + "epoch": 3.09, + "learning_rate": 1.4555998272475236e-05, + "loss": 0.9699, + "step": 221800 + }, + { + "epoch": 3.09, + "learning_rate": 1.454903244681592e-05, + "loss": 0.9772, + "step": 221900 + }, + { + "epoch": 3.09, + "learning_rate": 1.4542066621156605e-05, + "loss": 0.9531, + "step": 222000 + }, + { + "epoch": 3.09, + "learning_rate": 1.453510079549729e-05, + "loss": 0.9337, + "step": 222100 + }, + { + "epoch": 3.1, + "learning_rate": 1.4528134969837975e-05, + "loss": 1.0065, + "step": 222200 + }, + { + "epoch": 3.1, + "learning_rate": 1.452116914417866e-05, + "loss": 0.9943, + "step": 222300 + }, + { + "epoch": 3.1, + "learning_rate": 1.4514203318519344e-05, + "loss": 0.9692, + "step": 222400 + }, + { + "epoch": 3.1, + "learning_rate": 1.4507237492860029e-05, + "loss": 0.9519, + "step": 222500 + }, + { + "epoch": 3.1, + "learning_rate": 1.4500271667200713e-05, + "loss": 0.9975, + "step": 222600 + }, + { + "epoch": 3.1, + "learning_rate": 1.4493305841541398e-05, + "loss": 0.9832, + "step": 222700 + }, + { + "epoch": 3.1, + "learning_rate": 1.4486340015882083e-05, + "loss": 0.956, + "step": 222800 + }, + { + "epoch": 3.11, + "learning_rate": 1.4479374190222766e-05, + "loss": 0.9643, + "step": 222900 + }, + { + "epoch": 3.11, + "learning_rate": 1.4472408364563452e-05, + "loss": 0.9638, + "step": 223000 + }, + { + "epoch": 3.11, + "learning_rate": 1.4465442538904137e-05, + "loss": 0.976, + "step": 223100 + }, + { + "epoch": 3.11, + "learning_rate": 1.4458476713244822e-05, + "loss": 1.0135, + "step": 223200 + }, + { + "epoch": 3.11, + "learning_rate": 1.4451510887585506e-05, + "loss": 0.9846, + "step": 223300 + }, + { + "epoch": 3.11, + "learning_rate": 1.4444545061926191e-05, + "loss": 1.0115, + "step": 223400 + }, + { + "epoch": 3.11, + "learning_rate": 1.4437579236266876e-05, + "loss": 0.9863, + "step": 223500 + }, + { + "epoch": 3.12, + "learning_rate": 1.443061341060756e-05, + "loss": 0.9686, + "step": 223600 + }, + { + "epoch": 3.12, + "learning_rate": 1.4423647584948245e-05, + "loss": 0.9709, + "step": 223700 + }, + { + "epoch": 3.12, + "learning_rate": 1.441668175928893e-05, + "loss": 0.9712, + "step": 223800 + }, + { + "epoch": 3.12, + "learning_rate": 1.4409785591886208e-05, + "loss": 0.9408, + "step": 223900 + }, + { + "epoch": 3.12, + "learning_rate": 1.4402819766226892e-05, + "loss": 0.9603, + "step": 224000 + }, + { + "epoch": 3.12, + "learning_rate": 1.4395853940567577e-05, + "loss": 0.9748, + "step": 224100 + }, + { + "epoch": 3.12, + "learning_rate": 1.438888811490826e-05, + "loss": 0.9562, + "step": 224200 + }, + { + "epoch": 3.12, + "learning_rate": 1.4381922289248945e-05, + "loss": 1.0136, + "step": 224300 + }, + { + "epoch": 3.13, + "learning_rate": 1.437495646358963e-05, + "loss": 0.9822, + "step": 224400 + }, + { + "epoch": 3.13, + "learning_rate": 1.4367990637930314e-05, + "loss": 1.0055, + "step": 224500 + }, + { + "epoch": 3.13, + "learning_rate": 1.4361024812270999e-05, + "loss": 0.9875, + "step": 224600 + }, + { + "epoch": 3.13, + "learning_rate": 1.4354058986611684e-05, + "loss": 1.0064, + "step": 224700 + }, + { + "epoch": 3.13, + "learning_rate": 1.4347162819208962e-05, + "loss": 0.9762, + "step": 224800 + }, + { + "epoch": 3.13, + "learning_rate": 1.4340196993549646e-05, + "loss": 0.9445, + "step": 224900 + }, + { + "epoch": 3.13, + "learning_rate": 1.4333231167890331e-05, + "loss": 0.9514, + "step": 225000 + }, + { + "epoch": 3.14, + "learning_rate": 1.4326265342231016e-05, + "loss": 0.9584, + "step": 225100 + }, + { + "epoch": 3.14, + "learning_rate": 1.43192995165717e-05, + "loss": 1.0148, + "step": 225200 + }, + { + "epoch": 3.14, + "learning_rate": 1.4312333690912385e-05, + "loss": 0.9802, + "step": 225300 + }, + { + "epoch": 3.14, + "learning_rate": 1.430536786525307e-05, + "loss": 0.98, + "step": 225400 + }, + { + "epoch": 3.14, + "learning_rate": 1.4298402039593753e-05, + "loss": 0.9218, + "step": 225500 + }, + { + "epoch": 3.14, + "learning_rate": 1.4291436213934437e-05, + "loss": 0.9785, + "step": 225600 + }, + { + "epoch": 3.14, + "learning_rate": 1.4284470388275122e-05, + "loss": 0.9941, + "step": 225700 + }, + { + "epoch": 3.15, + "learning_rate": 1.4277504562615807e-05, + "loss": 0.9878, + "step": 225800 + }, + { + "epoch": 3.15, + "learning_rate": 1.4270538736956492e-05, + "loss": 0.9692, + "step": 225900 + }, + { + "epoch": 3.15, + "learning_rate": 1.4263572911297176e-05, + "loss": 0.9958, + "step": 226000 + }, + { + "epoch": 3.15, + "learning_rate": 1.4256607085637861e-05, + "loss": 0.9872, + "step": 226100 + }, + { + "epoch": 3.15, + "learning_rate": 1.4249641259978546e-05, + "loss": 0.9813, + "step": 226200 + }, + { + "epoch": 3.15, + "learning_rate": 1.424267543431923e-05, + "loss": 0.9601, + "step": 226300 + }, + { + "epoch": 3.15, + "learning_rate": 1.4235709608659915e-05, + "loss": 0.9642, + "step": 226400 + }, + { + "epoch": 3.16, + "learning_rate": 1.42287437830006e-05, + "loss": 0.9719, + "step": 226500 + }, + { + "epoch": 3.16, + "learning_rate": 1.4221777957341284e-05, + "loss": 0.9609, + "step": 226600 + }, + { + "epoch": 3.16, + "learning_rate": 1.4214812131681967e-05, + "loss": 0.9422, + "step": 226700 + }, + { + "epoch": 3.16, + "learning_rate": 1.4207846306022652e-05, + "loss": 0.9826, + "step": 226800 + }, + { + "epoch": 3.16, + "learning_rate": 1.4200880480363337e-05, + "loss": 0.9508, + "step": 226900 + }, + { + "epoch": 3.16, + "learning_rate": 1.4193914654704021e-05, + "loss": 0.9779, + "step": 227000 + }, + { + "epoch": 3.16, + "learning_rate": 1.4186948829044708e-05, + "loss": 0.9608, + "step": 227100 + }, + { + "epoch": 3.17, + "learning_rate": 1.4179983003385392e-05, + "loss": 0.9876, + "step": 227200 + }, + { + "epoch": 3.17, + "learning_rate": 1.4173017177726077e-05, + "loss": 0.9501, + "step": 227300 + }, + { + "epoch": 3.17, + "learning_rate": 1.4166051352066762e-05, + "loss": 0.9924, + "step": 227400 + }, + { + "epoch": 3.17, + "learning_rate": 1.4159085526407446e-05, + "loss": 0.9669, + "step": 227500 + }, + { + "epoch": 3.17, + "learning_rate": 1.4152119700748131e-05, + "loss": 0.9911, + "step": 227600 + }, + { + "epoch": 3.17, + "learning_rate": 1.4145153875088816e-05, + "loss": 0.9584, + "step": 227700 + }, + { + "epoch": 3.17, + "learning_rate": 1.4138188049429499e-05, + "loss": 0.9577, + "step": 227800 + }, + { + "epoch": 3.18, + "learning_rate": 1.4131222223770183e-05, + "loss": 0.9707, + "step": 227900 + }, + { + "epoch": 3.18, + "learning_rate": 1.4124326056367462e-05, + "loss": 0.9537, + "step": 228000 + }, + { + "epoch": 3.18, + "learning_rate": 1.4117360230708146e-05, + "loss": 0.9705, + "step": 228100 + }, + { + "epoch": 3.18, + "learning_rate": 1.4110394405048831e-05, + "loss": 0.9787, + "step": 228200 + }, + { + "epoch": 3.18, + "learning_rate": 1.4103428579389516e-05, + "loss": 0.9366, + "step": 228300 + }, + { + "epoch": 3.18, + "learning_rate": 1.40964627537302e-05, + "loss": 0.9885, + "step": 228400 + }, + { + "epoch": 3.18, + "learning_rate": 1.4089496928070885e-05, + "loss": 1.0184, + "step": 228500 + }, + { + "epoch": 3.18, + "learning_rate": 1.408253110241157e-05, + "loss": 1.0104, + "step": 228600 + }, + { + "epoch": 3.19, + "learning_rate": 1.4075565276752254e-05, + "loss": 0.9694, + "step": 228700 + }, + { + "epoch": 3.19, + "learning_rate": 1.4068599451092939e-05, + "loss": 0.9776, + "step": 228800 + }, + { + "epoch": 3.19, + "learning_rate": 1.4061633625433624e-05, + "loss": 0.9612, + "step": 228900 + }, + { + "epoch": 3.19, + "learning_rate": 1.4054667799774308e-05, + "loss": 0.9772, + "step": 229000 + }, + { + "epoch": 3.19, + "learning_rate": 1.4047701974114991e-05, + "loss": 0.9404, + "step": 229100 + }, + { + "epoch": 3.19, + "learning_rate": 1.4040736148455676e-05, + "loss": 0.927, + "step": 229200 + }, + { + "epoch": 3.19, + "learning_rate": 1.403377032279636e-05, + "loss": 0.9735, + "step": 229300 + }, + { + "epoch": 3.2, + "learning_rate": 1.4026804497137045e-05, + "loss": 0.9728, + "step": 229400 + }, + { + "epoch": 3.2, + "learning_rate": 1.401983867147773e-05, + "loss": 0.9853, + "step": 229500 + }, + { + "epoch": 3.2, + "learning_rate": 1.4012872845818415e-05, + "loss": 0.9633, + "step": 229600 + }, + { + "epoch": 3.2, + "learning_rate": 1.40059070201591e-05, + "loss": 0.9723, + "step": 229700 + }, + { + "epoch": 3.2, + "learning_rate": 1.3998941194499784e-05, + "loss": 0.9575, + "step": 229800 + }, + { + "epoch": 3.2, + "learning_rate": 1.3991975368840469e-05, + "loss": 0.9777, + "step": 229900 + }, + { + "epoch": 3.2, + "learning_rate": 1.3985009543181154e-05, + "loss": 0.9754, + "step": 230000 + }, + { + "epoch": 3.21, + "learning_rate": 1.3978043717521838e-05, + "loss": 0.9709, + "step": 230100 + }, + { + "epoch": 3.21, + "learning_rate": 1.3971077891862521e-05, + "loss": 0.9731, + "step": 230200 + }, + { + "epoch": 3.21, + "learning_rate": 1.3964112066203206e-05, + "loss": 0.9468, + "step": 230300 + }, + { + "epoch": 3.21, + "learning_rate": 1.3957146240543892e-05, + "loss": 1.0028, + "step": 230400 + }, + { + "epoch": 3.21, + "learning_rate": 1.3950180414884577e-05, + "loss": 0.9617, + "step": 230500 + }, + { + "epoch": 3.21, + "learning_rate": 1.3943214589225262e-05, + "loss": 0.9848, + "step": 230600 + }, + { + "epoch": 3.21, + "learning_rate": 1.3936248763565946e-05, + "loss": 0.9833, + "step": 230700 + }, + { + "epoch": 3.22, + "learning_rate": 1.3929282937906631e-05, + "loss": 0.9816, + "step": 230800 + }, + { + "epoch": 3.22, + "learning_rate": 1.3922317112247316e-05, + "loss": 0.9549, + "step": 230900 + }, + { + "epoch": 3.22, + "learning_rate": 1.3915351286588e-05, + "loss": 0.9583, + "step": 231000 + }, + { + "epoch": 3.22, + "learning_rate": 1.3908385460928685e-05, + "loss": 0.99, + "step": 231100 + }, + { + "epoch": 3.22, + "learning_rate": 1.390141963526937e-05, + "loss": 0.9897, + "step": 231200 + }, + { + "epoch": 3.22, + "learning_rate": 1.3894453809610053e-05, + "loss": 0.9987, + "step": 231300 + }, + { + "epoch": 3.22, + "learning_rate": 1.3887487983950737e-05, + "loss": 0.9997, + "step": 231400 + }, + { + "epoch": 3.23, + "learning_rate": 1.3880522158291422e-05, + "loss": 0.9936, + "step": 231500 + }, + { + "epoch": 3.23, + "learning_rate": 1.3873556332632107e-05, + "loss": 0.9856, + "step": 231600 + }, + { + "epoch": 3.23, + "learning_rate": 1.3866660165229385e-05, + "loss": 0.9799, + "step": 231700 + }, + { + "epoch": 3.23, + "learning_rate": 1.385969433957007e-05, + "loss": 0.9946, + "step": 231800 + }, + { + "epoch": 3.23, + "learning_rate": 1.3852728513910754e-05, + "loss": 0.9955, + "step": 231900 + }, + { + "epoch": 3.23, + "learning_rate": 1.3845762688251439e-05, + "loss": 0.9836, + "step": 232000 + }, + { + "epoch": 3.23, + "learning_rate": 1.3838796862592124e-05, + "loss": 0.9599, + "step": 232100 + }, + { + "epoch": 3.23, + "learning_rate": 1.3831831036932808e-05, + "loss": 0.9956, + "step": 232200 + }, + { + "epoch": 3.24, + "learning_rate": 1.3824865211273493e-05, + "loss": 1.0097, + "step": 232300 + }, + { + "epoch": 3.24, + "learning_rate": 1.3817899385614178e-05, + "loss": 1.0054, + "step": 232400 + }, + { + "epoch": 3.24, + "learning_rate": 1.3810933559954862e-05, + "loss": 0.9995, + "step": 232500 + }, + { + "epoch": 3.24, + "learning_rate": 1.3803967734295545e-05, + "loss": 0.9805, + "step": 232600 + }, + { + "epoch": 3.24, + "learning_rate": 1.379700190863623e-05, + "loss": 0.9738, + "step": 232700 + }, + { + "epoch": 3.24, + "learning_rate": 1.3790036082976915e-05, + "loss": 0.969, + "step": 232800 + }, + { + "epoch": 3.24, + "learning_rate": 1.37830702573176e-05, + "loss": 0.9727, + "step": 232900 + }, + { + "epoch": 3.25, + "learning_rate": 1.3776104431658284e-05, + "loss": 0.9536, + "step": 233000 + }, + { + "epoch": 3.25, + "learning_rate": 1.3769138605998969e-05, + "loss": 0.9871, + "step": 233100 + }, + { + "epoch": 3.25, + "learning_rate": 1.3762172780339654e-05, + "loss": 1.0029, + "step": 233200 + }, + { + "epoch": 3.25, + "learning_rate": 1.3755206954680338e-05, + "loss": 0.9848, + "step": 233300 + }, + { + "epoch": 3.25, + "learning_rate": 1.3748241129021023e-05, + "loss": 1.0025, + "step": 233400 + }, + { + "epoch": 3.25, + "learning_rate": 1.3741275303361708e-05, + "loss": 0.9891, + "step": 233500 + }, + { + "epoch": 3.25, + "learning_rate": 1.3734309477702394e-05, + "loss": 0.997, + "step": 233600 + }, + { + "epoch": 3.26, + "learning_rate": 1.3727343652043079e-05, + "loss": 0.9582, + "step": 233700 + }, + { + "epoch": 3.26, + "learning_rate": 1.3720377826383762e-05, + "loss": 0.9687, + "step": 233800 + }, + { + "epoch": 3.26, + "learning_rate": 1.3713412000724446e-05, + "loss": 0.9688, + "step": 233900 + }, + { + "epoch": 3.26, + "learning_rate": 1.3706446175065131e-05, + "loss": 0.9618, + "step": 234000 + }, + { + "epoch": 3.26, + "learning_rate": 1.3699550007662407e-05, + "loss": 0.9736, + "step": 234100 + }, + { + "epoch": 3.26, + "learning_rate": 1.3692584182003092e-05, + "loss": 0.983, + "step": 234200 + }, + { + "epoch": 3.26, + "learning_rate": 1.3685618356343777e-05, + "loss": 0.963, + "step": 234300 + }, + { + "epoch": 3.27, + "learning_rate": 1.3678652530684463e-05, + "loss": 1.0001, + "step": 234400 + }, + { + "epoch": 3.27, + "learning_rate": 1.3671686705025148e-05, + "loss": 0.9748, + "step": 234500 + }, + { + "epoch": 3.27, + "learning_rate": 1.3664720879365833e-05, + "loss": 0.9721, + "step": 234600 + }, + { + "epoch": 3.27, + "learning_rate": 1.3657755053706517e-05, + "loss": 0.9612, + "step": 234700 + }, + { + "epoch": 3.27, + "learning_rate": 1.3650789228047202e-05, + "loss": 0.993, + "step": 234800 + }, + { + "epoch": 3.27, + "learning_rate": 1.3643823402387887e-05, + "loss": 0.9825, + "step": 234900 + }, + { + "epoch": 3.27, + "learning_rate": 1.3636857576728571e-05, + "loss": 1.0081, + "step": 235000 + }, + { + "epoch": 3.28, + "learning_rate": 1.3629891751069254e-05, + "loss": 0.9997, + "step": 235100 + }, + { + "epoch": 3.28, + "learning_rate": 1.3622925925409939e-05, + "loss": 0.9768, + "step": 235200 + }, + { + "epoch": 3.28, + "learning_rate": 1.3615960099750624e-05, + "loss": 1.0161, + "step": 235300 + }, + { + "epoch": 3.28, + "learning_rate": 1.3608994274091308e-05, + "loss": 1.0124, + "step": 235400 + }, + { + "epoch": 3.28, + "learning_rate": 1.3602028448431993e-05, + "loss": 0.9675, + "step": 235500 + }, + { + "epoch": 3.28, + "learning_rate": 1.3595062622772678e-05, + "loss": 0.9708, + "step": 235600 + }, + { + "epoch": 3.28, + "learning_rate": 1.3588096797113362e-05, + "loss": 0.9902, + "step": 235700 + }, + { + "epoch": 3.29, + "learning_rate": 1.3581130971454047e-05, + "loss": 0.983, + "step": 235800 + }, + { + "epoch": 3.29, + "learning_rate": 1.3574165145794732e-05, + "loss": 0.9454, + "step": 235900 + }, + { + "epoch": 3.29, + "learning_rate": 1.3567199320135416e-05, + "loss": 0.9914, + "step": 236000 + }, + { + "epoch": 3.29, + "learning_rate": 1.3560233494476101e-05, + "loss": 1.0168, + "step": 236100 + }, + { + "epoch": 3.29, + "learning_rate": 1.3553267668816784e-05, + "loss": 0.9914, + "step": 236200 + }, + { + "epoch": 3.29, + "learning_rate": 1.3546301843157469e-05, + "loss": 0.9612, + "step": 236300 + }, + { + "epoch": 3.29, + "learning_rate": 1.3539336017498153e-05, + "loss": 0.974, + "step": 236400 + }, + { + "epoch": 3.29, + "learning_rate": 1.3532370191838838e-05, + "loss": 0.9705, + "step": 236500 + }, + { + "epoch": 3.3, + "learning_rate": 1.3525404366179523e-05, + "loss": 0.9984, + "step": 236600 + }, + { + "epoch": 3.3, + "learning_rate": 1.3518438540520207e-05, + "loss": 0.9975, + "step": 236700 + }, + { + "epoch": 3.3, + "learning_rate": 1.3511472714860892e-05, + "loss": 0.9893, + "step": 236800 + }, + { + "epoch": 3.3, + "learning_rate": 1.3504506889201579e-05, + "loss": 0.9882, + "step": 236900 + }, + { + "epoch": 3.3, + "learning_rate": 1.3497541063542263e-05, + "loss": 1.0054, + "step": 237000 + }, + { + "epoch": 3.3, + "learning_rate": 1.3490575237882948e-05, + "loss": 0.9796, + "step": 237100 + }, + { + "epoch": 3.3, + "learning_rate": 1.3483609412223633e-05, + "loss": 0.9589, + "step": 237200 + }, + { + "epoch": 3.31, + "learning_rate": 1.3476713244820909e-05, + "loss": 1.0009, + "step": 237300 + }, + { + "epoch": 3.31, + "learning_rate": 1.3469747419161594e-05, + "loss": 0.9728, + "step": 237400 + }, + { + "epoch": 3.31, + "learning_rate": 1.3462781593502277e-05, + "loss": 0.9735, + "step": 237500 + }, + { + "epoch": 3.31, + "learning_rate": 1.3455815767842961e-05, + "loss": 0.9765, + "step": 237600 + }, + { + "epoch": 3.31, + "learning_rate": 1.3448849942183648e-05, + "loss": 0.9593, + "step": 237700 + }, + { + "epoch": 3.31, + "learning_rate": 1.3441884116524332e-05, + "loss": 1.0069, + "step": 237800 + }, + { + "epoch": 3.31, + "learning_rate": 1.3434918290865017e-05, + "loss": 0.9707, + "step": 237900 + }, + { + "epoch": 3.32, + "learning_rate": 1.3427952465205702e-05, + "loss": 0.9854, + "step": 238000 + }, + { + "epoch": 3.32, + "learning_rate": 1.3420986639546386e-05, + "loss": 0.9702, + "step": 238100 + }, + { + "epoch": 3.32, + "learning_rate": 1.3414020813887071e-05, + "loss": 1.0152, + "step": 238200 + }, + { + "epoch": 3.32, + "learning_rate": 1.3407054988227756e-05, + "loss": 1.0205, + "step": 238300 + }, + { + "epoch": 3.32, + "learning_rate": 1.340008916256844e-05, + "loss": 0.9861, + "step": 238400 + }, + { + "epoch": 3.32, + "learning_rate": 1.3393123336909125e-05, + "loss": 1.0026, + "step": 238500 + }, + { + "epoch": 3.32, + "learning_rate": 1.3386157511249808e-05, + "loss": 1.0146, + "step": 238600 + }, + { + "epoch": 3.33, + "learning_rate": 1.3379191685590493e-05, + "loss": 0.9556, + "step": 238700 + }, + { + "epoch": 3.33, + "learning_rate": 1.3372225859931178e-05, + "loss": 0.9612, + "step": 238800 + }, + { + "epoch": 3.33, + "learning_rate": 1.3365260034271862e-05, + "loss": 0.9914, + "step": 238900 + }, + { + "epoch": 3.33, + "learning_rate": 1.3358294208612547e-05, + "loss": 0.9871, + "step": 239000 + }, + { + "epoch": 3.33, + "learning_rate": 1.3351328382953232e-05, + "loss": 0.995, + "step": 239100 + }, + { + "epoch": 3.33, + "learning_rate": 1.3344362557293916e-05, + "loss": 0.9541, + "step": 239200 + }, + { + "epoch": 3.33, + "learning_rate": 1.3337396731634601e-05, + "loss": 0.9721, + "step": 239300 + }, + { + "epoch": 3.34, + "learning_rate": 1.3330430905975286e-05, + "loss": 1.0121, + "step": 239400 + }, + { + "epoch": 3.34, + "learning_rate": 1.332346508031597e-05, + "loss": 0.9509, + "step": 239500 + }, + { + "epoch": 3.34, + "learning_rate": 1.3316568912913248e-05, + "loss": 0.9673, + "step": 239600 + }, + { + "epoch": 3.34, + "learning_rate": 1.3309603087253933e-05, + "loss": 0.9598, + "step": 239700 + }, + { + "epoch": 3.34, + "learning_rate": 1.3302637261594618e-05, + "loss": 0.982, + "step": 239800 + }, + { + "epoch": 3.34, + "learning_rate": 1.32956714359353e-05, + "loss": 0.9916, + "step": 239900 + }, + { + "epoch": 3.34, + "learning_rate": 1.3288705610275986e-05, + "loss": 0.9989, + "step": 240000 + }, + { + "epoch": 3.34, + "learning_rate": 1.328173978461667e-05, + "loss": 0.9761, + "step": 240100 + }, + { + "epoch": 3.35, + "learning_rate": 1.3274773958957355e-05, + "loss": 0.9695, + "step": 240200 + }, + { + "epoch": 3.35, + "learning_rate": 1.326780813329804e-05, + "loss": 0.9946, + "step": 240300 + }, + { + "epoch": 3.35, + "learning_rate": 1.3260842307638724e-05, + "loss": 0.9662, + "step": 240400 + }, + { + "epoch": 3.35, + "learning_rate": 1.3253876481979409e-05, + "loss": 1.0043, + "step": 240500 + }, + { + "epoch": 3.35, + "learning_rate": 1.3246910656320094e-05, + "loss": 0.9889, + "step": 240600 + }, + { + "epoch": 3.35, + "learning_rate": 1.3239944830660778e-05, + "loss": 0.9748, + "step": 240700 + }, + { + "epoch": 3.35, + "learning_rate": 1.3232979005001463e-05, + "loss": 0.9632, + "step": 240800 + }, + { + "epoch": 3.36, + "learning_rate": 1.3226013179342148e-05, + "loss": 1.026, + "step": 240900 + }, + { + "epoch": 3.36, + "learning_rate": 1.3219047353682832e-05, + "loss": 0.9639, + "step": 241000 + }, + { + "epoch": 3.36, + "learning_rate": 1.3212081528023517e-05, + "loss": 0.973, + "step": 241100 + }, + { + "epoch": 3.36, + "learning_rate": 1.3205115702364202e-05, + "loss": 0.9861, + "step": 241200 + }, + { + "epoch": 3.36, + "learning_rate": 1.3198149876704886e-05, + "loss": 0.9995, + "step": 241300 + }, + { + "epoch": 3.36, + "learning_rate": 1.3191184051045571e-05, + "loss": 0.9796, + "step": 241400 + }, + { + "epoch": 3.36, + "learning_rate": 1.3184218225386256e-05, + "loss": 0.9778, + "step": 241500 + }, + { + "epoch": 3.37, + "learning_rate": 1.317725239972694e-05, + "loss": 0.9788, + "step": 241600 + }, + { + "epoch": 3.37, + "learning_rate": 1.3170286574067625e-05, + "loss": 1.0043, + "step": 241700 + }, + { + "epoch": 3.37, + "learning_rate": 1.316332074840831e-05, + "loss": 0.9934, + "step": 241800 + }, + { + "epoch": 3.37, + "learning_rate": 1.3156354922748994e-05, + "loss": 0.976, + "step": 241900 + }, + { + "epoch": 3.37, + "learning_rate": 1.3149458755346273e-05, + "loss": 0.9997, + "step": 242000 + }, + { + "epoch": 3.37, + "learning_rate": 1.3142492929686957e-05, + "loss": 0.9845, + "step": 242100 + }, + { + "epoch": 3.37, + "learning_rate": 1.3135527104027642e-05, + "loss": 0.9935, + "step": 242200 + }, + { + "epoch": 3.38, + "learning_rate": 1.3128561278368325e-05, + "loss": 0.9589, + "step": 242300 + }, + { + "epoch": 3.38, + "learning_rate": 1.312159545270901e-05, + "loss": 0.9656, + "step": 242400 + }, + { + "epoch": 3.38, + "learning_rate": 1.3114629627049694e-05, + "loss": 0.9979, + "step": 242500 + }, + { + "epoch": 3.38, + "learning_rate": 1.3107663801390379e-05, + "loss": 0.9738, + "step": 242600 + }, + { + "epoch": 3.38, + "learning_rate": 1.3100697975731064e-05, + "loss": 1.0074, + "step": 242700 + }, + { + "epoch": 3.38, + "learning_rate": 1.3093732150071748e-05, + "loss": 1.0119, + "step": 242800 + }, + { + "epoch": 3.38, + "learning_rate": 1.3086766324412433e-05, + "loss": 0.9412, + "step": 242900 + }, + { + "epoch": 3.39, + "learning_rate": 1.3079800498753118e-05, + "loss": 1.0002, + "step": 243000 + }, + { + "epoch": 3.39, + "learning_rate": 1.3072834673093802e-05, + "loss": 0.9525, + "step": 243100 + }, + { + "epoch": 3.39, + "learning_rate": 1.3065868847434487e-05, + "loss": 0.959, + "step": 243200 + }, + { + "epoch": 3.39, + "learning_rate": 1.3058903021775172e-05, + "loss": 1.0097, + "step": 243300 + }, + { + "epoch": 3.39, + "learning_rate": 1.3051937196115855e-05, + "loss": 0.9807, + "step": 243400 + }, + { + "epoch": 3.39, + "learning_rate": 1.304497137045654e-05, + "loss": 0.9537, + "step": 243500 + }, + { + "epoch": 3.39, + "learning_rate": 1.3038005544797224e-05, + "loss": 0.9929, + "step": 243600 + }, + { + "epoch": 3.4, + "learning_rate": 1.3031039719137909e-05, + "loss": 0.9862, + "step": 243700 + }, + { + "epoch": 3.4, + "learning_rate": 1.3024143551735187e-05, + "loss": 0.9913, + "step": 243800 + }, + { + "epoch": 3.4, + "learning_rate": 1.3017177726075872e-05, + "loss": 0.9867, + "step": 243900 + }, + { + "epoch": 3.4, + "learning_rate": 1.3010211900416556e-05, + "loss": 0.9704, + "step": 244000 + }, + { + "epoch": 3.4, + "learning_rate": 1.3003246074757241e-05, + "loss": 0.9614, + "step": 244100 + }, + { + "epoch": 3.4, + "learning_rate": 1.2996280249097926e-05, + "loss": 0.9972, + "step": 244200 + }, + { + "epoch": 3.4, + "learning_rate": 1.298931442343861e-05, + "loss": 0.9898, + "step": 244300 + }, + { + "epoch": 3.4, + "learning_rate": 1.2982348597779295e-05, + "loss": 0.9753, + "step": 244400 + }, + { + "epoch": 3.41, + "learning_rate": 1.297538277211998e-05, + "loss": 0.9839, + "step": 244500 + }, + { + "epoch": 3.41, + "learning_rate": 1.2968416946460664e-05, + "loss": 0.9777, + "step": 244600 + }, + { + "epoch": 3.41, + "learning_rate": 1.2961451120801347e-05, + "loss": 0.9914, + "step": 244700 + }, + { + "epoch": 3.41, + "learning_rate": 1.2954485295142032e-05, + "loss": 0.9803, + "step": 244800 + }, + { + "epoch": 3.41, + "learning_rate": 1.2947519469482717e-05, + "loss": 0.9592, + "step": 244900 + }, + { + "epoch": 3.41, + "learning_rate": 1.2940553643823403e-05, + "loss": 0.9881, + "step": 245000 + }, + { + "epoch": 3.41, + "learning_rate": 1.2933587818164088e-05, + "loss": 0.9817, + "step": 245100 + }, + { + "epoch": 3.42, + "learning_rate": 1.2926621992504773e-05, + "loss": 0.9738, + "step": 245200 + }, + { + "epoch": 3.42, + "learning_rate": 1.2919656166845457e-05, + "loss": 0.9989, + "step": 245300 + }, + { + "epoch": 3.42, + "learning_rate": 1.2912690341186142e-05, + "loss": 0.9551, + "step": 245400 + }, + { + "epoch": 3.42, + "learning_rate": 1.2905724515526827e-05, + "loss": 0.9904, + "step": 245500 + }, + { + "epoch": 3.42, + "learning_rate": 1.2898758689867511e-05, + "loss": 0.9919, + "step": 245600 + }, + { + "epoch": 3.42, + "learning_rate": 1.2891792864208196e-05, + "loss": 0.9967, + "step": 245700 + }, + { + "epoch": 3.42, + "learning_rate": 1.288482703854888e-05, + "loss": 1.0002, + "step": 245800 + }, + { + "epoch": 3.43, + "learning_rate": 1.2877861212889564e-05, + "loss": 0.9913, + "step": 245900 + }, + { + "epoch": 3.43, + "learning_rate": 1.2870895387230248e-05, + "loss": 0.9849, + "step": 246000 + }, + { + "epoch": 3.43, + "learning_rate": 1.2863929561570933e-05, + "loss": 0.9998, + "step": 246100 + }, + { + "epoch": 3.43, + "learning_rate": 1.2856963735911618e-05, + "loss": 1.0058, + "step": 246200 + }, + { + "epoch": 3.43, + "learning_rate": 1.2849997910252302e-05, + "loss": 0.9815, + "step": 246300 + }, + { + "epoch": 3.43, + "learning_rate": 1.2843032084592987e-05, + "loss": 0.9764, + "step": 246400 + }, + { + "epoch": 3.43, + "learning_rate": 1.2836066258933672e-05, + "loss": 1.0131, + "step": 246500 + }, + { + "epoch": 3.44, + "learning_rate": 1.2829100433274356e-05, + "loss": 0.9728, + "step": 246600 + }, + { + "epoch": 3.44, + "learning_rate": 1.2822134607615041e-05, + "loss": 0.9944, + "step": 246700 + }, + { + "epoch": 3.44, + "learning_rate": 1.2815168781955726e-05, + "loss": 0.9332, + "step": 246800 + }, + { + "epoch": 3.44, + "learning_rate": 1.280820295629641e-05, + "loss": 0.961, + "step": 246900 + }, + { + "epoch": 3.44, + "learning_rate": 1.2801237130637093e-05, + "loss": 1.0046, + "step": 247000 + }, + { + "epoch": 3.44, + "learning_rate": 1.2794271304977778e-05, + "loss": 0.975, + "step": 247100 + }, + { + "epoch": 3.44, + "learning_rate": 1.2787305479318463e-05, + "loss": 0.9783, + "step": 247200 + }, + { + "epoch": 3.45, + "learning_rate": 1.2780339653659147e-05, + "loss": 0.9852, + "step": 247300 + }, + { + "epoch": 3.45, + "learning_rate": 1.2773373827999832e-05, + "loss": 1.0075, + "step": 247400 + }, + { + "epoch": 3.45, + "learning_rate": 1.2766408002340519e-05, + "loss": 0.9754, + "step": 247500 + }, + { + "epoch": 3.45, + "learning_rate": 1.2759442176681203e-05, + "loss": 0.9657, + "step": 247600 + }, + { + "epoch": 3.45, + "learning_rate": 1.2752476351021888e-05, + "loss": 1.0082, + "step": 247700 + }, + { + "epoch": 3.45, + "learning_rate": 1.2745510525362573e-05, + "loss": 0.9673, + "step": 247800 + }, + { + "epoch": 3.45, + "learning_rate": 1.2738544699703257e-05, + "loss": 0.9751, + "step": 247900 + }, + { + "epoch": 3.46, + "learning_rate": 1.2731578874043942e-05, + "loss": 1.0117, + "step": 248000 + }, + { + "epoch": 3.46, + "learning_rate": 1.2724613048384625e-05, + "loss": 1.0074, + "step": 248100 + }, + { + "epoch": 3.46, + "learning_rate": 1.2717716880981903e-05, + "loss": 1.0053, + "step": 248200 + }, + { + "epoch": 3.46, + "learning_rate": 1.2710751055322588e-05, + "loss": 1.0105, + "step": 248300 + }, + { + "epoch": 3.46, + "learning_rate": 1.2703785229663272e-05, + "loss": 0.9761, + "step": 248400 + }, + { + "epoch": 3.46, + "learning_rate": 1.2696819404003957e-05, + "loss": 0.9702, + "step": 248500 + }, + { + "epoch": 3.46, + "learning_rate": 1.2689853578344642e-05, + "loss": 1.0102, + "step": 248600 + }, + { + "epoch": 3.46, + "learning_rate": 1.2682887752685327e-05, + "loss": 0.9613, + "step": 248700 + }, + { + "epoch": 3.47, + "learning_rate": 1.2675921927026011e-05, + "loss": 0.9878, + "step": 248800 + }, + { + "epoch": 3.47, + "learning_rate": 1.2668956101366696e-05, + "loss": 0.9962, + "step": 248900 + }, + { + "epoch": 3.47, + "learning_rate": 1.266199027570738e-05, + "loss": 0.9722, + "step": 249000 + }, + { + "epoch": 3.47, + "learning_rate": 1.2655024450048065e-05, + "loss": 0.9736, + "step": 249100 + }, + { + "epoch": 3.47, + "learning_rate": 1.264805862438875e-05, + "loss": 0.9761, + "step": 249200 + }, + { + "epoch": 3.47, + "learning_rate": 1.2641092798729435e-05, + "loss": 0.9865, + "step": 249300 + }, + { + "epoch": 3.47, + "learning_rate": 1.2634126973070118e-05, + "loss": 1.0022, + "step": 249400 + }, + { + "epoch": 3.48, + "learning_rate": 1.2627161147410802e-05, + "loss": 0.9916, + "step": 249500 + }, + { + "epoch": 3.48, + "learning_rate": 1.2620195321751487e-05, + "loss": 0.9595, + "step": 249600 + }, + { + "epoch": 3.48, + "learning_rate": 1.2613229496092172e-05, + "loss": 0.9772, + "step": 249700 + }, + { + "epoch": 3.48, + "learning_rate": 1.2606263670432856e-05, + "loss": 0.99, + "step": 249800 + }, + { + "epoch": 3.48, + "learning_rate": 1.2599297844773541e-05, + "loss": 0.9679, + "step": 249900 + }, + { + "epoch": 3.48, + "learning_rate": 1.2592332019114226e-05, + "loss": 1.0168, + "step": 250000 + }, + { + "epoch": 3.48, + "learning_rate": 1.258536619345491e-05, + "loss": 0.9696, + "step": 250100 + }, + { + "epoch": 3.49, + "learning_rate": 1.2578400367795595e-05, + "loss": 0.9751, + "step": 250200 + }, + { + "epoch": 3.49, + "learning_rate": 1.257143454213628e-05, + "loss": 0.9922, + "step": 250300 + }, + { + "epoch": 3.49, + "learning_rate": 1.2564468716476964e-05, + "loss": 0.9825, + "step": 250400 + }, + { + "epoch": 3.49, + "learning_rate": 1.2557572549074243e-05, + "loss": 0.9756, + "step": 250500 + }, + { + "epoch": 3.49, + "learning_rate": 1.2550606723414927e-05, + "loss": 0.9796, + "step": 250600 + }, + { + "epoch": 3.49, + "learning_rate": 1.254364089775561e-05, + "loss": 0.9671, + "step": 250700 + }, + { + "epoch": 3.49, + "learning_rate": 1.2536675072096295e-05, + "loss": 0.9738, + "step": 250800 + }, + { + "epoch": 3.5, + "learning_rate": 1.252970924643698e-05, + "loss": 1.0037, + "step": 250900 + }, + { + "epoch": 3.5, + "learning_rate": 1.2522743420777664e-05, + "loss": 0.9856, + "step": 251000 + }, + { + "epoch": 3.5, + "learning_rate": 1.2515777595118349e-05, + "loss": 0.9816, + "step": 251100 + }, + { + "epoch": 3.5, + "learning_rate": 1.2508811769459034e-05, + "loss": 0.982, + "step": 251200 + }, + { + "epoch": 3.5, + "learning_rate": 1.2501845943799718e-05, + "loss": 0.9655, + "step": 251300 + }, + { + "epoch": 3.5, + "learning_rate": 1.2494880118140403e-05, + "loss": 0.9474, + "step": 251400 + }, + { + "epoch": 3.5, + "learning_rate": 1.2487914292481088e-05, + "loss": 1.0095, + "step": 251500 + }, + { + "epoch": 3.51, + "learning_rate": 1.2480948466821774e-05, + "loss": 0.9443, + "step": 251600 + }, + { + "epoch": 3.51, + "learning_rate": 1.2473982641162459e-05, + "loss": 1.0083, + "step": 251700 + }, + { + "epoch": 3.51, + "learning_rate": 1.2467016815503142e-05, + "loss": 0.9412, + "step": 251800 + }, + { + "epoch": 3.51, + "learning_rate": 1.2460050989843826e-05, + "loss": 0.9921, + "step": 251900 + }, + { + "epoch": 3.51, + "learning_rate": 1.2453085164184511e-05, + "loss": 0.9875, + "step": 252000 + }, + { + "epoch": 3.51, + "learning_rate": 1.2446119338525196e-05, + "loss": 0.9961, + "step": 252100 + }, + { + "epoch": 3.51, + "learning_rate": 1.243915351286588e-05, + "loss": 0.9483, + "step": 252200 + }, + { + "epoch": 3.51, + "learning_rate": 1.2432187687206565e-05, + "loss": 0.9692, + "step": 252300 + }, + { + "epoch": 3.52, + "learning_rate": 1.242522186154725e-05, + "loss": 0.9758, + "step": 252400 + }, + { + "epoch": 3.52, + "learning_rate": 1.2418256035887935e-05, + "loss": 0.9745, + "step": 252500 + }, + { + "epoch": 3.52, + "learning_rate": 1.241129021022862e-05, + "loss": 0.9939, + "step": 252600 + }, + { + "epoch": 3.52, + "learning_rate": 1.2404324384569304e-05, + "loss": 1.0025, + "step": 252700 + }, + { + "epoch": 3.52, + "learning_rate": 1.2397358558909989e-05, + "loss": 0.9738, + "step": 252800 + }, + { + "epoch": 3.52, + "learning_rate": 1.2390392733250673e-05, + "loss": 0.957, + "step": 252900 + }, + { + "epoch": 3.52, + "learning_rate": 1.2383426907591356e-05, + "loss": 0.9619, + "step": 253000 + }, + { + "epoch": 3.53, + "learning_rate": 1.2376461081932041e-05, + "loss": 0.9671, + "step": 253100 + }, + { + "epoch": 3.53, + "learning_rate": 1.2369564914529319e-05, + "loss": 0.9897, + "step": 253200 + }, + { + "epoch": 3.53, + "learning_rate": 1.2362599088870004e-05, + "loss": 0.9783, + "step": 253300 + }, + { + "epoch": 3.53, + "learning_rate": 1.2355633263210688e-05, + "loss": 0.9969, + "step": 253400 + }, + { + "epoch": 3.53, + "learning_rate": 1.2348667437551373e-05, + "loss": 0.9811, + "step": 253500 + }, + { + "epoch": 3.53, + "learning_rate": 1.2341701611892058e-05, + "loss": 0.9867, + "step": 253600 + }, + { + "epoch": 3.53, + "learning_rate": 1.2334805444489336e-05, + "loss": 1.0169, + "step": 253700 + }, + { + "epoch": 3.54, + "learning_rate": 1.232783961883002e-05, + "loss": 0.9899, + "step": 253800 + }, + { + "epoch": 3.54, + "learning_rate": 1.2320873793170705e-05, + "loss": 1.0126, + "step": 253900 + }, + { + "epoch": 3.54, + "learning_rate": 1.231390796751139e-05, + "loss": 0.9635, + "step": 254000 + }, + { + "epoch": 3.54, + "learning_rate": 1.2306942141852075e-05, + "loss": 1.0019, + "step": 254100 + }, + { + "epoch": 3.54, + "learning_rate": 1.229997631619276e-05, + "loss": 0.9804, + "step": 254200 + }, + { + "epoch": 3.54, + "learning_rate": 1.2293010490533444e-05, + "loss": 0.9913, + "step": 254300 + }, + { + "epoch": 3.54, + "learning_rate": 1.2286044664874127e-05, + "loss": 0.9763, + "step": 254400 + }, + { + "epoch": 3.55, + "learning_rate": 1.2279078839214812e-05, + "loss": 0.9634, + "step": 254500 + }, + { + "epoch": 3.55, + "learning_rate": 1.2272113013555496e-05, + "loss": 0.9622, + "step": 254600 + }, + { + "epoch": 3.55, + "learning_rate": 1.2265147187896181e-05, + "loss": 0.9788, + "step": 254700 + }, + { + "epoch": 3.55, + "learning_rate": 1.2258181362236866e-05, + "loss": 1.0044, + "step": 254800 + }, + { + "epoch": 3.55, + "learning_rate": 1.225121553657755e-05, + "loss": 0.9843, + "step": 254900 + }, + { + "epoch": 3.55, + "learning_rate": 1.2244249710918235e-05, + "loss": 0.9691, + "step": 255000 + }, + { + "epoch": 3.55, + "learning_rate": 1.223728388525892e-05, + "loss": 0.9985, + "step": 255100 + }, + { + "epoch": 3.56, + "learning_rate": 1.2230318059599604e-05, + "loss": 0.9796, + "step": 255200 + }, + { + "epoch": 3.56, + "learning_rate": 1.2223352233940289e-05, + "loss": 0.9725, + "step": 255300 + }, + { + "epoch": 3.56, + "learning_rate": 1.2216386408280974e-05, + "loss": 0.9806, + "step": 255400 + }, + { + "epoch": 3.56, + "learning_rate": 1.2209420582621659e-05, + "loss": 0.9748, + "step": 255500 + }, + { + "epoch": 3.56, + "learning_rate": 1.2202454756962343e-05, + "loss": 0.9934, + "step": 255600 + }, + { + "epoch": 3.56, + "learning_rate": 1.2195488931303028e-05, + "loss": 0.989, + "step": 255700 + }, + { + "epoch": 3.56, + "learning_rate": 1.2188523105643713e-05, + "loss": 1.0401, + "step": 255800 + }, + { + "epoch": 3.57, + "learning_rate": 1.2181557279984397e-05, + "loss": 1.0054, + "step": 255900 + }, + { + "epoch": 3.57, + "learning_rate": 1.2174591454325082e-05, + "loss": 0.9631, + "step": 256000 + }, + { + "epoch": 3.57, + "learning_rate": 1.2167625628665767e-05, + "loss": 0.9756, + "step": 256100 + }, + { + "epoch": 3.57, + "learning_rate": 1.2160729461263043e-05, + "loss": 0.9753, + "step": 256200 + }, + { + "epoch": 3.57, + "learning_rate": 1.2153763635603728e-05, + "loss": 0.9604, + "step": 256300 + }, + { + "epoch": 3.57, + "learning_rate": 1.2146797809944412e-05, + "loss": 0.9536, + "step": 256400 + }, + { + "epoch": 3.57, + "learning_rate": 1.2139831984285099e-05, + "loss": 0.9592, + "step": 256500 + }, + { + "epoch": 3.57, + "learning_rate": 1.2132866158625783e-05, + "loss": 0.9903, + "step": 256600 + }, + { + "epoch": 3.58, + "learning_rate": 1.2125900332966468e-05, + "loss": 0.9939, + "step": 256700 + }, + { + "epoch": 3.58, + "learning_rate": 1.2118934507307153e-05, + "loss": 0.9895, + "step": 256800 + }, + { + "epoch": 3.58, + "learning_rate": 1.2111968681647836e-05, + "loss": 1.0161, + "step": 256900 + }, + { + "epoch": 3.58, + "learning_rate": 1.210500285598852e-05, + "loss": 0.9908, + "step": 257000 + }, + { + "epoch": 3.58, + "learning_rate": 1.2098037030329205e-05, + "loss": 0.968, + "step": 257100 + }, + { + "epoch": 3.58, + "learning_rate": 1.209107120466989e-05, + "loss": 0.9951, + "step": 257200 + }, + { + "epoch": 3.58, + "learning_rate": 1.2084105379010575e-05, + "loss": 1.0156, + "step": 257300 + }, + { + "epoch": 3.59, + "learning_rate": 1.207713955335126e-05, + "loss": 0.9902, + "step": 257400 + }, + { + "epoch": 3.59, + "learning_rate": 1.2070173727691944e-05, + "loss": 0.9568, + "step": 257500 + }, + { + "epoch": 3.59, + "learning_rate": 1.2063207902032629e-05, + "loss": 0.9984, + "step": 257600 + }, + { + "epoch": 3.59, + "learning_rate": 1.2056242076373313e-05, + "loss": 0.9871, + "step": 257700 + }, + { + "epoch": 3.59, + "learning_rate": 1.2049276250713998e-05, + "loss": 0.9664, + "step": 257800 + }, + { + "epoch": 3.59, + "learning_rate": 1.2042310425054683e-05, + "loss": 0.9597, + "step": 257900 + }, + { + "epoch": 3.59, + "learning_rate": 1.2035344599395366e-05, + "loss": 1.025, + "step": 258000 + }, + { + "epoch": 3.6, + "learning_rate": 1.202837877373605e-05, + "loss": 0.9871, + "step": 258100 + }, + { + "epoch": 3.6, + "learning_rate": 1.2021412948076735e-05, + "loss": 1.0194, + "step": 258200 + }, + { + "epoch": 3.6, + "learning_rate": 1.201444712241742e-05, + "loss": 1.0025, + "step": 258300 + }, + { + "epoch": 3.6, + "learning_rate": 1.2007481296758104e-05, + "loss": 1.0267, + "step": 258400 + }, + { + "epoch": 3.6, + "learning_rate": 1.2000515471098789e-05, + "loss": 0.9666, + "step": 258500 + }, + { + "epoch": 3.6, + "learning_rate": 1.1993549645439474e-05, + "loss": 0.9595, + "step": 258600 + }, + { + "epoch": 3.6, + "learning_rate": 1.1986583819780158e-05, + "loss": 0.9739, + "step": 258700 + }, + { + "epoch": 3.61, + "learning_rate": 1.1979617994120843e-05, + "loss": 0.9822, + "step": 258800 + }, + { + "epoch": 3.61, + "learning_rate": 1.197265216846153e-05, + "loss": 0.975, + "step": 258900 + }, + { + "epoch": 3.61, + "learning_rate": 1.1965686342802214e-05, + "loss": 0.9816, + "step": 259000 + }, + { + "epoch": 3.61, + "learning_rate": 1.1958720517142897e-05, + "loss": 0.9882, + "step": 259100 + }, + { + "epoch": 3.61, + "learning_rate": 1.1951754691483582e-05, + "loss": 0.955, + "step": 259200 + }, + { + "epoch": 3.61, + "learning_rate": 1.1944788865824267e-05, + "loss": 1.0038, + "step": 259300 + }, + { + "epoch": 3.61, + "learning_rate": 1.1937823040164951e-05, + "loss": 0.9749, + "step": 259400 + }, + { + "epoch": 3.62, + "learning_rate": 1.1930857214505636e-05, + "loss": 1.0001, + "step": 259500 + }, + { + "epoch": 3.62, + "learning_rate": 1.192389138884632e-05, + "loss": 0.9507, + "step": 259600 + }, + { + "epoch": 3.62, + "learning_rate": 1.1916925563187005e-05, + "loss": 0.9973, + "step": 259700 + }, + { + "epoch": 3.62, + "learning_rate": 1.190995973752769e-05, + "loss": 0.989, + "step": 259800 + }, + { + "epoch": 3.62, + "learning_rate": 1.1902993911868375e-05, + "loss": 1.0236, + "step": 259900 + }, + { + "epoch": 3.62, + "learning_rate": 1.189602808620906e-05, + "loss": 0.9796, + "step": 260000 + }, + { + "epoch": 3.62, + "learning_rate": 1.1889062260549744e-05, + "loss": 1.0098, + "step": 260100 + }, + { + "epoch": 3.63, + "learning_rate": 1.1882096434890427e-05, + "loss": 0.986, + "step": 260200 + }, + { + "epoch": 3.63, + "learning_rate": 1.1875130609231112e-05, + "loss": 0.9521, + "step": 260300 + }, + { + "epoch": 3.63, + "learning_rate": 1.1868164783571796e-05, + "loss": 0.9656, + "step": 260400 + }, + { + "epoch": 3.63, + "learning_rate": 1.1861198957912481e-05, + "loss": 0.9844, + "step": 260500 + }, + { + "epoch": 3.63, + "learning_rate": 1.1854233132253166e-05, + "loss": 0.9965, + "step": 260600 + }, + { + "epoch": 3.63, + "learning_rate": 1.184726730659385e-05, + "loss": 0.9732, + "step": 260700 + }, + { + "epoch": 3.63, + "learning_rate": 1.1840371139191129e-05, + "loss": 0.9969, + "step": 260800 + }, + { + "epoch": 3.63, + "learning_rate": 1.1833405313531813e-05, + "loss": 0.9837, + "step": 260900 + }, + { + "epoch": 3.64, + "learning_rate": 1.1826439487872498e-05, + "loss": 1.0085, + "step": 261000 + }, + { + "epoch": 3.64, + "learning_rate": 1.1819473662213183e-05, + "loss": 0.9855, + "step": 261100 + }, + { + "epoch": 3.64, + "learning_rate": 1.1812507836553867e-05, + "loss": 0.9981, + "step": 261200 + }, + { + "epoch": 3.64, + "learning_rate": 1.1805542010894552e-05, + "loss": 1.0086, + "step": 261300 + }, + { + "epoch": 3.64, + "learning_rate": 1.1798576185235237e-05, + "loss": 0.9939, + "step": 261400 + }, + { + "epoch": 3.64, + "learning_rate": 1.179161035957592e-05, + "loss": 1.0008, + "step": 261500 + }, + { + "epoch": 3.64, + "learning_rate": 1.1784644533916604e-05, + "loss": 0.9769, + "step": 261600 + }, + { + "epoch": 3.65, + "learning_rate": 1.1777678708257289e-05, + "loss": 1.0016, + "step": 261700 + }, + { + "epoch": 3.65, + "learning_rate": 1.1770712882597974e-05, + "loss": 1.0011, + "step": 261800 + }, + { + "epoch": 3.65, + "learning_rate": 1.1763747056938658e-05, + "loss": 0.9821, + "step": 261900 + }, + { + "epoch": 3.65, + "learning_rate": 1.1756781231279343e-05, + "loss": 0.9878, + "step": 262000 + }, + { + "epoch": 3.65, + "learning_rate": 1.1749815405620028e-05, + "loss": 1.0042, + "step": 262100 + }, + { + "epoch": 3.65, + "learning_rate": 1.1742849579960714e-05, + "loss": 0.9959, + "step": 262200 + }, + { + "epoch": 3.65, + "learning_rate": 1.1735883754301399e-05, + "loss": 0.9901, + "step": 262300 + }, + { + "epoch": 3.66, + "learning_rate": 1.1728917928642083e-05, + "loss": 0.9912, + "step": 262400 + }, + { + "epoch": 3.66, + "learning_rate": 1.1721952102982768e-05, + "loss": 0.9897, + "step": 262500 + }, + { + "epoch": 3.66, + "learning_rate": 1.1714986277323451e-05, + "loss": 0.949, + "step": 262600 + }, + { + "epoch": 3.66, + "learning_rate": 1.1708020451664136e-05, + "loss": 0.9607, + "step": 262700 + }, + { + "epoch": 3.66, + "learning_rate": 1.170105462600482e-05, + "loss": 0.9794, + "step": 262800 + }, + { + "epoch": 3.66, + "learning_rate": 1.1694088800345505e-05, + "loss": 0.9869, + "step": 262900 + }, + { + "epoch": 3.66, + "learning_rate": 1.168712297468619e-05, + "loss": 0.9877, + "step": 263000 + }, + { + "epoch": 3.67, + "learning_rate": 1.1680157149026875e-05, + "loss": 0.9649, + "step": 263100 + }, + { + "epoch": 3.67, + "learning_rate": 1.167319132336756e-05, + "loss": 0.9846, + "step": 263200 + }, + { + "epoch": 3.67, + "learning_rate": 1.1666225497708244e-05, + "loss": 0.994, + "step": 263300 + }, + { + "epoch": 3.67, + "learning_rate": 1.1659259672048929e-05, + "loss": 0.95, + "step": 263400 + }, + { + "epoch": 3.67, + "learning_rate": 1.1652293846389613e-05, + "loss": 0.9849, + "step": 263500 + }, + { + "epoch": 3.67, + "learning_rate": 1.1645328020730298e-05, + "loss": 0.9534, + "step": 263600 + }, + { + "epoch": 3.67, + "learning_rate": 1.1638362195070983e-05, + "loss": 0.97, + "step": 263700 + }, + { + "epoch": 3.68, + "learning_rate": 1.1631396369411666e-05, + "loss": 1.004, + "step": 263800 + }, + { + "epoch": 3.68, + "learning_rate": 1.162443054375235e-05, + "loss": 0.9785, + "step": 263900 + }, + { + "epoch": 3.68, + "learning_rate": 1.1617534376349628e-05, + "loss": 0.9533, + "step": 264000 + }, + { + "epoch": 3.68, + "learning_rate": 1.1610568550690313e-05, + "loss": 0.9831, + "step": 264100 + }, + { + "epoch": 3.68, + "learning_rate": 1.1603602725030998e-05, + "loss": 1.0029, + "step": 264200 + }, + { + "epoch": 3.68, + "learning_rate": 1.1596706557628276e-05, + "loss": 0.9957, + "step": 264300 + }, + { + "epoch": 3.68, + "learning_rate": 1.158974073196896e-05, + "loss": 1.0092, + "step": 264400 + }, + { + "epoch": 3.68, + "learning_rate": 1.1582774906309645e-05, + "loss": 0.9792, + "step": 264500 + }, + { + "epoch": 3.69, + "learning_rate": 1.157580908065033e-05, + "loss": 0.9532, + "step": 264600 + }, + { + "epoch": 3.69, + "learning_rate": 1.1568843254991015e-05, + "loss": 0.959, + "step": 264700 + }, + { + "epoch": 3.69, + "learning_rate": 1.15618774293317e-05, + "loss": 0.9557, + "step": 264800 + }, + { + "epoch": 3.69, + "learning_rate": 1.1554911603672384e-05, + "loss": 0.9499, + "step": 264900 + }, + { + "epoch": 3.69, + "learning_rate": 1.1547945778013069e-05, + "loss": 0.9418, + "step": 265000 + }, + { + "epoch": 3.69, + "learning_rate": 1.1540979952353753e-05, + "loss": 1.0181, + "step": 265100 + }, + { + "epoch": 3.69, + "learning_rate": 1.1534014126694436e-05, + "loss": 0.9944, + "step": 265200 + }, + { + "epoch": 3.7, + "learning_rate": 1.1527048301035121e-05, + "loss": 0.9985, + "step": 265300 + }, + { + "epoch": 3.7, + "learning_rate": 1.1520082475375806e-05, + "loss": 0.9748, + "step": 265400 + }, + { + "epoch": 3.7, + "learning_rate": 1.151311664971649e-05, + "loss": 0.9544, + "step": 265500 + }, + { + "epoch": 3.7, + "learning_rate": 1.1506150824057175e-05, + "loss": 0.9731, + "step": 265600 + }, + { + "epoch": 3.7, + "learning_rate": 1.149918499839786e-05, + "loss": 0.9681, + "step": 265700 + }, + { + "epoch": 3.7, + "learning_rate": 1.1492219172738544e-05, + "loss": 0.9926, + "step": 265800 + }, + { + "epoch": 3.7, + "learning_rate": 1.148525334707923e-05, + "loss": 0.9781, + "step": 265900 + }, + { + "epoch": 3.71, + "learning_rate": 1.1478287521419914e-05, + "loss": 0.9916, + "step": 266000 + }, + { + "epoch": 3.71, + "learning_rate": 1.1471321695760599e-05, + "loss": 0.9642, + "step": 266100 + }, + { + "epoch": 3.71, + "learning_rate": 1.1464355870101283e-05, + "loss": 0.9716, + "step": 266200 + }, + { + "epoch": 3.71, + "learning_rate": 1.145739004444197e-05, + "loss": 0.9785, + "step": 266300 + }, + { + "epoch": 3.71, + "learning_rate": 1.1450424218782653e-05, + "loss": 0.9607, + "step": 266400 + }, + { + "epoch": 3.71, + "learning_rate": 1.1443458393123337e-05, + "loss": 0.972, + "step": 266500 + }, + { + "epoch": 3.71, + "learning_rate": 1.1436492567464022e-05, + "loss": 0.9862, + "step": 266600 + }, + { + "epoch": 3.72, + "learning_rate": 1.1429526741804707e-05, + "loss": 0.9673, + "step": 266700 + }, + { + "epoch": 3.72, + "learning_rate": 1.1422560916145391e-05, + "loss": 0.9721, + "step": 266800 + }, + { + "epoch": 3.72, + "learning_rate": 1.1415595090486076e-05, + "loss": 0.9615, + "step": 266900 + }, + { + "epoch": 3.72, + "learning_rate": 1.140862926482676e-05, + "loss": 0.9706, + "step": 267000 + }, + { + "epoch": 3.72, + "learning_rate": 1.1401663439167445e-05, + "loss": 1.0159, + "step": 267100 + }, + { + "epoch": 3.72, + "learning_rate": 1.139469761350813e-05, + "loss": 0.997, + "step": 267200 + }, + { + "epoch": 3.72, + "learning_rate": 1.1387731787848815e-05, + "loss": 0.97, + "step": 267300 + }, + { + "epoch": 3.73, + "learning_rate": 1.13807659621895e-05, + "loss": 1.0179, + "step": 267400 + }, + { + "epoch": 3.73, + "learning_rate": 1.1373800136530182e-05, + "loss": 0.9738, + "step": 267500 + }, + { + "epoch": 3.73, + "learning_rate": 1.1366834310870867e-05, + "loss": 0.9676, + "step": 267600 + }, + { + "epoch": 3.73, + "learning_rate": 1.1359868485211552e-05, + "loss": 0.9892, + "step": 267700 + }, + { + "epoch": 3.73, + "learning_rate": 1.1352902659552236e-05, + "loss": 1.0037, + "step": 267800 + }, + { + "epoch": 3.73, + "learning_rate": 1.1345936833892921e-05, + "loss": 1.0042, + "step": 267900 + }, + { + "epoch": 3.73, + "learning_rate": 1.1338971008233606e-05, + "loss": 0.9883, + "step": 268000 + }, + { + "epoch": 3.74, + "learning_rate": 1.133200518257429e-05, + "loss": 1.0058, + "step": 268100 + }, + { + "epoch": 3.74, + "learning_rate": 1.1325039356914975e-05, + "loss": 0.9622, + "step": 268200 + }, + { + "epoch": 3.74, + "learning_rate": 1.131807353125566e-05, + "loss": 1.002, + "step": 268300 + }, + { + "epoch": 3.74, + "learning_rate": 1.1311107705596345e-05, + "loss": 0.9946, + "step": 268400 + }, + { + "epoch": 3.74, + "learning_rate": 1.130414187993703e-05, + "loss": 1.0038, + "step": 268500 + }, + { + "epoch": 3.74, + "learning_rate": 1.1297176054277712e-05, + "loss": 0.9864, + "step": 268600 + }, + { + "epoch": 3.74, + "learning_rate": 1.1290210228618399e-05, + "loss": 0.9908, + "step": 268700 + }, + { + "epoch": 3.74, + "learning_rate": 1.1283244402959083e-05, + "loss": 1.0071, + "step": 268800 + }, + { + "epoch": 3.75, + "learning_rate": 1.1276278577299768e-05, + "loss": 0.9852, + "step": 268900 + }, + { + "epoch": 3.75, + "learning_rate": 1.1269382409897044e-05, + "loss": 0.9787, + "step": 269000 + }, + { + "epoch": 3.75, + "learning_rate": 1.1262416584237729e-05, + "loss": 0.9853, + "step": 269100 + }, + { + "epoch": 3.75, + "learning_rate": 1.1255450758578414e-05, + "loss": 0.9849, + "step": 269200 + }, + { + "epoch": 3.75, + "learning_rate": 1.1248484932919098e-05, + "loss": 0.9848, + "step": 269300 + }, + { + "epoch": 3.75, + "learning_rate": 1.1241519107259783e-05, + "loss": 0.967, + "step": 269400 + }, + { + "epoch": 3.75, + "learning_rate": 1.123455328160047e-05, + "loss": 0.9804, + "step": 269500 + }, + { + "epoch": 3.76, + "learning_rate": 1.1227587455941154e-05, + "loss": 0.9966, + "step": 269600 + }, + { + "epoch": 3.76, + "learning_rate": 1.1220621630281839e-05, + "loss": 0.9848, + "step": 269700 + }, + { + "epoch": 3.76, + "learning_rate": 1.1213655804622524e-05, + "loss": 0.9583, + "step": 269800 + }, + { + "epoch": 3.76, + "learning_rate": 1.1206689978963207e-05, + "loss": 0.9921, + "step": 269900 + }, + { + "epoch": 3.76, + "learning_rate": 1.1199724153303891e-05, + "loss": 0.9721, + "step": 270000 + }, + { + "epoch": 3.76, + "learning_rate": 1.1192758327644576e-05, + "loss": 0.9834, + "step": 270100 + }, + { + "epoch": 3.76, + "learning_rate": 1.118579250198526e-05, + "loss": 0.9992, + "step": 270200 + }, + { + "epoch": 3.77, + "learning_rate": 1.1178826676325945e-05, + "loss": 0.9495, + "step": 270300 + }, + { + "epoch": 3.77, + "learning_rate": 1.117186085066663e-05, + "loss": 0.9699, + "step": 270400 + }, + { + "epoch": 3.77, + "learning_rate": 1.1164895025007315e-05, + "loss": 0.9755, + "step": 270500 + }, + { + "epoch": 3.77, + "learning_rate": 1.1157929199348e-05, + "loss": 0.9905, + "step": 270600 + }, + { + "epoch": 3.77, + "learning_rate": 1.1150963373688684e-05, + "loss": 0.9889, + "step": 270700 + }, + { + "epoch": 3.77, + "learning_rate": 1.1143997548029369e-05, + "loss": 0.9678, + "step": 270800 + }, + { + "epoch": 3.77, + "learning_rate": 1.1137031722370053e-05, + "loss": 0.9644, + "step": 270900 + }, + { + "epoch": 3.78, + "learning_rate": 1.1130065896710736e-05, + "loss": 0.9962, + "step": 271000 + }, + { + "epoch": 3.78, + "learning_rate": 1.1123100071051421e-05, + "loss": 1.0022, + "step": 271100 + }, + { + "epoch": 3.78, + "learning_rate": 1.1116134245392106e-05, + "loss": 0.9839, + "step": 271200 + }, + { + "epoch": 3.78, + "learning_rate": 1.110916841973279e-05, + "loss": 0.9932, + "step": 271300 + }, + { + "epoch": 3.78, + "learning_rate": 1.1102202594073475e-05, + "loss": 0.9739, + "step": 271400 + }, + { + "epoch": 3.78, + "learning_rate": 1.109523676841416e-05, + "loss": 1.0009, + "step": 271500 + }, + { + "epoch": 3.78, + "learning_rate": 1.1088270942754844e-05, + "loss": 0.9738, + "step": 271600 + }, + { + "epoch": 3.79, + "learning_rate": 1.1081305117095529e-05, + "loss": 0.9487, + "step": 271700 + }, + { + "epoch": 3.79, + "learning_rate": 1.1074339291436214e-05, + "loss": 0.9752, + "step": 271800 + }, + { + "epoch": 3.79, + "learning_rate": 1.1067373465776899e-05, + "loss": 0.9955, + "step": 271900 + }, + { + "epoch": 3.79, + "learning_rate": 1.1060407640117585e-05, + "loss": 0.9662, + "step": 272000 + }, + { + "epoch": 3.79, + "learning_rate": 1.105344181445827e-05, + "loss": 0.9841, + "step": 272100 + }, + { + "epoch": 3.79, + "learning_rate": 1.1046475988798953e-05, + "loss": 0.9586, + "step": 272200 + }, + { + "epoch": 3.79, + "learning_rate": 1.1039510163139637e-05, + "loss": 0.9878, + "step": 272300 + }, + { + "epoch": 3.79, + "learning_rate": 1.1032544337480322e-05, + "loss": 0.9602, + "step": 272400 + }, + { + "epoch": 3.8, + "learning_rate": 1.1025578511821007e-05, + "loss": 0.9938, + "step": 272500 + }, + { + "epoch": 3.8, + "learning_rate": 1.1018612686161691e-05, + "loss": 0.9777, + "step": 272600 + }, + { + "epoch": 3.8, + "learning_rate": 1.1011646860502376e-05, + "loss": 0.9831, + "step": 272700 + }, + { + "epoch": 3.8, + "learning_rate": 1.100468103484306e-05, + "loss": 0.9644, + "step": 272800 + }, + { + "epoch": 3.8, + "learning_rate": 1.0997715209183745e-05, + "loss": 0.9664, + "step": 272900 + }, + { + "epoch": 3.8, + "learning_rate": 1.099074938352443e-05, + "loss": 0.9777, + "step": 273000 + }, + { + "epoch": 3.8, + "learning_rate": 1.0983783557865115e-05, + "loss": 0.9728, + "step": 273100 + }, + { + "epoch": 3.81, + "learning_rate": 1.09768177322058e-05, + "loss": 0.9791, + "step": 273200 + }, + { + "epoch": 3.81, + "learning_rate": 1.0969921564803078e-05, + "loss": 0.9805, + "step": 273300 + }, + { + "epoch": 3.81, + "learning_rate": 1.0962955739143762e-05, + "loss": 0.9774, + "step": 273400 + }, + { + "epoch": 3.81, + "learning_rate": 1.0955989913484445e-05, + "loss": 0.9813, + "step": 273500 + }, + { + "epoch": 3.81, + "learning_rate": 1.094902408782513e-05, + "loss": 0.9358, + "step": 273600 + }, + { + "epoch": 3.81, + "learning_rate": 1.0942058262165815e-05, + "loss": 1.0004, + "step": 273700 + }, + { + "epoch": 3.81, + "learning_rate": 1.09350924365065e-05, + "loss": 0.97, + "step": 273800 + }, + { + "epoch": 3.82, + "learning_rate": 1.0928126610847184e-05, + "loss": 0.9578, + "step": 273900 + }, + { + "epoch": 3.82, + "learning_rate": 1.0921160785187869e-05, + "loss": 0.994, + "step": 274000 + }, + { + "epoch": 3.82, + "learning_rate": 1.0914194959528553e-05, + "loss": 0.9845, + "step": 274100 + }, + { + "epoch": 3.82, + "learning_rate": 1.0907229133869238e-05, + "loss": 0.9934, + "step": 274200 + }, + { + "epoch": 3.82, + "learning_rate": 1.0900263308209923e-05, + "loss": 0.9909, + "step": 274300 + }, + { + "epoch": 3.82, + "learning_rate": 1.0893297482550607e-05, + "loss": 0.9895, + "step": 274400 + }, + { + "epoch": 3.82, + "learning_rate": 1.0886331656891292e-05, + "loss": 0.9869, + "step": 274500 + }, + { + "epoch": 3.83, + "learning_rate": 1.0879365831231975e-05, + "loss": 0.9759, + "step": 274600 + }, + { + "epoch": 3.83, + "learning_rate": 1.087240000557266e-05, + "loss": 1.013, + "step": 274700 + }, + { + "epoch": 3.83, + "learning_rate": 1.0865434179913344e-05, + "loss": 0.9553, + "step": 274800 + }, + { + "epoch": 3.83, + "learning_rate": 1.0858468354254029e-05, + "loss": 0.9715, + "step": 274900 + }, + { + "epoch": 3.83, + "learning_rate": 1.0851502528594714e-05, + "loss": 0.9699, + "step": 275000 + }, + { + "epoch": 3.83, + "learning_rate": 1.0844536702935398e-05, + "loss": 1.0056, + "step": 275100 + }, + { + "epoch": 3.83, + "learning_rate": 1.0837640535532677e-05, + "loss": 1.0038, + "step": 275200 + }, + { + "epoch": 3.84, + "learning_rate": 1.0830674709873361e-05, + "loss": 0.9863, + "step": 275300 + }, + { + "epoch": 3.84, + "learning_rate": 1.0823708884214046e-05, + "loss": 0.9697, + "step": 275400 + }, + { + "epoch": 3.84, + "learning_rate": 1.081674305855473e-05, + "loss": 0.9998, + "step": 275500 + }, + { + "epoch": 3.84, + "learning_rate": 1.0809777232895415e-05, + "loss": 1.0124, + "step": 275600 + }, + { + "epoch": 3.84, + "learning_rate": 1.08028114072361e-05, + "loss": 0.979, + "step": 275700 + }, + { + "epoch": 3.84, + "learning_rate": 1.0795845581576785e-05, + "loss": 1.0042, + "step": 275800 + }, + { + "epoch": 3.84, + "learning_rate": 1.0788879755917468e-05, + "loss": 0.9549, + "step": 275900 + }, + { + "epoch": 3.85, + "learning_rate": 1.0781913930258154e-05, + "loss": 0.9615, + "step": 276000 + }, + { + "epoch": 3.85, + "learning_rate": 1.0774948104598839e-05, + "loss": 0.9758, + "step": 276100 + }, + { + "epoch": 3.85, + "learning_rate": 1.0767982278939523e-05, + "loss": 0.9454, + "step": 276200 + }, + { + "epoch": 3.85, + "learning_rate": 1.0761016453280208e-05, + "loss": 0.9954, + "step": 276300 + }, + { + "epoch": 3.85, + "learning_rate": 1.0754050627620893e-05, + "loss": 0.9784, + "step": 276400 + }, + { + "epoch": 3.85, + "learning_rate": 1.0747084801961577e-05, + "loss": 0.9846, + "step": 276500 + }, + { + "epoch": 3.85, + "learning_rate": 1.0740118976302262e-05, + "loss": 0.9897, + "step": 276600 + }, + { + "epoch": 3.85, + "learning_rate": 1.0733153150642947e-05, + "loss": 1.0244, + "step": 276700 + }, + { + "epoch": 3.86, + "learning_rate": 1.0726187324983631e-05, + "loss": 0.9911, + "step": 276800 + }, + { + "epoch": 3.86, + "learning_rate": 1.0719221499324316e-05, + "loss": 1.0317, + "step": 276900 + }, + { + "epoch": 3.86, + "learning_rate": 1.0712255673665e-05, + "loss": 0.9978, + "step": 277000 + }, + { + "epoch": 3.86, + "learning_rate": 1.0705289848005684e-05, + "loss": 1.0002, + "step": 277100 + }, + { + "epoch": 3.86, + "learning_rate": 1.0698324022346369e-05, + "loss": 0.9814, + "step": 277200 + }, + { + "epoch": 3.86, + "learning_rate": 1.0691358196687053e-05, + "loss": 0.9785, + "step": 277300 + }, + { + "epoch": 3.86, + "learning_rate": 1.0684392371027738e-05, + "loss": 0.9724, + "step": 277400 + }, + { + "epoch": 3.87, + "learning_rate": 1.0677426545368423e-05, + "loss": 0.9989, + "step": 277500 + }, + { + "epoch": 3.87, + "learning_rate": 1.0670460719709107e-05, + "loss": 0.961, + "step": 277600 + }, + { + "epoch": 3.87, + "learning_rate": 1.0663494894049792e-05, + "loss": 0.96, + "step": 277700 + }, + { + "epoch": 3.87, + "learning_rate": 1.0656529068390477e-05, + "loss": 0.9835, + "step": 277800 + }, + { + "epoch": 3.87, + "learning_rate": 1.0649563242731161e-05, + "loss": 0.9815, + "step": 277900 + }, + { + "epoch": 3.87, + "learning_rate": 1.0642597417071846e-05, + "loss": 0.9932, + "step": 278000 + }, + { + "epoch": 3.87, + "learning_rate": 1.0635631591412529e-05, + "loss": 0.9926, + "step": 278100 + }, + { + "epoch": 3.88, + "learning_rate": 1.0628665765753214e-05, + "loss": 0.9705, + "step": 278200 + }, + { + "epoch": 3.88, + "learning_rate": 1.0621699940093898e-05, + "loss": 1.0001, + "step": 278300 + }, + { + "epoch": 3.88, + "learning_rate": 1.0614734114434583e-05, + "loss": 0.9889, + "step": 278400 + }, + { + "epoch": 3.88, + "learning_rate": 1.060776828877527e-05, + "loss": 0.9962, + "step": 278500 + }, + { + "epoch": 3.88, + "learning_rate": 1.0600802463115954e-05, + "loss": 0.9582, + "step": 278600 + }, + { + "epoch": 3.88, + "learning_rate": 1.0593836637456639e-05, + "loss": 0.9944, + "step": 278700 + }, + { + "epoch": 3.88, + "learning_rate": 1.0586870811797323e-05, + "loss": 0.9763, + "step": 278800 + }, + { + "epoch": 3.89, + "learning_rate": 1.0579904986138008e-05, + "loss": 0.9497, + "step": 278900 + }, + { + "epoch": 3.89, + "learning_rate": 1.0572939160478693e-05, + "loss": 0.9902, + "step": 279000 + }, + { + "epoch": 3.89, + "learning_rate": 1.0565973334819378e-05, + "loss": 0.9809, + "step": 279100 + }, + { + "epoch": 3.89, + "learning_rate": 1.0559077167416654e-05, + "loss": 0.9747, + "step": 279200 + }, + { + "epoch": 3.89, + "learning_rate": 1.055211134175734e-05, + "loss": 0.9805, + "step": 279300 + }, + { + "epoch": 3.89, + "learning_rate": 1.0545145516098023e-05, + "loss": 1.0024, + "step": 279400 + }, + { + "epoch": 3.89, + "learning_rate": 1.0538179690438708e-05, + "loss": 0.9853, + "step": 279500 + }, + { + "epoch": 3.9, + "learning_rate": 1.0531213864779393e-05, + "loss": 0.9734, + "step": 279600 + }, + { + "epoch": 3.9, + "learning_rate": 1.0524317697376669e-05, + "loss": 0.9694, + "step": 279700 + }, + { + "epoch": 3.9, + "learning_rate": 1.0517351871717354e-05, + "loss": 1.0061, + "step": 279800 + }, + { + "epoch": 3.9, + "learning_rate": 1.0510386046058038e-05, + "loss": 0.9687, + "step": 279900 + }, + { + "epoch": 3.9, + "learning_rate": 1.0503420220398723e-05, + "loss": 0.9794, + "step": 280000 + }, + { + "epoch": 3.9, + "learning_rate": 1.049645439473941e-05, + "loss": 0.9878, + "step": 280100 + }, + { + "epoch": 3.9, + "learning_rate": 1.0489488569080094e-05, + "loss": 0.9465, + "step": 280200 + }, + { + "epoch": 3.91, + "learning_rate": 1.0482522743420779e-05, + "loss": 1.0336, + "step": 280300 + }, + { + "epoch": 3.91, + "learning_rate": 1.0475556917761464e-05, + "loss": 0.9868, + "step": 280400 + }, + { + "epoch": 3.91, + "learning_rate": 1.0468591092102148e-05, + "loss": 1.0007, + "step": 280500 + }, + { + "epoch": 3.91, + "learning_rate": 1.0461625266442833e-05, + "loss": 0.9596, + "step": 280600 + }, + { + "epoch": 3.91, + "learning_rate": 1.0454659440783516e-05, + "loss": 1.0274, + "step": 280700 + }, + { + "epoch": 3.91, + "learning_rate": 1.04476936151242e-05, + "loss": 0.9902, + "step": 280800 + }, + { + "epoch": 3.91, + "learning_rate": 1.0440727789464885e-05, + "loss": 0.9704, + "step": 280900 + }, + { + "epoch": 3.91, + "learning_rate": 1.043376196380557e-05, + "loss": 0.9813, + "step": 281000 + }, + { + "epoch": 3.92, + "learning_rate": 1.0426796138146255e-05, + "loss": 1.0052, + "step": 281100 + }, + { + "epoch": 3.92, + "learning_rate": 1.041983031248694e-05, + "loss": 0.9405, + "step": 281200 + }, + { + "epoch": 3.92, + "learning_rate": 1.0412864486827624e-05, + "loss": 0.9918, + "step": 281300 + }, + { + "epoch": 3.92, + "learning_rate": 1.0405898661168309e-05, + "loss": 0.9681, + "step": 281400 + }, + { + "epoch": 3.92, + "learning_rate": 1.0398932835508993e-05, + "loss": 0.9744, + "step": 281500 + }, + { + "epoch": 3.92, + "learning_rate": 1.0391967009849678e-05, + "loss": 0.974, + "step": 281600 + }, + { + "epoch": 3.92, + "learning_rate": 1.0385001184190363e-05, + "loss": 1.0004, + "step": 281700 + }, + { + "epoch": 3.93, + "learning_rate": 1.0378035358531046e-05, + "loss": 0.9524, + "step": 281800 + }, + { + "epoch": 3.93, + "learning_rate": 1.037106953287173e-05, + "loss": 0.9796, + "step": 281900 + }, + { + "epoch": 3.93, + "learning_rate": 1.0364103707212415e-05, + "loss": 1.0044, + "step": 282000 + }, + { + "epoch": 3.93, + "learning_rate": 1.03571378815531e-05, + "loss": 0.9573, + "step": 282100 + }, + { + "epoch": 3.93, + "learning_rate": 1.0350172055893784e-05, + "loss": 1.0033, + "step": 282200 + }, + { + "epoch": 3.93, + "learning_rate": 1.034320623023447e-05, + "loss": 1.0084, + "step": 282300 + }, + { + "epoch": 3.93, + "learning_rate": 1.0336240404575154e-05, + "loss": 0.9516, + "step": 282400 + }, + { + "epoch": 3.94, + "learning_rate": 1.0329274578915839e-05, + "loss": 0.9972, + "step": 282500 + }, + { + "epoch": 3.94, + "learning_rate": 1.0322308753256525e-05, + "loss": 0.9877, + "step": 282600 + }, + { + "epoch": 3.94, + "learning_rate": 1.031534292759721e-05, + "loss": 1.0119, + "step": 282700 + }, + { + "epoch": 3.94, + "learning_rate": 1.0308377101937894e-05, + "loss": 0.9725, + "step": 282800 + }, + { + "epoch": 3.94, + "learning_rate": 1.0301411276278579e-05, + "loss": 0.9483, + "step": 282900 + }, + { + "epoch": 3.94, + "learning_rate": 1.0294445450619262e-05, + "loss": 1.0138, + "step": 283000 + }, + { + "epoch": 3.94, + "learning_rate": 1.0287479624959947e-05, + "loss": 0.9888, + "step": 283100 + }, + { + "epoch": 3.95, + "learning_rate": 1.0280513799300631e-05, + "loss": 0.9672, + "step": 283200 + }, + { + "epoch": 3.95, + "learning_rate": 1.0273547973641316e-05, + "loss": 0.9859, + "step": 283300 + }, + { + "epoch": 3.95, + "learning_rate": 1.0266582147982e-05, + "loss": 0.9606, + "step": 283400 + }, + { + "epoch": 3.95, + "learning_rate": 1.0259616322322685e-05, + "loss": 1.0003, + "step": 283500 + }, + { + "epoch": 3.95, + "learning_rate": 1.025265049666337e-05, + "loss": 0.9798, + "step": 283600 + }, + { + "epoch": 3.95, + "learning_rate": 1.0245684671004055e-05, + "loss": 0.9869, + "step": 283700 + }, + { + "epoch": 3.95, + "learning_rate": 1.023871884534474e-05, + "loss": 1.0049, + "step": 283800 + }, + { + "epoch": 3.96, + "learning_rate": 1.0231753019685424e-05, + "loss": 0.9581, + "step": 283900 + }, + { + "epoch": 3.96, + "learning_rate": 1.0224787194026109e-05, + "loss": 1.0021, + "step": 284000 + }, + { + "epoch": 3.96, + "learning_rate": 1.0217821368366792e-05, + "loss": 0.9775, + "step": 284100 + }, + { + "epoch": 3.96, + "learning_rate": 1.0210855542707476e-05, + "loss": 0.9659, + "step": 284200 + }, + { + "epoch": 3.96, + "learning_rate": 1.0203889717048161e-05, + "loss": 0.9469, + "step": 284300 + }, + { + "epoch": 3.96, + "learning_rate": 1.019699354964544e-05, + "loss": 0.9738, + "step": 284400 + }, + { + "epoch": 3.96, + "learning_rate": 1.0190027723986124e-05, + "loss": 0.9421, + "step": 284500 + }, + { + "epoch": 3.96, + "learning_rate": 1.0183061898326809e-05, + "loss": 0.989, + "step": 284600 + }, + { + "epoch": 3.97, + "learning_rate": 1.0176096072667493e-05, + "loss": 0.9879, + "step": 284700 + }, + { + "epoch": 3.97, + "learning_rate": 1.0169199905264771e-05, + "loss": 0.9911, + "step": 284800 + }, + { + "epoch": 3.97, + "learning_rate": 1.0162234079605456e-05, + "loss": 0.9613, + "step": 284900 + }, + { + "epoch": 3.97, + "learning_rate": 1.015526825394614e-05, + "loss": 0.9824, + "step": 285000 + }, + { + "epoch": 3.97, + "learning_rate": 1.0148302428286825e-05, + "loss": 0.9766, + "step": 285100 + }, + { + "epoch": 3.97, + "learning_rate": 1.014133660262751e-05, + "loss": 0.9803, + "step": 285200 + }, + { + "epoch": 3.97, + "learning_rate": 1.0134370776968195e-05, + "loss": 1.0073, + "step": 285300 + }, + { + "epoch": 3.98, + "learning_rate": 1.012740495130888e-05, + "loss": 0.9942, + "step": 285400 + }, + { + "epoch": 3.98, + "learning_rate": 1.0120439125649564e-05, + "loss": 0.9659, + "step": 285500 + }, + { + "epoch": 3.98, + "learning_rate": 1.0113473299990247e-05, + "loss": 0.9612, + "step": 285600 + }, + { + "epoch": 3.98, + "learning_rate": 1.0106507474330932e-05, + "loss": 0.969, + "step": 285700 + }, + { + "epoch": 3.98, + "learning_rate": 1.0099541648671617e-05, + "loss": 0.9585, + "step": 285800 + }, + { + "epoch": 3.98, + "learning_rate": 1.0092575823012301e-05, + "loss": 0.9946, + "step": 285900 + }, + { + "epoch": 3.98, + "learning_rate": 1.0085609997352986e-05, + "loss": 0.9986, + "step": 286000 + }, + { + "epoch": 3.99, + "learning_rate": 1.007864417169367e-05, + "loss": 1.0097, + "step": 286100 + }, + { + "epoch": 3.99, + "learning_rate": 1.0071678346034355e-05, + "loss": 1.0117, + "step": 286200 + }, + { + "epoch": 3.99, + "learning_rate": 1.006471252037504e-05, + "loss": 0.973, + "step": 286300 + }, + { + "epoch": 3.99, + "learning_rate": 1.0057746694715725e-05, + "loss": 0.9853, + "step": 286400 + }, + { + "epoch": 3.99, + "learning_rate": 1.005078086905641e-05, + "loss": 0.9752, + "step": 286500 + }, + { + "epoch": 3.99, + "learning_rate": 1.0043815043397094e-05, + "loss": 0.9869, + "step": 286600 + }, + { + "epoch": 3.99, + "learning_rate": 1.0036849217737779e-05, + "loss": 0.9808, + "step": 286700 + }, + { + "epoch": 4.0, + "learning_rate": 1.0029883392078463e-05, + "loss": 0.9659, + "step": 286800 + }, + { + "epoch": 4.0, + "learning_rate": 1.0022917566419148e-05, + "loss": 0.9788, + "step": 286900 + }, + { + "epoch": 4.0, + "learning_rate": 1.0015951740759833e-05, + "loss": 0.9915, + "step": 287000 + }, + { + "epoch": 4.0, + "learning_rate": 1.0008985915100517e-05, + "loss": 0.9798, + "step": 287100 + }, + { + "epoch": 4.0, + "eval_gen_len": 20.0, + "eval_loss": 1.1669589281082153, + "eval_rouge1": 12.4306, + "eval_rouge2": 3.7329, + "eval_rougeL": 11.9497, + "eval_rougeLsum": 12.0617, + "eval_runtime": 1509.6015, + "eval_samples_per_second": 8.855, + "eval_steps_per_second": 2.214, + "step": 287116 + }, + { + "epoch": 4.0, + "learning_rate": 1.0002020089441202e-05, + "loss": 0.8798, + "step": 287200 + }, + { + "epoch": 4.0, + "learning_rate": 9.995054263781887e-06, + "loss": 0.887, + "step": 287300 + }, + { + "epoch": 4.0, + "learning_rate": 9.988088438122572e-06, + "loss": 0.9167, + "step": 287400 + }, + { + "epoch": 4.01, + "learning_rate": 9.981122612463256e-06, + "loss": 0.8906, + "step": 287500 + }, + { + "epoch": 4.01, + "learning_rate": 9.974156786803941e-06, + "loss": 0.8845, + "step": 287600 + }, + { + "epoch": 4.01, + "learning_rate": 9.967190961144626e-06, + "loss": 0.8998, + "step": 287700 + }, + { + "epoch": 4.01, + "learning_rate": 9.960225135485309e-06, + "loss": 0.9037, + "step": 287800 + }, + { + "epoch": 4.01, + "learning_rate": 9.953259309825993e-06, + "loss": 0.86, + "step": 287900 + }, + { + "epoch": 4.01, + "learning_rate": 9.946293484166678e-06, + "loss": 0.9148, + "step": 288000 + }, + { + "epoch": 4.01, + "learning_rate": 9.939327658507363e-06, + "loss": 0.9055, + "step": 288100 + }, + { + "epoch": 4.02, + "learning_rate": 9.932361832848047e-06, + "loss": 0.8924, + "step": 288200 + }, + { + "epoch": 4.02, + "learning_rate": 9.925396007188732e-06, + "loss": 0.8662, + "step": 288300 + }, + { + "epoch": 4.02, + "learning_rate": 9.918430181529417e-06, + "loss": 0.8848, + "step": 288400 + }, + { + "epoch": 4.02, + "learning_rate": 9.911464355870101e-06, + "loss": 0.8896, + "step": 288500 + }, + { + "epoch": 4.02, + "learning_rate": 9.90456818846738e-06, + "loss": 0.8681, + "step": 288600 + }, + { + "epoch": 4.02, + "learning_rate": 9.897602362808064e-06, + "loss": 0.9143, + "step": 288700 + }, + { + "epoch": 4.02, + "learning_rate": 9.890636537148749e-06, + "loss": 0.8555, + "step": 288800 + }, + { + "epoch": 4.02, + "learning_rate": 9.883670711489434e-06, + "loss": 0.898, + "step": 288900 + }, + { + "epoch": 4.03, + "learning_rate": 9.876704885830118e-06, + "loss": 0.8996, + "step": 289000 + }, + { + "epoch": 4.03, + "learning_rate": 9.869739060170801e-06, + "loss": 0.8892, + "step": 289100 + }, + { + "epoch": 4.03, + "learning_rate": 9.862773234511486e-06, + "loss": 0.8774, + "step": 289200 + }, + { + "epoch": 4.03, + "learning_rate": 9.85580740885217e-06, + "loss": 0.8788, + "step": 289300 + }, + { + "epoch": 4.03, + "learning_rate": 9.848841583192855e-06, + "loss": 0.8799, + "step": 289400 + }, + { + "epoch": 4.03, + "learning_rate": 9.84187575753354e-06, + "loss": 0.888, + "step": 289500 + }, + { + "epoch": 4.03, + "learning_rate": 9.834909931874225e-06, + "loss": 0.8484, + "step": 289600 + }, + { + "epoch": 4.04, + "learning_rate": 9.82794410621491e-06, + "loss": 0.9076, + "step": 289700 + }, + { + "epoch": 4.04, + "learning_rate": 9.820978280555594e-06, + "loss": 0.879, + "step": 289800 + }, + { + "epoch": 4.04, + "learning_rate": 9.81401245489628e-06, + "loss": 0.8713, + "step": 289900 + }, + { + "epoch": 4.04, + "learning_rate": 9.807046629236965e-06, + "loss": 0.8748, + "step": 290000 + }, + { + "epoch": 4.04, + "learning_rate": 9.80008080357765e-06, + "loss": 0.8888, + "step": 290100 + }, + { + "epoch": 4.04, + "learning_rate": 9.793114977918333e-06, + "loss": 0.8755, + "step": 290200 + }, + { + "epoch": 4.04, + "learning_rate": 9.786149152259017e-06, + "loss": 0.8682, + "step": 290300 + }, + { + "epoch": 4.05, + "learning_rate": 9.779183326599702e-06, + "loss": 0.8733, + "step": 290400 + }, + { + "epoch": 4.05, + "learning_rate": 9.772217500940387e-06, + "loss": 0.8791, + "step": 290500 + }, + { + "epoch": 4.05, + "learning_rate": 9.765251675281071e-06, + "loss": 0.8938, + "step": 290600 + }, + { + "epoch": 4.05, + "learning_rate": 9.758285849621756e-06, + "loss": 0.9049, + "step": 290700 + }, + { + "epoch": 4.05, + "learning_rate": 9.75132002396244e-06, + "loss": 0.8843, + "step": 290800 + }, + { + "epoch": 4.05, + "learning_rate": 9.744354198303125e-06, + "loss": 0.9157, + "step": 290900 + }, + { + "epoch": 4.05, + "learning_rate": 9.73738837264381e-06, + "loss": 0.9201, + "step": 291000 + }, + { + "epoch": 4.06, + "learning_rate": 9.730422546984495e-06, + "loss": 0.8714, + "step": 291100 + }, + { + "epoch": 4.06, + "learning_rate": 9.72345672132518e-06, + "loss": 0.8757, + "step": 291200 + }, + { + "epoch": 4.06, + "learning_rate": 9.716490895665864e-06, + "loss": 0.8866, + "step": 291300 + }, + { + "epoch": 4.06, + "learning_rate": 9.709525070006547e-06, + "loss": 0.9118, + "step": 291400 + }, + { + "epoch": 4.06, + "learning_rate": 9.702559244347232e-06, + "loss": 0.9034, + "step": 291500 + }, + { + "epoch": 4.06, + "learning_rate": 9.695593418687917e-06, + "loss": 0.8982, + "step": 291600 + }, + { + "epoch": 4.06, + "learning_rate": 9.688627593028601e-06, + "loss": 0.9282, + "step": 291700 + }, + { + "epoch": 4.07, + "learning_rate": 9.681661767369286e-06, + "loss": 0.8808, + "step": 291800 + }, + { + "epoch": 4.07, + "learning_rate": 9.67469594170997e-06, + "loss": 0.8979, + "step": 291900 + }, + { + "epoch": 4.07, + "learning_rate": 9.667799774307249e-06, + "loss": 0.9066, + "step": 292000 + }, + { + "epoch": 4.07, + "learning_rate": 9.660833948647933e-06, + "loss": 0.9289, + "step": 292100 + }, + { + "epoch": 4.07, + "learning_rate": 9.653868122988618e-06, + "loss": 0.9009, + "step": 292200 + }, + { + "epoch": 4.07, + "learning_rate": 9.646902297329303e-06, + "loss": 0.9069, + "step": 292300 + }, + { + "epoch": 4.07, + "learning_rate": 9.639936471669987e-06, + "loss": 0.9099, + "step": 292400 + }, + { + "epoch": 4.08, + "learning_rate": 9.632970646010672e-06, + "loss": 0.8847, + "step": 292500 + }, + { + "epoch": 4.08, + "learning_rate": 9.626004820351357e-06, + "loss": 0.8912, + "step": 292600 + }, + { + "epoch": 4.08, + "learning_rate": 9.61903899469204e-06, + "loss": 0.891, + "step": 292700 + }, + { + "epoch": 4.08, + "learning_rate": 9.612073169032725e-06, + "loss": 0.9028, + "step": 292800 + }, + { + "epoch": 4.08, + "learning_rate": 9.60510734337341e-06, + "loss": 0.9037, + "step": 292900 + }, + { + "epoch": 4.08, + "learning_rate": 9.598141517714094e-06, + "loss": 0.9124, + "step": 293000 + }, + { + "epoch": 4.08, + "learning_rate": 9.591175692054779e-06, + "loss": 0.8872, + "step": 293100 + }, + { + "epoch": 4.08, + "learning_rate": 9.584209866395465e-06, + "loss": 0.8969, + "step": 293200 + }, + { + "epoch": 4.09, + "learning_rate": 9.57724404073615e-06, + "loss": 0.9182, + "step": 293300 + }, + { + "epoch": 4.09, + "learning_rate": 9.570278215076834e-06, + "loss": 0.9193, + "step": 293400 + }, + { + "epoch": 4.09, + "learning_rate": 9.563312389417519e-06, + "loss": 0.8957, + "step": 293500 + }, + { + "epoch": 4.09, + "learning_rate": 9.556346563758204e-06, + "loss": 0.8839, + "step": 293600 + }, + { + "epoch": 4.09, + "learning_rate": 9.549380738098888e-06, + "loss": 0.903, + "step": 293700 + }, + { + "epoch": 4.09, + "learning_rate": 9.542414912439571e-06, + "loss": 0.8874, + "step": 293800 + }, + { + "epoch": 4.09, + "learning_rate": 9.535449086780256e-06, + "loss": 0.9027, + "step": 293900 + }, + { + "epoch": 4.1, + "learning_rate": 9.52848326112094e-06, + "loss": 0.8889, + "step": 294000 + }, + { + "epoch": 4.1, + "learning_rate": 9.521587093718219e-06, + "loss": 0.8773, + "step": 294100 + }, + { + "epoch": 4.1, + "learning_rate": 9.514621268058904e-06, + "loss": 0.91, + "step": 294200 + }, + { + "epoch": 4.1, + "learning_rate": 9.507655442399588e-06, + "loss": 0.8912, + "step": 294300 + }, + { + "epoch": 4.1, + "learning_rate": 9.500689616740273e-06, + "loss": 0.8588, + "step": 294400 + }, + { + "epoch": 4.1, + "learning_rate": 9.493723791080958e-06, + "loss": 0.8836, + "step": 294500 + }, + { + "epoch": 4.1, + "learning_rate": 9.486757965421642e-06, + "loss": 0.8813, + "step": 294600 + }, + { + "epoch": 4.11, + "learning_rate": 9.479792139762327e-06, + "loss": 0.8857, + "step": 294700 + }, + { + "epoch": 4.11, + "learning_rate": 9.472826314103012e-06, + "loss": 0.9061, + "step": 294800 + }, + { + "epoch": 4.11, + "learning_rate": 9.465860488443696e-06, + "loss": 0.9027, + "step": 294900 + }, + { + "epoch": 4.11, + "learning_rate": 9.458964321040974e-06, + "loss": 0.9203, + "step": 295000 + }, + { + "epoch": 4.11, + "learning_rate": 9.451998495381659e-06, + "loss": 0.8871, + "step": 295100 + }, + { + "epoch": 4.11, + "learning_rate": 9.445032669722344e-06, + "loss": 0.9012, + "step": 295200 + }, + { + "epoch": 4.11, + "learning_rate": 9.438066844063027e-06, + "loss": 0.9211, + "step": 295300 + }, + { + "epoch": 4.12, + "learning_rate": 9.431101018403711e-06, + "loss": 0.8783, + "step": 295400 + }, + { + "epoch": 4.12, + "learning_rate": 9.424135192744396e-06, + "loss": 0.8633, + "step": 295500 + }, + { + "epoch": 4.12, + "learning_rate": 9.41716936708508e-06, + "loss": 0.9149, + "step": 295600 + }, + { + "epoch": 4.12, + "learning_rate": 9.410203541425766e-06, + "loss": 0.8882, + "step": 295700 + }, + { + "epoch": 4.12, + "learning_rate": 9.40323771576645e-06, + "loss": 0.8843, + "step": 295800 + }, + { + "epoch": 4.12, + "learning_rate": 9.396271890107135e-06, + "loss": 0.8888, + "step": 295900 + }, + { + "epoch": 4.12, + "learning_rate": 9.38930606444782e-06, + "loss": 0.8865, + "step": 296000 + }, + { + "epoch": 4.13, + "learning_rate": 9.382340238788504e-06, + "loss": 0.8992, + "step": 296100 + }, + { + "epoch": 4.13, + "learning_rate": 9.375374413129189e-06, + "loss": 0.8829, + "step": 296200 + }, + { + "epoch": 4.13, + "learning_rate": 9.368408587469874e-06, + "loss": 0.9391, + "step": 296300 + }, + { + "epoch": 4.13, + "learning_rate": 9.361442761810557e-06, + "loss": 0.8892, + "step": 296400 + }, + { + "epoch": 4.13, + "learning_rate": 9.354476936151241e-06, + "loss": 0.8723, + "step": 296500 + }, + { + "epoch": 4.13, + "learning_rate": 9.347511110491926e-06, + "loss": 0.905, + "step": 296600 + }, + { + "epoch": 4.13, + "learning_rate": 9.34054528483261e-06, + "loss": 0.8861, + "step": 296700 + }, + { + "epoch": 4.13, + "learning_rate": 9.333579459173295e-06, + "loss": 0.875, + "step": 296800 + }, + { + "epoch": 4.14, + "learning_rate": 9.32661363351398e-06, + "loss": 0.8967, + "step": 296900 + }, + { + "epoch": 4.14, + "learning_rate": 9.319647807854665e-06, + "loss": 0.9027, + "step": 297000 + }, + { + "epoch": 4.14, + "learning_rate": 9.31268198219535e-06, + "loss": 0.885, + "step": 297100 + }, + { + "epoch": 4.14, + "learning_rate": 9.305716156536036e-06, + "loss": 0.9072, + "step": 297200 + }, + { + "epoch": 4.14, + "learning_rate": 9.29875033087672e-06, + "loss": 0.9043, + "step": 297300 + }, + { + "epoch": 4.14, + "learning_rate": 9.291784505217405e-06, + "loss": 0.864, + "step": 297400 + }, + { + "epoch": 4.14, + "learning_rate": 9.284818679558088e-06, + "loss": 0.8637, + "step": 297500 + }, + { + "epoch": 4.15, + "learning_rate": 9.277852853898773e-06, + "loss": 0.887, + "step": 297600 + }, + { + "epoch": 4.15, + "learning_rate": 9.270887028239457e-06, + "loss": 0.9015, + "step": 297700 + }, + { + "epoch": 4.15, + "learning_rate": 9.263921202580142e-06, + "loss": 0.9068, + "step": 297800 + }, + { + "epoch": 4.15, + "learning_rate": 9.256955376920827e-06, + "loss": 0.8656, + "step": 297900 + }, + { + "epoch": 4.15, + "learning_rate": 9.249989551261512e-06, + "loss": 0.9118, + "step": 298000 + }, + { + "epoch": 4.15, + "learning_rate": 9.243023725602196e-06, + "loss": 0.8942, + "step": 298100 + }, + { + "epoch": 4.15, + "learning_rate": 9.236127558199474e-06, + "loss": 0.8867, + "step": 298200 + }, + { + "epoch": 4.16, + "learning_rate": 9.229161732540159e-06, + "loss": 0.8914, + "step": 298300 + }, + { + "epoch": 4.16, + "learning_rate": 9.222195906880844e-06, + "loss": 0.8917, + "step": 298400 + }, + { + "epoch": 4.16, + "learning_rate": 9.215230081221528e-06, + "loss": 0.8779, + "step": 298500 + }, + { + "epoch": 4.16, + "learning_rate": 9.208264255562213e-06, + "loss": 0.9147, + "step": 298600 + }, + { + "epoch": 4.16, + "learning_rate": 9.201298429902898e-06, + "loss": 0.882, + "step": 298700 + }, + { + "epoch": 4.16, + "learning_rate": 9.19433260424358e-06, + "loss": 0.8678, + "step": 298800 + }, + { + "epoch": 4.16, + "learning_rate": 9.187366778584265e-06, + "loss": 0.8821, + "step": 298900 + }, + { + "epoch": 4.17, + "learning_rate": 9.18040095292495e-06, + "loss": 0.8871, + "step": 299000 + }, + { + "epoch": 4.17, + "learning_rate": 9.173435127265635e-06, + "loss": 0.9118, + "step": 299100 + }, + { + "epoch": 4.17, + "learning_rate": 9.16646930160632e-06, + "loss": 0.9057, + "step": 299200 + }, + { + "epoch": 4.17, + "learning_rate": 9.159503475947004e-06, + "loss": 0.8713, + "step": 299300 + }, + { + "epoch": 4.17, + "learning_rate": 9.152537650287689e-06, + "loss": 0.8827, + "step": 299400 + }, + { + "epoch": 4.17, + "learning_rate": 9.145571824628374e-06, + "loss": 0.8992, + "step": 299500 + }, + { + "epoch": 4.17, + "learning_rate": 9.138605998969058e-06, + "loss": 0.9391, + "step": 299600 + }, + { + "epoch": 4.18, + "learning_rate": 9.131640173309743e-06, + "loss": 0.9138, + "step": 299700 + }, + { + "epoch": 4.18, + "learning_rate": 9.124674347650428e-06, + "loss": 0.8712, + "step": 299800 + }, + { + "epoch": 4.18, + "learning_rate": 9.11770852199111e-06, + "loss": 0.9157, + "step": 299900 + }, + { + "epoch": 4.18, + "learning_rate": 9.110742696331795e-06, + "loss": 0.8838, + "step": 300000 + }, + { + "epoch": 4.18, + "learning_rate": 9.10377687067248e-06, + "loss": 0.9131, + "step": 300100 + }, + { + "epoch": 4.18, + "learning_rate": 9.096811045013165e-06, + "loss": 0.9078, + "step": 300200 + }, + { + "epoch": 4.18, + "learning_rate": 9.08984521935385e-06, + "loss": 0.9053, + "step": 300300 + }, + { + "epoch": 4.19, + "learning_rate": 9.082879393694534e-06, + "loss": 0.9065, + "step": 300400 + }, + { + "epoch": 4.19, + "learning_rate": 9.07591356803522e-06, + "loss": 0.9008, + "step": 300500 + }, + { + "epoch": 4.19, + "learning_rate": 9.068947742375905e-06, + "loss": 0.8981, + "step": 300600 + }, + { + "epoch": 4.19, + "learning_rate": 9.06198191671659e-06, + "loss": 0.8974, + "step": 300700 + }, + { + "epoch": 4.19, + "learning_rate": 9.055016091057274e-06, + "loss": 0.9151, + "step": 300800 + }, + { + "epoch": 4.19, + "learning_rate": 9.048050265397959e-06, + "loss": 0.9115, + "step": 300900 + }, + { + "epoch": 4.19, + "learning_rate": 9.041084439738642e-06, + "loss": 0.9199, + "step": 301000 + }, + { + "epoch": 4.19, + "learning_rate": 9.034118614079327e-06, + "loss": 0.8923, + "step": 301100 + }, + { + "epoch": 4.2, + "learning_rate": 9.027152788420011e-06, + "loss": 0.8718, + "step": 301200 + }, + { + "epoch": 4.2, + "learning_rate": 9.020186962760696e-06, + "loss": 0.8895, + "step": 301300 + }, + { + "epoch": 4.2, + "learning_rate": 9.01322113710138e-06, + "loss": 0.9087, + "step": 301400 + }, + { + "epoch": 4.2, + "learning_rate": 9.006255311442065e-06, + "loss": 0.8972, + "step": 301500 + }, + { + "epoch": 4.2, + "learning_rate": 8.99928948578275e-06, + "loss": 0.9067, + "step": 301600 + }, + { + "epoch": 4.2, + "learning_rate": 8.992323660123435e-06, + "loss": 0.9087, + "step": 301700 + }, + { + "epoch": 4.2, + "learning_rate": 8.98535783446412e-06, + "loss": 0.9044, + "step": 301800 + }, + { + "epoch": 4.21, + "learning_rate": 8.978392008804804e-06, + "loss": 0.8673, + "step": 301900 + }, + { + "epoch": 4.21, + "learning_rate": 8.971426183145489e-06, + "loss": 0.8897, + "step": 302000 + }, + { + "epoch": 4.21, + "learning_rate": 8.964460357486174e-06, + "loss": 0.8733, + "step": 302100 + }, + { + "epoch": 4.21, + "learning_rate": 8.957494531826857e-06, + "loss": 0.8798, + "step": 302200 + }, + { + "epoch": 4.21, + "learning_rate": 8.950598364424135e-06, + "loss": 0.8735, + "step": 302300 + }, + { + "epoch": 4.21, + "learning_rate": 8.94363253876482e-06, + "loss": 0.8819, + "step": 302400 + }, + { + "epoch": 4.21, + "learning_rate": 8.936666713105504e-06, + "loss": 0.9019, + "step": 302500 + }, + { + "epoch": 4.22, + "learning_rate": 8.929700887446189e-06, + "loss": 0.8547, + "step": 302600 + }, + { + "epoch": 4.22, + "learning_rate": 8.922735061786873e-06, + "loss": 0.8871, + "step": 302700 + }, + { + "epoch": 4.22, + "learning_rate": 8.915769236127558e-06, + "loss": 0.8906, + "step": 302800 + }, + { + "epoch": 4.22, + "learning_rate": 8.908803410468243e-06, + "loss": 0.9111, + "step": 302900 + }, + { + "epoch": 4.22, + "learning_rate": 8.901837584808927e-06, + "loss": 0.8933, + "step": 303000 + }, + { + "epoch": 4.22, + "learning_rate": 8.894871759149612e-06, + "loss": 0.8749, + "step": 303100 + }, + { + "epoch": 4.22, + "learning_rate": 8.887905933490297e-06, + "loss": 0.9143, + "step": 303200 + }, + { + "epoch": 4.23, + "learning_rate": 8.880940107830982e-06, + "loss": 0.8993, + "step": 303300 + }, + { + "epoch": 4.23, + "learning_rate": 8.873974282171666e-06, + "loss": 0.9112, + "step": 303400 + }, + { + "epoch": 4.23, + "learning_rate": 8.86700845651235e-06, + "loss": 0.8785, + "step": 303500 + }, + { + "epoch": 4.23, + "learning_rate": 8.860042630853034e-06, + "loss": 0.8886, + "step": 303600 + }, + { + "epoch": 4.23, + "learning_rate": 8.85307680519372e-06, + "loss": 0.938, + "step": 303700 + }, + { + "epoch": 4.23, + "learning_rate": 8.846110979534405e-06, + "loss": 0.854, + "step": 303800 + }, + { + "epoch": 4.23, + "learning_rate": 8.83914515387509e-06, + "loss": 0.926, + "step": 303900 + }, + { + "epoch": 4.24, + "learning_rate": 8.832179328215774e-06, + "loss": 0.9124, + "step": 304000 + }, + { + "epoch": 4.24, + "learning_rate": 8.825213502556459e-06, + "loss": 0.8968, + "step": 304100 + }, + { + "epoch": 4.24, + "learning_rate": 8.818247676897144e-06, + "loss": 0.9058, + "step": 304200 + }, + { + "epoch": 4.24, + "learning_rate": 8.811281851237828e-06, + "loss": 0.908, + "step": 304300 + }, + { + "epoch": 4.24, + "learning_rate": 8.804385683835105e-06, + "loss": 0.9382, + "step": 304400 + }, + { + "epoch": 4.24, + "learning_rate": 8.79741985817579e-06, + "loss": 0.8834, + "step": 304500 + }, + { + "epoch": 4.24, + "learning_rate": 8.790454032516476e-06, + "loss": 0.8964, + "step": 304600 + }, + { + "epoch": 4.24, + "learning_rate": 8.78348820685716e-06, + "loss": 0.9125, + "step": 304700 + }, + { + "epoch": 4.25, + "learning_rate": 8.776522381197844e-06, + "loss": 0.8903, + "step": 304800 + }, + { + "epoch": 4.25, + "learning_rate": 8.769556555538528e-06, + "loss": 0.8758, + "step": 304900 + }, + { + "epoch": 4.25, + "learning_rate": 8.762590729879213e-06, + "loss": 0.8857, + "step": 305000 + }, + { + "epoch": 4.25, + "learning_rate": 8.755624904219898e-06, + "loss": 0.9212, + "step": 305100 + }, + { + "epoch": 4.25, + "learning_rate": 8.748728736817174e-06, + "loss": 0.8922, + "step": 305200 + }, + { + "epoch": 4.25, + "learning_rate": 8.741762911157859e-06, + "loss": 0.9003, + "step": 305300 + }, + { + "epoch": 4.25, + "learning_rate": 8.734797085498545e-06, + "loss": 0.9132, + "step": 305400 + }, + { + "epoch": 4.26, + "learning_rate": 8.72783125983923e-06, + "loss": 0.8758, + "step": 305500 + }, + { + "epoch": 4.26, + "learning_rate": 8.720865434179914e-06, + "loss": 0.8875, + "step": 305600 + }, + { + "epoch": 4.26, + "learning_rate": 8.713899608520599e-06, + "loss": 0.9164, + "step": 305700 + }, + { + "epoch": 4.26, + "learning_rate": 8.706933782861284e-06, + "loss": 0.8996, + "step": 305800 + }, + { + "epoch": 4.26, + "learning_rate": 8.699967957201968e-06, + "loss": 0.9045, + "step": 305900 + }, + { + "epoch": 4.26, + "learning_rate": 8.693002131542653e-06, + "loss": 0.8863, + "step": 306000 + }, + { + "epoch": 4.26, + "learning_rate": 8.686036305883336e-06, + "loss": 0.8951, + "step": 306100 + }, + { + "epoch": 4.27, + "learning_rate": 8.67907048022402e-06, + "loss": 0.9016, + "step": 306200 + }, + { + "epoch": 4.27, + "learning_rate": 8.672104654564706e-06, + "loss": 0.9075, + "step": 306300 + }, + { + "epoch": 4.27, + "learning_rate": 8.66513882890539e-06, + "loss": 0.8794, + "step": 306400 + }, + { + "epoch": 4.27, + "learning_rate": 8.658173003246075e-06, + "loss": 0.9076, + "step": 306500 + }, + { + "epoch": 4.27, + "learning_rate": 8.65120717758676e-06, + "loss": 0.8812, + "step": 306600 + }, + { + "epoch": 4.27, + "learning_rate": 8.644241351927444e-06, + "loss": 0.8843, + "step": 306700 + }, + { + "epoch": 4.27, + "learning_rate": 8.637275526268129e-06, + "loss": 0.8714, + "step": 306800 + }, + { + "epoch": 4.28, + "learning_rate": 8.630309700608814e-06, + "loss": 0.879, + "step": 306900 + }, + { + "epoch": 4.28, + "learning_rate": 8.623343874949498e-06, + "loss": 0.8801, + "step": 307000 + }, + { + "epoch": 4.28, + "learning_rate": 8.616378049290183e-06, + "loss": 0.8613, + "step": 307100 + }, + { + "epoch": 4.28, + "learning_rate": 8.609412223630866e-06, + "loss": 0.887, + "step": 307200 + }, + { + "epoch": 4.28, + "learning_rate": 8.60244639797155e-06, + "loss": 0.8832, + "step": 307300 + }, + { + "epoch": 4.28, + "learning_rate": 8.595480572312235e-06, + "loss": 0.9144, + "step": 307400 + }, + { + "epoch": 4.28, + "learning_rate": 8.58851474665292e-06, + "loss": 0.8777, + "step": 307500 + }, + { + "epoch": 4.29, + "learning_rate": 8.581548920993605e-06, + "loss": 0.8765, + "step": 307600 + }, + { + "epoch": 4.29, + "learning_rate": 8.57458309533429e-06, + "loss": 0.9076, + "step": 307700 + }, + { + "epoch": 4.29, + "learning_rate": 8.567617269674976e-06, + "loss": 0.8719, + "step": 307800 + }, + { + "epoch": 4.29, + "learning_rate": 8.56065144401566e-06, + "loss": 0.8705, + "step": 307900 + }, + { + "epoch": 4.29, + "learning_rate": 8.553685618356345e-06, + "loss": 0.9109, + "step": 308000 + }, + { + "epoch": 4.29, + "learning_rate": 8.54671979269703e-06, + "loss": 0.9082, + "step": 308100 + }, + { + "epoch": 4.29, + "learning_rate": 8.539753967037715e-06, + "loss": 0.9211, + "step": 308200 + }, + { + "epoch": 4.3, + "learning_rate": 8.532788141378398e-06, + "loss": 0.8623, + "step": 308300 + }, + { + "epoch": 4.3, + "learning_rate": 8.525822315719082e-06, + "loss": 0.9063, + "step": 308400 + }, + { + "epoch": 4.3, + "learning_rate": 8.518856490059767e-06, + "loss": 0.91, + "step": 308500 + }, + { + "epoch": 4.3, + "learning_rate": 8.511890664400452e-06, + "loss": 0.9026, + "step": 308600 + }, + { + "epoch": 4.3, + "learning_rate": 8.504924838741136e-06, + "loss": 0.9274, + "step": 308700 + }, + { + "epoch": 4.3, + "learning_rate": 8.498028671338414e-06, + "loss": 0.8974, + "step": 308800 + }, + { + "epoch": 4.3, + "learning_rate": 8.491062845679099e-06, + "loss": 0.914, + "step": 308900 + }, + { + "epoch": 4.3, + "learning_rate": 8.484097020019784e-06, + "loss": 0.9032, + "step": 309000 + }, + { + "epoch": 4.31, + "learning_rate": 8.477131194360468e-06, + "loss": 0.895, + "step": 309100 + }, + { + "epoch": 4.31, + "learning_rate": 8.470165368701153e-06, + "loss": 0.9005, + "step": 309200 + }, + { + "epoch": 4.31, + "learning_rate": 8.463199543041838e-06, + "loss": 0.9147, + "step": 309300 + }, + { + "epoch": 4.31, + "learning_rate": 8.456233717382522e-06, + "loss": 0.913, + "step": 309400 + }, + { + "epoch": 4.31, + "learning_rate": 8.449267891723207e-06, + "loss": 0.9146, + "step": 309500 + }, + { + "epoch": 4.31, + "learning_rate": 8.44230206606389e-06, + "loss": 0.9002, + "step": 309600 + }, + { + "epoch": 4.31, + "learning_rate": 8.435336240404575e-06, + "loss": 0.9239, + "step": 309700 + }, + { + "epoch": 4.32, + "learning_rate": 8.42837041474526e-06, + "loss": 0.8964, + "step": 309800 + }, + { + "epoch": 4.32, + "learning_rate": 8.421404589085944e-06, + "loss": 0.9108, + "step": 309900 + }, + { + "epoch": 4.32, + "learning_rate": 8.414438763426629e-06, + "loss": 0.8997, + "step": 310000 + }, + { + "epoch": 4.32, + "learning_rate": 8.407472937767314e-06, + "loss": 0.8904, + "step": 310100 + }, + { + "epoch": 4.32, + "learning_rate": 8.400507112107998e-06, + "loss": 0.8925, + "step": 310200 + }, + { + "epoch": 4.32, + "learning_rate": 8.393541286448683e-06, + "loss": 0.8845, + "step": 310300 + }, + { + "epoch": 4.32, + "learning_rate": 8.386575460789368e-06, + "loss": 0.859, + "step": 310400 + }, + { + "epoch": 4.33, + "learning_rate": 8.379609635130052e-06, + "loss": 0.882, + "step": 310500 + }, + { + "epoch": 4.33, + "learning_rate": 8.372643809470737e-06, + "loss": 0.9013, + "step": 310600 + }, + { + "epoch": 4.33, + "learning_rate": 8.36567798381142e-06, + "loss": 0.8792, + "step": 310700 + }, + { + "epoch": 4.33, + "learning_rate": 8.358712158152105e-06, + "loss": 0.8754, + "step": 310800 + }, + { + "epoch": 4.33, + "learning_rate": 8.35174633249279e-06, + "loss": 0.8944, + "step": 310900 + }, + { + "epoch": 4.33, + "learning_rate": 8.344780506833474e-06, + "loss": 0.9278, + "step": 311000 + }, + { + "epoch": 4.33, + "learning_rate": 8.33781468117416e-06, + "loss": 0.9196, + "step": 311100 + }, + { + "epoch": 4.34, + "learning_rate": 8.330848855514845e-06, + "loss": 0.9093, + "step": 311200 + }, + { + "epoch": 4.34, + "learning_rate": 8.32388302985553e-06, + "loss": 0.8672, + "step": 311300 + }, + { + "epoch": 4.34, + "learning_rate": 8.316917204196214e-06, + "loss": 0.889, + "step": 311400 + }, + { + "epoch": 4.34, + "learning_rate": 8.309951378536899e-06, + "loss": 0.9111, + "step": 311500 + }, + { + "epoch": 4.34, + "learning_rate": 8.302985552877584e-06, + "loss": 0.9003, + "step": 311600 + }, + { + "epoch": 4.34, + "learning_rate": 8.296019727218268e-06, + "loss": 0.9248, + "step": 311700 + }, + { + "epoch": 4.34, + "learning_rate": 8.289053901558953e-06, + "loss": 0.9375, + "step": 311800 + }, + { + "epoch": 4.35, + "learning_rate": 8.282088075899636e-06, + "loss": 0.8977, + "step": 311900 + }, + { + "epoch": 4.35, + "learning_rate": 8.27512225024032e-06, + "loss": 0.9032, + "step": 312000 + }, + { + "epoch": 4.35, + "learning_rate": 8.268156424581006e-06, + "loss": 0.8761, + "step": 312100 + }, + { + "epoch": 4.35, + "learning_rate": 8.26119059892169e-06, + "loss": 0.9121, + "step": 312200 + }, + { + "epoch": 4.35, + "learning_rate": 8.254224773262375e-06, + "loss": 0.9079, + "step": 312300 + }, + { + "epoch": 4.35, + "learning_rate": 8.24725894760306e-06, + "loss": 0.8987, + "step": 312400 + }, + { + "epoch": 4.35, + "learning_rate": 8.240293121943744e-06, + "loss": 0.8772, + "step": 312500 + }, + { + "epoch": 4.36, + "learning_rate": 8.233327296284429e-06, + "loss": 0.8698, + "step": 312600 + }, + { + "epoch": 4.36, + "learning_rate": 8.226361470625114e-06, + "loss": 0.8869, + "step": 312700 + }, + { + "epoch": 4.36, + "learning_rate": 8.219395644965798e-06, + "loss": 0.8906, + "step": 312800 + }, + { + "epoch": 4.36, + "learning_rate": 8.212429819306483e-06, + "loss": 0.9012, + "step": 312900 + }, + { + "epoch": 4.36, + "learning_rate": 8.205463993647166e-06, + "loss": 0.9043, + "step": 313000 + }, + { + "epoch": 4.36, + "learning_rate": 8.19849816798785e-06, + "loss": 0.8731, + "step": 313100 + }, + { + "epoch": 4.36, + "learning_rate": 8.191532342328535e-06, + "loss": 0.8759, + "step": 313200 + }, + { + "epoch": 4.36, + "learning_rate": 8.18456651666922e-06, + "loss": 0.8886, + "step": 313300 + }, + { + "epoch": 4.37, + "learning_rate": 8.177600691009905e-06, + "loss": 0.9356, + "step": 313400 + }, + { + "epoch": 4.37, + "learning_rate": 8.17063486535059e-06, + "loss": 0.8879, + "step": 313500 + }, + { + "epoch": 4.37, + "learning_rate": 8.163669039691276e-06, + "loss": 0.8747, + "step": 313600 + }, + { + "epoch": 4.37, + "learning_rate": 8.15670321403196e-06, + "loss": 0.8869, + "step": 313700 + }, + { + "epoch": 4.37, + "learning_rate": 8.149737388372645e-06, + "loss": 0.8997, + "step": 313800 + }, + { + "epoch": 4.37, + "learning_rate": 8.14277156271333e-06, + "loss": 0.8978, + "step": 313900 + }, + { + "epoch": 4.37, + "learning_rate": 8.135805737054014e-06, + "loss": 0.8883, + "step": 314000 + }, + { + "epoch": 4.38, + "learning_rate": 8.128909569651291e-06, + "loss": 0.8676, + "step": 314100 + }, + { + "epoch": 4.38, + "learning_rate": 8.121943743991976e-06, + "loss": 0.9107, + "step": 314200 + }, + { + "epoch": 4.38, + "learning_rate": 8.11497791833266e-06, + "loss": 0.8825, + "step": 314300 + }, + { + "epoch": 4.38, + "learning_rate": 8.108012092673345e-06, + "loss": 0.9159, + "step": 314400 + }, + { + "epoch": 4.38, + "learning_rate": 8.10104626701403e-06, + "loss": 0.8902, + "step": 314500 + }, + { + "epoch": 4.38, + "learning_rate": 8.094080441354714e-06, + "loss": 0.913, + "step": 314600 + }, + { + "epoch": 4.38, + "learning_rate": 8.087114615695399e-06, + "loss": 0.8953, + "step": 314700 + }, + { + "epoch": 4.39, + "learning_rate": 8.080148790036084e-06, + "loss": 0.8857, + "step": 314800 + }, + { + "epoch": 4.39, + "learning_rate": 8.073182964376768e-06, + "loss": 0.8929, + "step": 314900 + }, + { + "epoch": 4.39, + "learning_rate": 8.066217138717453e-06, + "loss": 0.9126, + "step": 315000 + }, + { + "epoch": 4.39, + "learning_rate": 8.059251313058138e-06, + "loss": 0.8801, + "step": 315100 + }, + { + "epoch": 4.39, + "learning_rate": 8.052285487398822e-06, + "loss": 0.8607, + "step": 315200 + }, + { + "epoch": 4.39, + "learning_rate": 8.045319661739507e-06, + "loss": 0.934, + "step": 315300 + }, + { + "epoch": 4.39, + "learning_rate": 8.03835383608019e-06, + "loss": 0.9185, + "step": 315400 + }, + { + "epoch": 4.4, + "learning_rate": 8.031388010420875e-06, + "loss": 0.8689, + "step": 315500 + }, + { + "epoch": 4.4, + "learning_rate": 8.02442218476156e-06, + "loss": 0.8656, + "step": 315600 + }, + { + "epoch": 4.4, + "learning_rate": 8.017456359102244e-06, + "loss": 0.9096, + "step": 315700 + }, + { + "epoch": 4.4, + "learning_rate": 8.010490533442929e-06, + "loss": 0.8914, + "step": 315800 + }, + { + "epoch": 4.4, + "learning_rate": 8.003524707783614e-06, + "loss": 0.9163, + "step": 315900 + }, + { + "epoch": 4.4, + "learning_rate": 7.996558882124298e-06, + "loss": 0.9187, + "step": 316000 + }, + { + "epoch": 4.4, + "learning_rate": 7.989593056464983e-06, + "loss": 0.8844, + "step": 316100 + }, + { + "epoch": 4.41, + "learning_rate": 7.982696889062261e-06, + "loss": 0.8941, + "step": 316200 + }, + { + "epoch": 4.41, + "learning_rate": 7.975731063402946e-06, + "loss": 0.9091, + "step": 316300 + }, + { + "epoch": 4.41, + "learning_rate": 7.96876523774363e-06, + "loss": 0.9058, + "step": 316400 + }, + { + "epoch": 4.41, + "learning_rate": 7.961799412084315e-06, + "loss": 0.902, + "step": 316500 + }, + { + "epoch": 4.41, + "learning_rate": 7.954833586425e-06, + "loss": 0.9069, + "step": 316600 + }, + { + "epoch": 4.41, + "learning_rate": 7.947867760765683e-06, + "loss": 0.9132, + "step": 316700 + }, + { + "epoch": 4.41, + "learning_rate": 7.940901935106367e-06, + "loss": 0.9092, + "step": 316800 + }, + { + "epoch": 4.41, + "learning_rate": 7.933936109447052e-06, + "loss": 0.8787, + "step": 316900 + }, + { + "epoch": 4.42, + "learning_rate": 7.926970283787737e-06, + "loss": 0.8912, + "step": 317000 + }, + { + "epoch": 4.42, + "learning_rate": 7.920004458128421e-06, + "loss": 0.8981, + "step": 317100 + }, + { + "epoch": 4.42, + "learning_rate": 7.913038632469106e-06, + "loss": 0.8828, + "step": 317200 + }, + { + "epoch": 4.42, + "learning_rate": 7.90607280680979e-06, + "loss": 0.8916, + "step": 317300 + }, + { + "epoch": 4.42, + "learning_rate": 7.899106981150476e-06, + "loss": 0.8966, + "step": 317400 + }, + { + "epoch": 4.42, + "learning_rate": 7.89214115549116e-06, + "loss": 0.8944, + "step": 317500 + }, + { + "epoch": 4.42, + "learning_rate": 7.885175329831847e-06, + "loss": 0.9162, + "step": 317600 + }, + { + "epoch": 4.43, + "learning_rate": 7.878209504172531e-06, + "loss": 0.8874, + "step": 317700 + }, + { + "epoch": 4.43, + "learning_rate": 7.871243678513214e-06, + "loss": 0.8966, + "step": 317800 + }, + { + "epoch": 4.43, + "learning_rate": 7.864277852853899e-06, + "loss": 0.9229, + "step": 317900 + }, + { + "epoch": 4.43, + "learning_rate": 7.857312027194584e-06, + "loss": 0.9151, + "step": 318000 + }, + { + "epoch": 4.43, + "learning_rate": 7.850346201535268e-06, + "loss": 0.9226, + "step": 318100 + }, + { + "epoch": 4.43, + "learning_rate": 7.843380375875953e-06, + "loss": 0.9051, + "step": 318200 + }, + { + "epoch": 4.43, + "learning_rate": 7.836414550216638e-06, + "loss": 0.9119, + "step": 318300 + }, + { + "epoch": 4.44, + "learning_rate": 7.829448724557322e-06, + "loss": 0.8696, + "step": 318400 + }, + { + "epoch": 4.44, + "learning_rate": 7.822482898898007e-06, + "loss": 0.877, + "step": 318500 + }, + { + "epoch": 4.44, + "learning_rate": 7.815517073238692e-06, + "loss": 0.9048, + "step": 318600 + }, + { + "epoch": 4.44, + "learning_rate": 7.808551247579376e-06, + "loss": 0.8981, + "step": 318700 + }, + { + "epoch": 4.44, + "learning_rate": 7.801585421920061e-06, + "loss": 0.8949, + "step": 318800 + }, + { + "epoch": 4.44, + "learning_rate": 7.794619596260744e-06, + "loss": 0.8869, + "step": 318900 + }, + { + "epoch": 4.44, + "learning_rate": 7.787653770601429e-06, + "loss": 0.9065, + "step": 319000 + }, + { + "epoch": 4.45, + "learning_rate": 7.780687944942113e-06, + "loss": 0.8938, + "step": 319100 + }, + { + "epoch": 4.45, + "learning_rate": 7.773722119282798e-06, + "loss": 0.9054, + "step": 319200 + }, + { + "epoch": 4.45, + "learning_rate": 7.766756293623483e-06, + "loss": 0.8662, + "step": 319300 + }, + { + "epoch": 4.45, + "learning_rate": 7.759790467964167e-06, + "loss": 0.8957, + "step": 319400 + }, + { + "epoch": 4.45, + "learning_rate": 7.752824642304852e-06, + "loss": 0.8956, + "step": 319500 + }, + { + "epoch": 4.45, + "learning_rate": 7.745858816645537e-06, + "loss": 0.8782, + "step": 319600 + }, + { + "epoch": 4.45, + "learning_rate": 7.738892990986222e-06, + "loss": 0.8943, + "step": 319700 + }, + { + "epoch": 4.46, + "learning_rate": 7.731927165326906e-06, + "loss": 0.8972, + "step": 319800 + }, + { + "epoch": 4.46, + "learning_rate": 7.724961339667591e-06, + "loss": 0.9111, + "step": 319900 + }, + { + "epoch": 4.46, + "learning_rate": 7.717995514008276e-06, + "loss": 0.908, + "step": 320000 + }, + { + "epoch": 4.46, + "learning_rate": 7.71102968834896e-06, + "loss": 0.9101, + "step": 320100 + }, + { + "epoch": 4.46, + "learning_rate": 7.704133520946237e-06, + "loss": 0.8888, + "step": 320200 + }, + { + "epoch": 4.46, + "learning_rate": 7.697167695286921e-06, + "loss": 0.9402, + "step": 320300 + }, + { + "epoch": 4.46, + "learning_rate": 7.690201869627606e-06, + "loss": 0.9185, + "step": 320400 + }, + { + "epoch": 4.47, + "learning_rate": 7.68323604396829e-06, + "loss": 0.9445, + "step": 320500 + }, + { + "epoch": 4.47, + "learning_rate": 7.676339876565569e-06, + "loss": 0.862, + "step": 320600 + }, + { + "epoch": 4.47, + "learning_rate": 7.669374050906254e-06, + "loss": 0.8918, + "step": 320700 + }, + { + "epoch": 4.47, + "learning_rate": 7.662408225246938e-06, + "loss": 0.888, + "step": 320800 + }, + { + "epoch": 4.47, + "learning_rate": 7.655442399587623e-06, + "loss": 0.8846, + "step": 320900 + }, + { + "epoch": 4.47, + "learning_rate": 7.648476573928308e-06, + "loss": 0.8908, + "step": 321000 + }, + { + "epoch": 4.47, + "learning_rate": 7.641510748268992e-06, + "loss": 0.8922, + "step": 321100 + }, + { + "epoch": 4.47, + "learning_rate": 7.634544922609677e-06, + "loss": 0.8926, + "step": 321200 + }, + { + "epoch": 4.48, + "learning_rate": 7.6275790969503625e-06, + "loss": 0.8947, + "step": 321300 + }, + { + "epoch": 4.48, + "learning_rate": 7.620613271291047e-06, + "loss": 0.892, + "step": 321400 + }, + { + "epoch": 4.48, + "learning_rate": 7.61364744563173e-06, + "loss": 0.8965, + "step": 321500 + }, + { + "epoch": 4.48, + "learning_rate": 7.606681619972415e-06, + "loss": 0.9022, + "step": 321600 + }, + { + "epoch": 4.48, + "learning_rate": 7.5997157943130996e-06, + "loss": 0.8852, + "step": 321700 + }, + { + "epoch": 4.48, + "learning_rate": 7.592749968653784e-06, + "loss": 0.9176, + "step": 321800 + }, + { + "epoch": 4.48, + "learning_rate": 7.585784142994469e-06, + "loss": 0.8962, + "step": 321900 + }, + { + "epoch": 4.49, + "learning_rate": 7.578818317335154e-06, + "loss": 0.9282, + "step": 322000 + }, + { + "epoch": 4.49, + "learning_rate": 7.571852491675838e-06, + "loss": 0.9027, + "step": 322100 + }, + { + "epoch": 4.49, + "learning_rate": 7.564886666016523e-06, + "loss": 0.9116, + "step": 322200 + }, + { + "epoch": 4.49, + "learning_rate": 7.5579208403572085e-06, + "loss": 0.8951, + "step": 322300 + }, + { + "epoch": 4.49, + "learning_rate": 7.550955014697893e-06, + "loss": 0.9081, + "step": 322400 + }, + { + "epoch": 4.49, + "learning_rate": 7.543989189038578e-06, + "loss": 0.9026, + "step": 322500 + }, + { + "epoch": 4.49, + "learning_rate": 7.5370233633792625e-06, + "loss": 0.8898, + "step": 322600 + }, + { + "epoch": 4.5, + "learning_rate": 7.53012719597654e-06, + "loss": 0.8865, + "step": 322700 + }, + { + "epoch": 4.5, + "learning_rate": 7.523161370317223e-06, + "loss": 0.9049, + "step": 322800 + }, + { + "epoch": 4.5, + "learning_rate": 7.5161955446579075e-06, + "loss": 0.9006, + "step": 322900 + }, + { + "epoch": 4.5, + "learning_rate": 7.509229718998592e-06, + "loss": 0.8915, + "step": 323000 + }, + { + "epoch": 4.5, + "learning_rate": 7.502263893339278e-06, + "loss": 0.8986, + "step": 323100 + }, + { + "epoch": 4.5, + "learning_rate": 7.495298067679962e-06, + "loss": 0.8966, + "step": 323200 + }, + { + "epoch": 4.5, + "learning_rate": 7.488332242020647e-06, + "loss": 0.8789, + "step": 323300 + }, + { + "epoch": 4.51, + "learning_rate": 7.481366416361332e-06, + "loss": 0.9292, + "step": 323400 + }, + { + "epoch": 4.51, + "learning_rate": 7.4744005907020164e-06, + "loss": 0.9006, + "step": 323500 + }, + { + "epoch": 4.51, + "learning_rate": 7.467434765042701e-06, + "loss": 0.9103, + "step": 323600 + }, + { + "epoch": 4.51, + "learning_rate": 7.460468939383385e-06, + "loss": 0.902, + "step": 323700 + }, + { + "epoch": 4.51, + "learning_rate": 7.45350311372407e-06, + "loss": 0.904, + "step": 323800 + }, + { + "epoch": 4.51, + "learning_rate": 7.446537288064754e-06, + "loss": 0.897, + "step": 323900 + }, + { + "epoch": 4.51, + "learning_rate": 7.439571462405439e-06, + "loss": 0.9162, + "step": 324000 + }, + { + "epoch": 4.52, + "learning_rate": 7.432605636746124e-06, + "loss": 0.9034, + "step": 324100 + }, + { + "epoch": 4.52, + "learning_rate": 7.4256398110868075e-06, + "loss": 0.8902, + "step": 324200 + }, + { + "epoch": 4.52, + "learning_rate": 7.418673985427493e-06, + "loss": 0.9144, + "step": 324300 + }, + { + "epoch": 4.52, + "learning_rate": 7.411708159768178e-06, + "loss": 0.8973, + "step": 324400 + }, + { + "epoch": 4.52, + "learning_rate": 7.4047423341088624e-06, + "loss": 0.8659, + "step": 324500 + }, + { + "epoch": 4.52, + "learning_rate": 7.397776508449547e-06, + "loss": 0.9041, + "step": 324600 + }, + { + "epoch": 4.52, + "learning_rate": 7.390810682790232e-06, + "loss": 0.9243, + "step": 324700 + }, + { + "epoch": 4.53, + "learning_rate": 7.383844857130916e-06, + "loss": 0.9291, + "step": 324800 + }, + { + "epoch": 4.53, + "learning_rate": 7.376948689728194e-06, + "loss": 0.9036, + "step": 324900 + }, + { + "epoch": 4.53, + "learning_rate": 7.369982864068878e-06, + "loss": 0.9243, + "step": 325000 + }, + { + "epoch": 4.53, + "learning_rate": 7.363017038409562e-06, + "loss": 0.8956, + "step": 325100 + }, + { + "epoch": 4.53, + "learning_rate": 7.356051212750248e-06, + "loss": 0.8965, + "step": 325200 + }, + { + "epoch": 4.53, + "learning_rate": 7.3490853870909325e-06, + "loss": 0.8957, + "step": 325300 + }, + { + "epoch": 4.53, + "learning_rate": 7.342119561431617e-06, + "loss": 0.9157, + "step": 325400 + }, + { + "epoch": 4.53, + "learning_rate": 7.335153735772301e-06, + "loss": 0.9129, + "step": 325500 + }, + { + "epoch": 4.54, + "learning_rate": 7.328187910112986e-06, + "loss": 0.862, + "step": 325600 + }, + { + "epoch": 4.54, + "learning_rate": 7.32122208445367e-06, + "loss": 0.9181, + "step": 325700 + }, + { + "epoch": 4.54, + "learning_rate": 7.314256258794355e-06, + "loss": 0.9186, + "step": 325800 + }, + { + "epoch": 4.54, + "learning_rate": 7.30729043313504e-06, + "loss": 0.9071, + "step": 325900 + }, + { + "epoch": 4.54, + "learning_rate": 7.3003246074757244e-06, + "loss": 0.9064, + "step": 326000 + }, + { + "epoch": 4.54, + "learning_rate": 7.293358781816408e-06, + "loss": 0.8919, + "step": 326100 + }, + { + "epoch": 4.54, + "learning_rate": 7.286392956157093e-06, + "loss": 0.8977, + "step": 326200 + }, + { + "epoch": 4.55, + "learning_rate": 7.279427130497778e-06, + "loss": 0.8911, + "step": 326300 + }, + { + "epoch": 4.55, + "learning_rate": 7.272461304838463e-06, + "loss": 0.9122, + "step": 326400 + }, + { + "epoch": 4.55, + "learning_rate": 7.265495479179148e-06, + "loss": 0.8897, + "step": 326500 + }, + { + "epoch": 4.55, + "learning_rate": 7.2585296535198325e-06, + "loss": 0.8913, + "step": 326600 + }, + { + "epoch": 4.55, + "learning_rate": 7.251563827860516e-06, + "loss": 0.9271, + "step": 326700 + }, + { + "epoch": 4.55, + "learning_rate": 7.244598002201201e-06, + "loss": 0.8734, + "step": 326800 + }, + { + "epoch": 4.55, + "learning_rate": 7.237632176541886e-06, + "loss": 0.9086, + "step": 326900 + }, + { + "epoch": 4.56, + "learning_rate": 7.230736009139163e-06, + "loss": 0.8985, + "step": 327000 + }, + { + "epoch": 4.56, + "learning_rate": 7.223770183479848e-06, + "loss": 0.8944, + "step": 327100 + }, + { + "epoch": 4.56, + "learning_rate": 7.216804357820533e-06, + "loss": 0.8879, + "step": 327200 + }, + { + "epoch": 4.56, + "learning_rate": 7.209838532161218e-06, + "loss": 0.8855, + "step": 327300 + }, + { + "epoch": 4.56, + "learning_rate": 7.202872706501902e-06, + "loss": 0.9174, + "step": 327400 + }, + { + "epoch": 4.56, + "learning_rate": 7.195906880842586e-06, + "loss": 0.9044, + "step": 327500 + }, + { + "epoch": 4.56, + "learning_rate": 7.188941055183271e-06, + "loss": 0.9211, + "step": 327600 + }, + { + "epoch": 4.57, + "learning_rate": 7.181975229523956e-06, + "loss": 0.8787, + "step": 327700 + }, + { + "epoch": 4.57, + "learning_rate": 7.1750094038646405e-06, + "loss": 0.9017, + "step": 327800 + }, + { + "epoch": 4.57, + "learning_rate": 7.168043578205325e-06, + "loss": 0.8862, + "step": 327900 + }, + { + "epoch": 4.57, + "learning_rate": 7.161077752546009e-06, + "loss": 0.8903, + "step": 328000 + }, + { + "epoch": 4.57, + "learning_rate": 7.154111926886694e-06, + "loss": 0.8577, + "step": 328100 + }, + { + "epoch": 4.57, + "learning_rate": 7.147146101227378e-06, + "loss": 0.889, + "step": 328200 + }, + { + "epoch": 4.57, + "learning_rate": 7.140180275568063e-06, + "loss": 0.8597, + "step": 328300 + }, + { + "epoch": 4.58, + "learning_rate": 7.1332144499087486e-06, + "loss": 0.8797, + "step": 328400 + }, + { + "epoch": 4.58, + "learning_rate": 7.126248624249432e-06, + "loss": 0.8777, + "step": 328500 + }, + { + "epoch": 4.58, + "learning_rate": 7.119282798590117e-06, + "loss": 0.8944, + "step": 328600 + }, + { + "epoch": 4.58, + "learning_rate": 7.112316972930802e-06, + "loss": 0.9213, + "step": 328700 + }, + { + "epoch": 4.58, + "learning_rate": 7.1053511472714865e-06, + "loss": 0.9316, + "step": 328800 + }, + { + "epoch": 4.58, + "learning_rate": 7.098385321612171e-06, + "loss": 0.9004, + "step": 328900 + }, + { + "epoch": 4.58, + "learning_rate": 7.091419495952856e-06, + "loss": 0.9317, + "step": 329000 + }, + { + "epoch": 4.58, + "learning_rate": 7.08445367029354e-06, + "loss": 0.8835, + "step": 329100 + }, + { + "epoch": 4.59, + "learning_rate": 7.077487844634224e-06, + "loss": 0.9031, + "step": 329200 + }, + { + "epoch": 4.59, + "learning_rate": 7.070522018974909e-06, + "loss": 0.8982, + "step": 329300 + }, + { + "epoch": 4.59, + "learning_rate": 7.063556193315594e-06, + "loss": 0.9028, + "step": 329400 + }, + { + "epoch": 4.59, + "learning_rate": 7.056590367656278e-06, + "loss": 0.8781, + "step": 329500 + }, + { + "epoch": 4.59, + "learning_rate": 7.049624541996963e-06, + "loss": 0.9082, + "step": 329600 + }, + { + "epoch": 4.59, + "learning_rate": 7.042658716337648e-06, + "loss": 0.9128, + "step": 329700 + }, + { + "epoch": 4.59, + "learning_rate": 7.0356928906783324e-06, + "loss": 0.9191, + "step": 329800 + }, + { + "epoch": 4.6, + "learning_rate": 7.028727065019017e-06, + "loss": 0.8662, + "step": 329900 + }, + { + "epoch": 4.6, + "learning_rate": 7.021761239359702e-06, + "loss": 0.9145, + "step": 330000 + }, + { + "epoch": 4.6, + "learning_rate": 7.0147954137003865e-06, + "loss": 0.9019, + "step": 330100 + }, + { + "epoch": 4.6, + "learning_rate": 7.00782958804107e-06, + "loss": 0.9101, + "step": 330200 + }, + { + "epoch": 4.6, + "learning_rate": 7.000863762381755e-06, + "loss": 0.9263, + "step": 330300 + }, + { + "epoch": 4.6, + "learning_rate": 6.99389793672244e-06, + "loss": 0.9352, + "step": 330400 + }, + { + "epoch": 4.6, + "learning_rate": 6.986932111063124e-06, + "loss": 0.9041, + "step": 330500 + }, + { + "epoch": 4.61, + "learning_rate": 6.979966285403809e-06, + "loss": 0.9118, + "step": 330600 + }, + { + "epoch": 4.61, + "learning_rate": 6.973000459744494e-06, + "loss": 0.8861, + "step": 330700 + }, + { + "epoch": 4.61, + "learning_rate": 6.966034634085178e-06, + "loss": 0.9053, + "step": 330800 + }, + { + "epoch": 4.61, + "learning_rate": 6.959068808425863e-06, + "loss": 0.8984, + "step": 330900 + }, + { + "epoch": 4.61, + "learning_rate": 6.952102982766548e-06, + "loss": 0.8753, + "step": 331000 + }, + { + "epoch": 4.61, + "learning_rate": 6.945206815363825e-06, + "loss": 0.8981, + "step": 331100 + }, + { + "epoch": 4.61, + "learning_rate": 6.93824098970451e-06, + "loss": 0.9, + "step": 331200 + }, + { + "epoch": 4.62, + "learning_rate": 6.9312751640451944e-06, + "loss": 0.887, + "step": 331300 + }, + { + "epoch": 4.62, + "learning_rate": 6.924309338385879e-06, + "loss": 0.9234, + "step": 331400 + }, + { + "epoch": 4.62, + "learning_rate": 6.917343512726563e-06, + "loss": 0.9033, + "step": 331500 + }, + { + "epoch": 4.62, + "learning_rate": 6.910377687067248e-06, + "loss": 0.8558, + "step": 331600 + }, + { + "epoch": 4.62, + "learning_rate": 6.903411861407933e-06, + "loss": 0.8931, + "step": 331700 + }, + { + "epoch": 4.62, + "learning_rate": 6.896446035748618e-06, + "loss": 0.9008, + "step": 331800 + }, + { + "epoch": 4.62, + "learning_rate": 6.8894802100893025e-06, + "loss": 0.8831, + "step": 331900 + }, + { + "epoch": 4.63, + "learning_rate": 6.882514384429987e-06, + "loss": 0.8829, + "step": 332000 + }, + { + "epoch": 4.63, + "learning_rate": 6.875548558770671e-06, + "loss": 0.9051, + "step": 332100 + }, + { + "epoch": 4.63, + "learning_rate": 6.868582733111356e-06, + "loss": 0.905, + "step": 332200 + }, + { + "epoch": 4.63, + "learning_rate": 6.86161690745204e-06, + "loss": 0.9167, + "step": 332300 + }, + { + "epoch": 4.63, + "learning_rate": 6.854651081792725e-06, + "loss": 0.9018, + "step": 332400 + }, + { + "epoch": 4.63, + "learning_rate": 6.84768525613341e-06, + "loss": 0.8984, + "step": 332500 + }, + { + "epoch": 4.63, + "learning_rate": 6.840719430474094e-06, + "loss": 0.9024, + "step": 332600 + }, + { + "epoch": 4.64, + "learning_rate": 6.833753604814778e-06, + "loss": 0.89, + "step": 332700 + }, + { + "epoch": 4.64, + "learning_rate": 6.826787779155463e-06, + "loss": 0.9009, + "step": 332800 + }, + { + "epoch": 4.64, + "learning_rate": 6.8198219534961485e-06, + "loss": 0.9188, + "step": 332900 + }, + { + "epoch": 4.64, + "learning_rate": 6.812856127836833e-06, + "loss": 0.9017, + "step": 333000 + }, + { + "epoch": 4.64, + "learning_rate": 6.8059599604341105e-06, + "loss": 0.9005, + "step": 333100 + }, + { + "epoch": 4.64, + "learning_rate": 6.798994134774795e-06, + "loss": 0.9046, + "step": 333200 + }, + { + "epoch": 4.64, + "learning_rate": 6.79202830911548e-06, + "loss": 0.9067, + "step": 333300 + }, + { + "epoch": 4.64, + "learning_rate": 6.785062483456164e-06, + "loss": 0.9117, + "step": 333400 + }, + { + "epoch": 4.65, + "learning_rate": 6.778096657796848e-06, + "loss": 0.9139, + "step": 333500 + }, + { + "epoch": 4.65, + "learning_rate": 6.771130832137533e-06, + "loss": 0.9028, + "step": 333600 + }, + { + "epoch": 4.65, + "learning_rate": 6.764165006478219e-06, + "loss": 0.8987, + "step": 333700 + }, + { + "epoch": 4.65, + "learning_rate": 6.757268839075496e-06, + "loss": 0.8911, + "step": 333800 + }, + { + "epoch": 4.65, + "learning_rate": 6.7503030134161805e-06, + "loss": 0.8636, + "step": 333900 + }, + { + "epoch": 4.65, + "learning_rate": 6.743337187756865e-06, + "loss": 0.902, + "step": 334000 + }, + { + "epoch": 4.65, + "learning_rate": 6.736371362097549e-06, + "loss": 0.8968, + "step": 334100 + }, + { + "epoch": 4.66, + "learning_rate": 6.729405536438234e-06, + "loss": 0.8989, + "step": 334200 + }, + { + "epoch": 4.66, + "learning_rate": 6.7224397107789184e-06, + "loss": 0.887, + "step": 334300 + }, + { + "epoch": 4.66, + "learning_rate": 6.715473885119603e-06, + "loss": 0.886, + "step": 334400 + }, + { + "epoch": 4.66, + "learning_rate": 6.708508059460289e-06, + "loss": 0.9102, + "step": 334500 + }, + { + "epoch": 4.66, + "learning_rate": 6.701542233800973e-06, + "loss": 0.895, + "step": 334600 + }, + { + "epoch": 4.66, + "learning_rate": 6.694576408141657e-06, + "loss": 0.8741, + "step": 334700 + }, + { + "epoch": 4.66, + "learning_rate": 6.687610582482342e-06, + "loss": 0.9149, + "step": 334800 + }, + { + "epoch": 4.67, + "learning_rate": 6.6806447568230265e-06, + "loss": 0.8864, + "step": 334900 + }, + { + "epoch": 4.67, + "learning_rate": 6.673678931163711e-06, + "loss": 0.895, + "step": 335000 + }, + { + "epoch": 4.67, + "learning_rate": 6.666713105504396e-06, + "loss": 0.902, + "step": 335100 + }, + { + "epoch": 4.67, + "learning_rate": 6.65974727984508e-06, + "loss": 0.8989, + "step": 335200 + }, + { + "epoch": 4.67, + "learning_rate": 6.652781454185764e-06, + "loss": 0.9056, + "step": 335300 + }, + { + "epoch": 4.67, + "learning_rate": 6.645815628526449e-06, + "loss": 0.8864, + "step": 335400 + }, + { + "epoch": 4.67, + "learning_rate": 6.638849802867134e-06, + "loss": 0.9214, + "step": 335500 + }, + { + "epoch": 4.68, + "learning_rate": 6.6318839772078185e-06, + "loss": 0.8958, + "step": 335600 + }, + { + "epoch": 4.68, + "learning_rate": 6.624918151548503e-06, + "loss": 0.9103, + "step": 335700 + }, + { + "epoch": 4.68, + "learning_rate": 6.617952325889188e-06, + "loss": 0.8671, + "step": 335800 + }, + { + "epoch": 4.68, + "learning_rate": 6.6109865002298725e-06, + "loss": 0.9166, + "step": 335900 + }, + { + "epoch": 4.68, + "learning_rate": 6.604020674570557e-06, + "loss": 0.8945, + "step": 336000 + }, + { + "epoch": 4.68, + "learning_rate": 6.597054848911242e-06, + "loss": 0.923, + "step": 336100 + }, + { + "epoch": 4.68, + "learning_rate": 6.5900890232519266e-06, + "loss": 0.9177, + "step": 336200 + }, + { + "epoch": 4.69, + "learning_rate": 6.583123197592611e-06, + "loss": 0.905, + "step": 336300 + }, + { + "epoch": 4.69, + "learning_rate": 6.576157371933295e-06, + "loss": 0.9267, + "step": 336400 + }, + { + "epoch": 4.69, + "learning_rate": 6.56919154627398e-06, + "loss": 0.8953, + "step": 336500 + }, + { + "epoch": 4.69, + "learning_rate": 6.5622257206146644e-06, + "loss": 0.8962, + "step": 336600 + }, + { + "epoch": 4.69, + "learning_rate": 6.555259894955349e-06, + "loss": 0.9365, + "step": 336700 + }, + { + "epoch": 4.69, + "learning_rate": 6.548294069296034e-06, + "loss": 0.8649, + "step": 336800 + }, + { + "epoch": 4.69, + "learning_rate": 6.541328243636718e-06, + "loss": 0.8944, + "step": 336900 + }, + { + "epoch": 4.69, + "learning_rate": 6.534362417977403e-06, + "loss": 0.9301, + "step": 337000 + }, + { + "epoch": 4.7, + "learning_rate": 6.527396592318088e-06, + "loss": 0.8917, + "step": 337100 + }, + { + "epoch": 4.7, + "learning_rate": 6.520500424915365e-06, + "loss": 0.8989, + "step": 337200 + }, + { + "epoch": 4.7, + "learning_rate": 6.51353459925605e-06, + "loss": 0.8881, + "step": 337300 + }, + { + "epoch": 4.7, + "learning_rate": 6.5065687735967345e-06, + "loss": 0.9066, + "step": 337400 + }, + { + "epoch": 4.7, + "learning_rate": 6.499602947937419e-06, + "loss": 0.8719, + "step": 337500 + }, + { + "epoch": 4.7, + "learning_rate": 6.492637122278104e-06, + "loss": 0.9109, + "step": 337600 + }, + { + "epoch": 4.7, + "learning_rate": 6.485671296618788e-06, + "loss": 0.8819, + "step": 337700 + }, + { + "epoch": 4.71, + "learning_rate": 6.478705470959473e-06, + "loss": 0.9111, + "step": 337800 + }, + { + "epoch": 4.71, + "learning_rate": 6.471739645300158e-06, + "loss": 0.9033, + "step": 337900 + }, + { + "epoch": 4.71, + "learning_rate": 6.464773819640843e-06, + "loss": 0.9152, + "step": 338000 + }, + { + "epoch": 4.71, + "learning_rate": 6.457807993981527e-06, + "loss": 0.8903, + "step": 338100 + }, + { + "epoch": 4.71, + "learning_rate": 6.450842168322211e-06, + "loss": 0.8815, + "step": 338200 + }, + { + "epoch": 4.71, + "learning_rate": 6.443876342662896e-06, + "loss": 0.9236, + "step": 338300 + }, + { + "epoch": 4.71, + "learning_rate": 6.4369105170035805e-06, + "loss": 0.899, + "step": 338400 + }, + { + "epoch": 4.72, + "learning_rate": 6.429944691344265e-06, + "loss": 0.8957, + "step": 338500 + }, + { + "epoch": 4.72, + "learning_rate": 6.42297886568495e-06, + "loss": 0.8671, + "step": 338600 + }, + { + "epoch": 4.72, + "learning_rate": 6.4160130400256345e-06, + "loss": 0.9103, + "step": 338700 + }, + { + "epoch": 4.72, + "learning_rate": 6.409047214366318e-06, + "loss": 0.8624, + "step": 338800 + }, + { + "epoch": 4.72, + "learning_rate": 6.402081388707003e-06, + "loss": 0.8942, + "step": 338900 + }, + { + "epoch": 4.72, + "learning_rate": 6.395115563047689e-06, + "loss": 0.9036, + "step": 339000 + }, + { + "epoch": 4.72, + "learning_rate": 6.388149737388373e-06, + "loss": 0.8815, + "step": 339100 + }, + { + "epoch": 4.73, + "learning_rate": 6.381183911729058e-06, + "loss": 0.9228, + "step": 339200 + }, + { + "epoch": 4.73, + "learning_rate": 6.374218086069742e-06, + "loss": 0.8905, + "step": 339300 + }, + { + "epoch": 4.73, + "learning_rate": 6.3672522604104265e-06, + "loss": 0.8905, + "step": 339400 + }, + { + "epoch": 4.73, + "learning_rate": 6.360356093007704e-06, + "loss": 0.871, + "step": 339500 + }, + { + "epoch": 4.73, + "learning_rate": 6.3533902673483884e-06, + "loss": 0.8743, + "step": 339600 + }, + { + "epoch": 4.73, + "learning_rate": 6.346424441689073e-06, + "loss": 0.8953, + "step": 339700 + }, + { + "epoch": 4.73, + "learning_rate": 6.339458616029759e-06, + "loss": 0.9128, + "step": 339800 + }, + { + "epoch": 4.74, + "learning_rate": 6.332492790370443e-06, + "loss": 0.9017, + "step": 339900 + }, + { + "epoch": 4.74, + "learning_rate": 6.325526964711128e-06, + "loss": 0.8974, + "step": 340000 + }, + { + "epoch": 4.74, + "learning_rate": 6.318561139051812e-06, + "loss": 0.9075, + "step": 340100 + }, + { + "epoch": 4.74, + "learning_rate": 6.3115953133924965e-06, + "loss": 0.9063, + "step": 340200 + }, + { + "epoch": 4.74, + "learning_rate": 6.304629487733181e-06, + "loss": 0.9167, + "step": 340300 + }, + { + "epoch": 4.74, + "learning_rate": 6.297663662073866e-06, + "loss": 0.873, + "step": 340400 + }, + { + "epoch": 4.74, + "learning_rate": 6.290697836414551e-06, + "loss": 0.8829, + "step": 340500 + }, + { + "epoch": 4.75, + "learning_rate": 6.283732010755234e-06, + "loss": 0.8914, + "step": 340600 + }, + { + "epoch": 4.75, + "learning_rate": 6.276766185095919e-06, + "loss": 0.9371, + "step": 340700 + }, + { + "epoch": 4.75, + "learning_rate": 6.269800359436604e-06, + "loss": 0.8828, + "step": 340800 + }, + { + "epoch": 4.75, + "learning_rate": 6.2628345337772885e-06, + "loss": 0.895, + "step": 340900 + }, + { + "epoch": 4.75, + "learning_rate": 6.255868708117973e-06, + "loss": 0.8914, + "step": 341000 + }, + { + "epoch": 4.75, + "learning_rate": 6.248972540715251e-06, + "loss": 0.9313, + "step": 341100 + }, + { + "epoch": 4.75, + "learning_rate": 6.242006715055936e-06, + "loss": 0.879, + "step": 341200 + }, + { + "epoch": 4.75, + "learning_rate": 6.235040889396621e-06, + "loss": 0.8908, + "step": 341300 + }, + { + "epoch": 4.76, + "learning_rate": 6.2280750637373045e-06, + "loss": 0.9114, + "step": 341400 + }, + { + "epoch": 4.76, + "learning_rate": 6.221109238077989e-06, + "loss": 0.9086, + "step": 341500 + }, + { + "epoch": 4.76, + "learning_rate": 6.214143412418674e-06, + "loss": 0.9252, + "step": 341600 + }, + { + "epoch": 4.76, + "learning_rate": 6.2071775867593585e-06, + "loss": 0.9261, + "step": 341700 + }, + { + "epoch": 4.76, + "learning_rate": 6.200211761100043e-06, + "loss": 0.8984, + "step": 341800 + }, + { + "epoch": 4.76, + "learning_rate": 6.193245935440728e-06, + "loss": 0.8705, + "step": 341900 + }, + { + "epoch": 4.76, + "learning_rate": 6.186280109781413e-06, + "loss": 0.8985, + "step": 342000 + }, + { + "epoch": 4.77, + "learning_rate": 6.179314284122097e-06, + "loss": 0.8767, + "step": 342100 + }, + { + "epoch": 4.77, + "learning_rate": 6.172348458462782e-06, + "loss": 0.8924, + "step": 342200 + }, + { + "epoch": 4.77, + "learning_rate": 6.165382632803467e-06, + "loss": 0.9175, + "step": 342300 + }, + { + "epoch": 4.77, + "learning_rate": 6.158416807144151e-06, + "loss": 0.9246, + "step": 342400 + }, + { + "epoch": 4.77, + "learning_rate": 6.151450981484835e-06, + "loss": 0.8804, + "step": 342500 + }, + { + "epoch": 4.77, + "learning_rate": 6.14448515582552e-06, + "loss": 0.8869, + "step": 342600 + }, + { + "epoch": 4.77, + "learning_rate": 6.1375193301662045e-06, + "loss": 0.919, + "step": 342700 + }, + { + "epoch": 4.78, + "learning_rate": 6.130553504506889e-06, + "loss": 0.9046, + "step": 342800 + }, + { + "epoch": 4.78, + "learning_rate": 6.123587678847574e-06, + "loss": 0.8937, + "step": 342900 + }, + { + "epoch": 4.78, + "learning_rate": 6.1166218531882586e-06, + "loss": 0.8938, + "step": 343000 + }, + { + "epoch": 4.78, + "learning_rate": 6.109656027528943e-06, + "loss": 0.903, + "step": 343100 + }, + { + "epoch": 4.78, + "learning_rate": 6.102690201869628e-06, + "loss": 0.8774, + "step": 343200 + }, + { + "epoch": 4.78, + "learning_rate": 6.095724376210313e-06, + "loss": 0.8893, + "step": 343300 + }, + { + "epoch": 4.78, + "learning_rate": 6.088758550550997e-06, + "loss": 0.8893, + "step": 343400 + }, + { + "epoch": 4.79, + "learning_rate": 6.081792724891682e-06, + "loss": 0.8624, + "step": 343500 + }, + { + "epoch": 4.79, + "learning_rate": 6.074826899232366e-06, + "loss": 0.8782, + "step": 343600 + }, + { + "epoch": 4.79, + "learning_rate": 6.0678610735730505e-06, + "loss": 0.8608, + "step": 343700 + }, + { + "epoch": 4.79, + "learning_rate": 6.060895247913735e-06, + "loss": 0.8896, + "step": 343800 + }, + { + "epoch": 4.79, + "learning_rate": 6.05392942225442e-06, + "loss": 0.9133, + "step": 343900 + }, + { + "epoch": 4.79, + "learning_rate": 6.0469635965951045e-06, + "loss": 0.9189, + "step": 344000 + }, + { + "epoch": 4.79, + "learning_rate": 6.039997770935789e-06, + "loss": 0.908, + "step": 344100 + }, + { + "epoch": 4.8, + "learning_rate": 6.033031945276473e-06, + "loss": 0.902, + "step": 344200 + }, + { + "epoch": 4.8, + "learning_rate": 6.026066119617159e-06, + "loss": 0.8951, + "step": 344300 + }, + { + "epoch": 4.8, + "learning_rate": 6.019100293957843e-06, + "loss": 0.9014, + "step": 344400 + }, + { + "epoch": 4.8, + "learning_rate": 6.012134468298528e-06, + "loss": 0.8941, + "step": 344500 + }, + { + "epoch": 4.8, + "learning_rate": 6.005168642639213e-06, + "loss": 0.9028, + "step": 344600 + }, + { + "epoch": 4.8, + "learning_rate": 5.9982028169798965e-06, + "loss": 0.8792, + "step": 344700 + }, + { + "epoch": 4.8, + "learning_rate": 5.991236991320581e-06, + "loss": 0.9255, + "step": 344800 + }, + { + "epoch": 4.81, + "learning_rate": 5.984271165661266e-06, + "loss": 0.8769, + "step": 344900 + }, + { + "epoch": 4.81, + "learning_rate": 5.9773053400019505e-06, + "loss": 0.9103, + "step": 345000 + }, + { + "epoch": 4.81, + "learning_rate": 5.970339514342635e-06, + "loss": 0.924, + "step": 345100 + }, + { + "epoch": 4.81, + "learning_rate": 5.96337368868332e-06, + "loss": 0.8969, + "step": 345200 + }, + { + "epoch": 4.81, + "learning_rate": 5.956407863024004e-06, + "loss": 0.9029, + "step": 345300 + }, + { + "epoch": 4.81, + "learning_rate": 5.949442037364688e-06, + "loss": 0.9153, + "step": 345400 + }, + { + "epoch": 4.81, + "learning_rate": 5.9425458699619665e-06, + "loss": 0.8868, + "step": 345500 + }, + { + "epoch": 4.81, + "learning_rate": 5.935580044302651e-06, + "loss": 0.9184, + "step": 345600 + }, + { + "epoch": 4.82, + "learning_rate": 5.928614218643336e-06, + "loss": 0.8876, + "step": 345700 + }, + { + "epoch": 4.82, + "learning_rate": 5.921648392984021e-06, + "loss": 0.9123, + "step": 345800 + }, + { + "epoch": 4.82, + "learning_rate": 5.914682567324705e-06, + "loss": 0.8983, + "step": 345900 + }, + { + "epoch": 4.82, + "learning_rate": 5.90771674166539e-06, + "loss": 0.9041, + "step": 346000 + }, + { + "epoch": 4.82, + "learning_rate": 5.900750916006074e-06, + "loss": 0.9088, + "step": 346100 + }, + { + "epoch": 4.82, + "learning_rate": 5.8937850903467585e-06, + "loss": 0.8975, + "step": 346200 + }, + { + "epoch": 4.82, + "learning_rate": 5.886819264687443e-06, + "loss": 0.8626, + "step": 346300 + }, + { + "epoch": 4.83, + "learning_rate": 5.879853439028129e-06, + "loss": 0.9134, + "step": 346400 + }, + { + "epoch": 4.83, + "learning_rate": 5.872887613368813e-06, + "loss": 0.9149, + "step": 346500 + }, + { + "epoch": 4.83, + "learning_rate": 5.865991445966091e-06, + "loss": 0.9046, + "step": 346600 + }, + { + "epoch": 4.83, + "learning_rate": 5.859025620306775e-06, + "loss": 0.8771, + "step": 346700 + }, + { + "epoch": 4.83, + "learning_rate": 5.852059794647459e-06, + "loss": 0.9007, + "step": 346800 + }, + { + "epoch": 4.83, + "learning_rate": 5.845093968988144e-06, + "loss": 0.8749, + "step": 346900 + }, + { + "epoch": 4.83, + "learning_rate": 5.8381281433288285e-06, + "loss": 0.924, + "step": 347000 + }, + { + "epoch": 4.84, + "learning_rate": 5.831162317669513e-06, + "loss": 0.9206, + "step": 347100 + }, + { + "epoch": 4.84, + "learning_rate": 5.824266150266791e-06, + "loss": 0.8967, + "step": 347200 + }, + { + "epoch": 4.84, + "learning_rate": 5.817300324607476e-06, + "loss": 0.8749, + "step": 347300 + }, + { + "epoch": 4.84, + "learning_rate": 5.810334498948161e-06, + "loss": 0.9193, + "step": 347400 + }, + { + "epoch": 4.84, + "learning_rate": 5.8033686732888446e-06, + "loss": 0.8796, + "step": 347500 + }, + { + "epoch": 4.84, + "learning_rate": 5.796402847629529e-06, + "loss": 0.9018, + "step": 347600 + }, + { + "epoch": 4.84, + "learning_rate": 5.789437021970214e-06, + "loss": 0.8721, + "step": 347700 + }, + { + "epoch": 4.85, + "learning_rate": 5.782471196310899e-06, + "loss": 0.9011, + "step": 347800 + }, + { + "epoch": 4.85, + "learning_rate": 5.775505370651583e-06, + "loss": 0.9137, + "step": 347900 + }, + { + "epoch": 4.85, + "learning_rate": 5.768539544992269e-06, + "loss": 0.8995, + "step": 348000 + }, + { + "epoch": 4.85, + "learning_rate": 5.761573719332953e-06, + "loss": 0.9014, + "step": 348100 + }, + { + "epoch": 4.85, + "learning_rate": 5.754607893673637e-06, + "loss": 0.9109, + "step": 348200 + }, + { + "epoch": 4.85, + "learning_rate": 5.747642068014322e-06, + "loss": 0.8916, + "step": 348300 + }, + { + "epoch": 4.85, + "learning_rate": 5.740676242355007e-06, + "loss": 0.8899, + "step": 348400 + }, + { + "epoch": 4.86, + "learning_rate": 5.733710416695691e-06, + "loss": 0.8821, + "step": 348500 + }, + { + "epoch": 4.86, + "learning_rate": 5.726744591036376e-06, + "loss": 0.8959, + "step": 348600 + }, + { + "epoch": 4.86, + "learning_rate": 5.71977876537706e-06, + "loss": 0.8975, + "step": 348700 + }, + { + "epoch": 4.86, + "learning_rate": 5.712812939717745e-06, + "loss": 0.8898, + "step": 348800 + }, + { + "epoch": 4.86, + "learning_rate": 5.705847114058429e-06, + "loss": 0.8829, + "step": 348900 + }, + { + "epoch": 4.86, + "learning_rate": 5.698881288399114e-06, + "loss": 0.8892, + "step": 349000 + }, + { + "epoch": 4.86, + "learning_rate": 5.691915462739799e-06, + "loss": 0.8734, + "step": 349100 + }, + { + "epoch": 4.86, + "learning_rate": 5.684949637080483e-06, + "loss": 0.91, + "step": 349200 + }, + { + "epoch": 4.87, + "learning_rate": 5.677983811421168e-06, + "loss": 0.8991, + "step": 349300 + }, + { + "epoch": 4.87, + "learning_rate": 5.671017985761853e-06, + "loss": 0.9066, + "step": 349400 + }, + { + "epoch": 4.87, + "learning_rate": 5.664052160102537e-06, + "loss": 0.8815, + "step": 349500 + }, + { + "epoch": 4.87, + "learning_rate": 5.657086334443222e-06, + "loss": 0.9071, + "step": 349600 + }, + { + "epoch": 4.87, + "learning_rate": 5.650120508783907e-06, + "loss": 0.902, + "step": 349700 + }, + { + "epoch": 4.87, + "learning_rate": 5.6431546831245906e-06, + "loss": 0.9186, + "step": 349800 + }, + { + "epoch": 4.87, + "learning_rate": 5.636188857465275e-06, + "loss": 0.895, + "step": 349900 + }, + { + "epoch": 4.88, + "learning_rate": 5.62922303180596e-06, + "loss": 0.8841, + "step": 350000 + }, + { + "epoch": 4.88, + "learning_rate": 5.622257206146645e-06, + "loss": 0.8849, + "step": 350100 + }, + { + "epoch": 4.88, + "learning_rate": 5.615291380487329e-06, + "loss": 0.8815, + "step": 350200 + }, + { + "epoch": 4.88, + "learning_rate": 5.608325554828013e-06, + "loss": 0.8902, + "step": 350300 + }, + { + "epoch": 4.88, + "learning_rate": 5.601359729168699e-06, + "loss": 0.9048, + "step": 350400 + }, + { + "epoch": 4.88, + "learning_rate": 5.594393903509383e-06, + "loss": 0.9038, + "step": 350500 + }, + { + "epoch": 4.88, + "learning_rate": 5.587428077850068e-06, + "loss": 0.8853, + "step": 350600 + }, + { + "epoch": 4.89, + "learning_rate": 5.580462252190753e-06, + "loss": 0.9107, + "step": 350700 + }, + { + "epoch": 4.89, + "learning_rate": 5.573496426531437e-06, + "loss": 0.9116, + "step": 350800 + }, + { + "epoch": 4.89, + "learning_rate": 5.566530600872121e-06, + "loss": 0.8804, + "step": 350900 + }, + { + "epoch": 4.89, + "learning_rate": 5.559564775212806e-06, + "loss": 0.8989, + "step": 351000 + }, + { + "epoch": 4.89, + "learning_rate": 5.552598949553491e-06, + "loss": 0.8993, + "step": 351100 + }, + { + "epoch": 4.89, + "learning_rate": 5.545633123894175e-06, + "loss": 0.8658, + "step": 351200 + }, + { + "epoch": 4.89, + "learning_rate": 5.53866729823486e-06, + "loss": 0.8993, + "step": 351300 + }, + { + "epoch": 4.9, + "learning_rate": 5.531701472575545e-06, + "loss": 0.9247, + "step": 351400 + }, + { + "epoch": 4.9, + "learning_rate": 5.5247356469162285e-06, + "loss": 0.8967, + "step": 351500 + }, + { + "epoch": 4.9, + "learning_rate": 5.517769821256913e-06, + "loss": 0.9107, + "step": 351600 + }, + { + "epoch": 4.9, + "learning_rate": 5.510803995597599e-06, + "loss": 0.8948, + "step": 351700 + }, + { + "epoch": 4.9, + "learning_rate": 5.503838169938283e-06, + "loss": 0.8823, + "step": 351800 + }, + { + "epoch": 4.9, + "learning_rate": 5.496872344278968e-06, + "loss": 0.8879, + "step": 351900 + }, + { + "epoch": 4.9, + "learning_rate": 5.489906518619652e-06, + "loss": 0.9149, + "step": 352000 + }, + { + "epoch": 4.91, + "learning_rate": 5.482940692960337e-06, + "loss": 0.8996, + "step": 352100 + }, + { + "epoch": 4.91, + "learning_rate": 5.475974867301021e-06, + "loss": 0.9015, + "step": 352200 + }, + { + "epoch": 4.91, + "learning_rate": 5.469009041641706e-06, + "loss": 0.9153, + "step": 352300 + }, + { + "epoch": 4.91, + "learning_rate": 5.462043215982391e-06, + "loss": 0.883, + "step": 352400 + }, + { + "epoch": 4.91, + "learning_rate": 5.455077390323075e-06, + "loss": 0.8792, + "step": 352500 + }, + { + "epoch": 4.91, + "learning_rate": 5.448111564663759e-06, + "loss": 0.9128, + "step": 352600 + }, + { + "epoch": 4.91, + "learning_rate": 5.441145739004444e-06, + "loss": 0.8933, + "step": 352700 + }, + { + "epoch": 4.92, + "learning_rate": 5.4341799133451285e-06, + "loss": 0.8921, + "step": 352800 + }, + { + "epoch": 4.92, + "learning_rate": 5.427214087685814e-06, + "loss": 0.8886, + "step": 352900 + }, + { + "epoch": 4.92, + "learning_rate": 5.420248262026499e-06, + "loss": 0.8993, + "step": 353000 + }, + { + "epoch": 4.92, + "learning_rate": 5.4132824363671826e-06, + "loss": 0.9102, + "step": 353100 + }, + { + "epoch": 4.92, + "learning_rate": 5.406316610707867e-06, + "loss": 0.904, + "step": 353200 + }, + { + "epoch": 4.92, + "learning_rate": 5.399350785048552e-06, + "loss": 0.8972, + "step": 353300 + }, + { + "epoch": 4.92, + "learning_rate": 5.392384959389237e-06, + "loss": 0.9171, + "step": 353400 + }, + { + "epoch": 4.92, + "learning_rate": 5.385419133729921e-06, + "loss": 0.9246, + "step": 353500 + }, + { + "epoch": 4.93, + "learning_rate": 5.378453308070606e-06, + "loss": 0.9323, + "step": 353600 + }, + { + "epoch": 4.93, + "learning_rate": 5.37148748241129e-06, + "loss": 0.8837, + "step": 353700 + }, + { + "epoch": 4.93, + "learning_rate": 5.364591315008569e-06, + "loss": 0.8957, + "step": 353800 + }, + { + "epoch": 4.93, + "learning_rate": 5.357625489349253e-06, + "loss": 0.875, + "step": 353900 + }, + { + "epoch": 4.93, + "learning_rate": 5.350659663689937e-06, + "loss": 0.8903, + "step": 354000 + }, + { + "epoch": 4.93, + "learning_rate": 5.343693838030622e-06, + "loss": 0.8809, + "step": 354100 + }, + { + "epoch": 4.93, + "learning_rate": 5.336728012371307e-06, + "loss": 0.8936, + "step": 354200 + }, + { + "epoch": 4.94, + "learning_rate": 5.329831844968584e-06, + "loss": 0.8777, + "step": 354300 + }, + { + "epoch": 4.94, + "learning_rate": 5.322866019309269e-06, + "loss": 0.8892, + "step": 354400 + }, + { + "epoch": 4.94, + "learning_rate": 5.315900193649954e-06, + "loss": 0.8496, + "step": 354500 + }, + { + "epoch": 4.94, + "learning_rate": 5.308934367990638e-06, + "loss": 0.8915, + "step": 354600 + }, + { + "epoch": 4.94, + "learning_rate": 5.301968542331323e-06, + "loss": 0.9091, + "step": 354700 + }, + { + "epoch": 4.94, + "learning_rate": 5.295002716672007e-06, + "loss": 0.8996, + "step": 354800 + }, + { + "epoch": 4.94, + "learning_rate": 5.288036891012692e-06, + "loss": 0.8934, + "step": 354900 + }, + { + "epoch": 4.95, + "learning_rate": 5.281071065353377e-06, + "loss": 0.914, + "step": 355000 + }, + { + "epoch": 4.95, + "learning_rate": 5.2741052396940614e-06, + "loss": 0.8762, + "step": 355100 + }, + { + "epoch": 4.95, + "learning_rate": 5.267139414034745e-06, + "loss": 0.9032, + "step": 355200 + }, + { + "epoch": 4.95, + "learning_rate": 5.26017358837543e-06, + "loss": 0.8971, + "step": 355300 + }, + { + "epoch": 4.95, + "learning_rate": 5.253207762716115e-06, + "loss": 0.9071, + "step": 355400 + }, + { + "epoch": 4.95, + "learning_rate": 5.246241937056799e-06, + "loss": 0.9246, + "step": 355500 + }, + { + "epoch": 4.95, + "learning_rate": 5.239276111397484e-06, + "loss": 0.9098, + "step": 355600 + }, + { + "epoch": 4.96, + "learning_rate": 5.232310285738169e-06, + "loss": 0.8697, + "step": 355700 + }, + { + "epoch": 4.96, + "learning_rate": 5.225344460078853e-06, + "loss": 0.914, + "step": 355800 + }, + { + "epoch": 4.96, + "learning_rate": 5.218378634419538e-06, + "loss": 0.8898, + "step": 355900 + }, + { + "epoch": 4.96, + "learning_rate": 5.211412808760223e-06, + "loss": 0.9309, + "step": 356000 + }, + { + "epoch": 4.96, + "learning_rate": 5.204446983100907e-06, + "loss": 0.906, + "step": 356100 + }, + { + "epoch": 4.96, + "learning_rate": 5.197481157441592e-06, + "loss": 0.8984, + "step": 356200 + }, + { + "epoch": 4.96, + "learning_rate": 5.190515331782276e-06, + "loss": 0.9126, + "step": 356300 + }, + { + "epoch": 4.97, + "learning_rate": 5.183549506122961e-06, + "loss": 0.894, + "step": 356400 + }, + { + "epoch": 4.97, + "learning_rate": 5.176583680463645e-06, + "loss": 0.8923, + "step": 356500 + }, + { + "epoch": 4.97, + "learning_rate": 5.16961785480433e-06, + "loss": 0.8962, + "step": 356600 + }, + { + "epoch": 4.97, + "learning_rate": 5.162652029145015e-06, + "loss": 0.8862, + "step": 356700 + }, + { + "epoch": 4.97, + "learning_rate": 5.155686203485699e-06, + "loss": 0.8902, + "step": 356800 + }, + { + "epoch": 4.97, + "learning_rate": 5.148720377826383e-06, + "loss": 0.9039, + "step": 356900 + }, + { + "epoch": 4.97, + "learning_rate": 5.141754552167069e-06, + "loss": 0.8818, + "step": 357000 + }, + { + "epoch": 4.97, + "learning_rate": 5.134788726507753e-06, + "loss": 0.8793, + "step": 357100 + }, + { + "epoch": 4.98, + "learning_rate": 5.127822900848438e-06, + "loss": 0.9053, + "step": 357200 + }, + { + "epoch": 4.98, + "learning_rate": 5.120857075189123e-06, + "loss": 0.8951, + "step": 357300 + }, + { + "epoch": 4.98, + "learning_rate": 5.113891249529807e-06, + "loss": 0.8964, + "step": 357400 + }, + { + "epoch": 4.98, + "learning_rate": 5.106925423870491e-06, + "loss": 0.8916, + "step": 357500 + }, + { + "epoch": 4.98, + "learning_rate": 5.099959598211176e-06, + "loss": 0.8703, + "step": 357600 + }, + { + "epoch": 4.98, + "learning_rate": 5.092993772551861e-06, + "loss": 0.9257, + "step": 357700 + }, + { + "epoch": 4.98, + "learning_rate": 5.086027946892545e-06, + "loss": 0.8877, + "step": 357800 + }, + { + "epoch": 4.99, + "learning_rate": 5.07906212123323e-06, + "loss": 0.8723, + "step": 357900 + }, + { + "epoch": 4.99, + "learning_rate": 5.072096295573914e-06, + "loss": 0.8969, + "step": 358000 + }, + { + "epoch": 4.99, + "learning_rate": 5.0651304699145985e-06, + "loss": 0.8809, + "step": 358100 + }, + { + "epoch": 4.99, + "learning_rate": 5.058164644255284e-06, + "loss": 0.916, + "step": 358200 + }, + { + "epoch": 4.99, + "learning_rate": 5.051198818595969e-06, + "loss": 0.9012, + "step": 358300 + }, + { + "epoch": 4.99, + "learning_rate": 5.0442329929366534e-06, + "loss": 0.9146, + "step": 358400 + }, + { + "epoch": 4.99, + "learning_rate": 5.037267167277337e-06, + "loss": 0.8625, + "step": 358500 + }, + { + "epoch": 5.0, + "learning_rate": 5.030301341618022e-06, + "loss": 0.9046, + "step": 358600 + }, + { + "epoch": 5.0, + "learning_rate": 5.023335515958707e-06, + "loss": 0.8989, + "step": 358700 + }, + { + "epoch": 5.0, + "learning_rate": 5.016369690299391e-06, + "loss": 0.9112, + "step": 358800 + }, + { + "epoch": 5.0, + "eval_gen_len": 20.0, + "eval_loss": 1.1667309999465942, + "eval_rouge1": 12.5404, + "eval_rouge2": 3.7842, + "eval_rougeL": 12.0541, + "eval_rougeLsum": 12.1643, + "eval_runtime": 1508.2531, + "eval_samples_per_second": 8.863, + "eval_steps_per_second": 2.216, + "step": 358895 + }, + { + "epoch": 5.0, + "learning_rate": 5.009403864640076e-06, + "loss": 0.9271, + "step": 358900 + }, + { + "epoch": 5.0, + "learning_rate": 5.002438038980761e-06, + "loss": 0.8275, + "step": 359000 + }, + { + "epoch": 5.0, + "learning_rate": 4.9954722133214445e-06, + "loss": 0.8209, + "step": 359100 + }, + { + "epoch": 5.0, + "learning_rate": 4.9885760459187235e-06, + "loss": 0.8225, + "step": 359200 + }, + { + "epoch": 5.01, + "learning_rate": 4.981610220259407e-06, + "loss": 0.8078, + "step": 359300 + }, + { + "epoch": 5.01, + "learning_rate": 4.974644394600092e-06, + "loss": 0.8394, + "step": 359400 + }, + { + "epoch": 5.01, + "learning_rate": 4.967678568940777e-06, + "loss": 0.8552, + "step": 359500 + }, + { + "epoch": 5.01, + "learning_rate": 4.960712743281461e-06, + "loss": 0.8287, + "step": 359600 + }, + { + "epoch": 5.01, + "learning_rate": 4.953746917622146e-06, + "loss": 0.7978, + "step": 359700 + }, + { + "epoch": 5.01, + "learning_rate": 4.94678109196283e-06, + "loss": 0.8254, + "step": 359800 + }, + { + "epoch": 5.01, + "learning_rate": 4.939815266303515e-06, + "loss": 0.805, + "step": 359900 + }, + { + "epoch": 5.02, + "learning_rate": 4.932849440644199e-06, + "loss": 0.8448, + "step": 360000 + }, + { + "epoch": 5.02, + "learning_rate": 4.925883614984884e-06, + "loss": 0.8456, + "step": 360100 + }, + { + "epoch": 5.02, + "learning_rate": 4.9189177893255695e-06, + "loss": 0.8382, + "step": 360200 + }, + { + "epoch": 5.02, + "learning_rate": 4.911951963666254e-06, + "loss": 0.8349, + "step": 360300 + }, + { + "epoch": 5.02, + "learning_rate": 4.904986138006938e-06, + "loss": 0.8105, + "step": 360400 + }, + { + "epoch": 5.02, + "learning_rate": 4.898020312347623e-06, + "loss": 0.8326, + "step": 360500 + }, + { + "epoch": 5.02, + "learning_rate": 4.891054486688307e-06, + "loss": 0.8429, + "step": 360600 + }, + { + "epoch": 5.03, + "learning_rate": 4.884088661028992e-06, + "loss": 0.8391, + "step": 360700 + }, + { + "epoch": 5.03, + "learning_rate": 4.877122835369677e-06, + "loss": 0.8196, + "step": 360800 + }, + { + "epoch": 5.03, + "learning_rate": 4.870226667966954e-06, + "loss": 0.8342, + "step": 360900 + }, + { + "epoch": 5.03, + "learning_rate": 4.8632608423076395e-06, + "loss": 0.8288, + "step": 361000 + }, + { + "epoch": 5.03, + "learning_rate": 4.856295016648323e-06, + "loss": 0.8454, + "step": 361100 + }, + { + "epoch": 5.03, + "learning_rate": 4.849329190989008e-06, + "loss": 0.8364, + "step": 361200 + }, + { + "epoch": 5.03, + "learning_rate": 4.842363365329693e-06, + "loss": 0.8249, + "step": 361300 + }, + { + "epoch": 5.03, + "learning_rate": 4.8353975396703774e-06, + "loss": 0.7996, + "step": 361400 + }, + { + "epoch": 5.04, + "learning_rate": 4.828431714011062e-06, + "loss": 0.8294, + "step": 361500 + }, + { + "epoch": 5.04, + "learning_rate": 4.821465888351747e-06, + "loss": 0.816, + "step": 361600 + }, + { + "epoch": 5.04, + "learning_rate": 4.814500062692431e-06, + "loss": 0.8391, + "step": 361700 + }, + { + "epoch": 5.04, + "learning_rate": 4.807534237033115e-06, + "loss": 0.8331, + "step": 361800 + }, + { + "epoch": 5.04, + "learning_rate": 4.8005684113738e-06, + "loss": 0.8248, + "step": 361900 + }, + { + "epoch": 5.04, + "learning_rate": 4.793602585714485e-06, + "loss": 0.8309, + "step": 362000 + }, + { + "epoch": 5.04, + "learning_rate": 4.786636760055169e-06, + "loss": 0.8254, + "step": 362100 + }, + { + "epoch": 5.05, + "learning_rate": 4.779670934395854e-06, + "loss": 0.8449, + "step": 362200 + }, + { + "epoch": 5.05, + "learning_rate": 4.772705108736539e-06, + "loss": 0.8179, + "step": 362300 + }, + { + "epoch": 5.05, + "learning_rate": 4.765739283077223e-06, + "loss": 0.8216, + "step": 362400 + }, + { + "epoch": 5.05, + "learning_rate": 4.758773457417908e-06, + "loss": 0.8372, + "step": 362500 + }, + { + "epoch": 5.05, + "learning_rate": 4.751807631758593e-06, + "loss": 0.8232, + "step": 362600 + }, + { + "epoch": 5.05, + "learning_rate": 4.7448418060992775e-06, + "loss": 0.8381, + "step": 362700 + }, + { + "epoch": 5.05, + "learning_rate": 4.737875980439961e-06, + "loss": 0.8593, + "step": 362800 + }, + { + "epoch": 5.06, + "learning_rate": 4.730910154780646e-06, + "loss": 0.8482, + "step": 362900 + }, + { + "epoch": 5.06, + "learning_rate": 4.723944329121331e-06, + "loss": 0.8209, + "step": 363000 + }, + { + "epoch": 5.06, + "learning_rate": 4.716978503462015e-06, + "loss": 0.8023, + "step": 363100 + }, + { + "epoch": 5.06, + "learning_rate": 4.7100126778027e-06, + "loss": 0.8405, + "step": 363200 + }, + { + "epoch": 5.06, + "learning_rate": 4.703046852143385e-06, + "loss": 0.8098, + "step": 363300 + }, + { + "epoch": 5.06, + "learning_rate": 4.6960810264840685e-06, + "loss": 0.8205, + "step": 363400 + }, + { + "epoch": 5.06, + "learning_rate": 4.689115200824754e-06, + "loss": 0.8516, + "step": 363500 + }, + { + "epoch": 5.07, + "learning_rate": 4.682149375165439e-06, + "loss": 0.8504, + "step": 363600 + }, + { + "epoch": 5.07, + "learning_rate": 4.6751835495061234e-06, + "loss": 0.8465, + "step": 363700 + }, + { + "epoch": 5.07, + "learning_rate": 4.668217723846808e-06, + "loss": 0.8444, + "step": 363800 + }, + { + "epoch": 5.07, + "learning_rate": 4.661251898187492e-06, + "loss": 0.8536, + "step": 363900 + }, + { + "epoch": 5.07, + "learning_rate": 4.654286072528177e-06, + "loss": 0.7937, + "step": 364000 + }, + { + "epoch": 5.07, + "learning_rate": 4.647320246868861e-06, + "loss": 0.8394, + "step": 364100 + }, + { + "epoch": 5.07, + "learning_rate": 4.640354421209546e-06, + "loss": 0.8356, + "step": 364200 + }, + { + "epoch": 5.08, + "learning_rate": 4.633388595550231e-06, + "loss": 0.8217, + "step": 364300 + }, + { + "epoch": 5.08, + "learning_rate": 4.626422769890915e-06, + "loss": 0.8573, + "step": 364400 + }, + { + "epoch": 5.08, + "learning_rate": 4.619456944231599e-06, + "loss": 0.8214, + "step": 364500 + }, + { + "epoch": 5.08, + "learning_rate": 4.612491118572284e-06, + "loss": 0.8246, + "step": 364600 + }, + { + "epoch": 5.08, + "learning_rate": 4.6055252929129694e-06, + "loss": 0.8251, + "step": 364700 + }, + { + "epoch": 5.08, + "learning_rate": 4.598559467253654e-06, + "loss": 0.84, + "step": 364800 + }, + { + "epoch": 5.08, + "learning_rate": 4.591593641594339e-06, + "loss": 0.8238, + "step": 364900 + }, + { + "epoch": 5.09, + "learning_rate": 4.5846278159350235e-06, + "loss": 0.8194, + "step": 365000 + }, + { + "epoch": 5.09, + "learning_rate": 4.577661990275707e-06, + "loss": 0.8537, + "step": 365100 + }, + { + "epoch": 5.09, + "learning_rate": 4.570696164616392e-06, + "loss": 0.8201, + "step": 365200 + }, + { + "epoch": 5.09, + "learning_rate": 4.563730338957077e-06, + "loss": 0.8323, + "step": 365300 + }, + { + "epoch": 5.09, + "learning_rate": 4.556764513297761e-06, + "loss": 0.8201, + "step": 365400 + }, + { + "epoch": 5.09, + "learning_rate": 4.549798687638446e-06, + "loss": 0.8394, + "step": 365500 + }, + { + "epoch": 5.09, + "learning_rate": 4.54283286197913e-06, + "loss": 0.8441, + "step": 365600 + }, + { + "epoch": 5.09, + "learning_rate": 4.5358670363198146e-06, + "loss": 0.8349, + "step": 365700 + }, + { + "epoch": 5.1, + "learning_rate": 4.528901210660499e-06, + "loss": 0.8408, + "step": 365800 + }, + { + "epoch": 5.1, + "learning_rate": 4.521935385001185e-06, + "loss": 0.8681, + "step": 365900 + }, + { + "epoch": 5.1, + "learning_rate": 4.5149695593418695e-06, + "loss": 0.8023, + "step": 366000 + }, + { + "epoch": 5.1, + "learning_rate": 4.508003733682554e-06, + "loss": 0.8192, + "step": 366100 + }, + { + "epoch": 5.1, + "learning_rate": 4.501037908023238e-06, + "loss": 0.842, + "step": 366200 + }, + { + "epoch": 5.1, + "learning_rate": 4.494072082363923e-06, + "loss": 0.8262, + "step": 366300 + }, + { + "epoch": 5.1, + "learning_rate": 4.487106256704607e-06, + "loss": 0.8162, + "step": 366400 + }, + { + "epoch": 5.11, + "learning_rate": 4.480140431045292e-06, + "loss": 0.8301, + "step": 366500 + }, + { + "epoch": 5.11, + "learning_rate": 4.473174605385977e-06, + "loss": 0.8343, + "step": 366600 + }, + { + "epoch": 5.11, + "learning_rate": 4.466208779726661e-06, + "loss": 0.8363, + "step": 366700 + }, + { + "epoch": 5.11, + "learning_rate": 4.4593126123239395e-06, + "loss": 0.8299, + "step": 366800 + }, + { + "epoch": 5.11, + "learning_rate": 4.452346786664623e-06, + "loss": 0.8213, + "step": 366900 + }, + { + "epoch": 5.11, + "learning_rate": 4.445380961005308e-06, + "loss": 0.8165, + "step": 367000 + }, + { + "epoch": 5.11, + "learning_rate": 4.438415135345993e-06, + "loss": 0.8285, + "step": 367100 + }, + { + "epoch": 5.12, + "learning_rate": 4.431449309686677e-06, + "loss": 0.8347, + "step": 367200 + }, + { + "epoch": 5.12, + "learning_rate": 4.424483484027362e-06, + "loss": 0.8282, + "step": 367300 + }, + { + "epoch": 5.12, + "learning_rate": 4.417517658368047e-06, + "loss": 0.8539, + "step": 367400 + }, + { + "epoch": 5.12, + "learning_rate": 4.410551832708731e-06, + "loss": 0.8361, + "step": 367500 + }, + { + "epoch": 5.12, + "learning_rate": 4.403586007049415e-06, + "loss": 0.8209, + "step": 367600 + }, + { + "epoch": 5.12, + "learning_rate": 4.3966201813901e-06, + "loss": 0.8303, + "step": 367700 + }, + { + "epoch": 5.12, + "learning_rate": 4.389654355730785e-06, + "loss": 0.8435, + "step": 367800 + }, + { + "epoch": 5.13, + "learning_rate": 4.382688530071469e-06, + "loss": 0.8139, + "step": 367900 + }, + { + "epoch": 5.13, + "learning_rate": 4.375722704412155e-06, + "loss": 0.8544, + "step": 368000 + }, + { + "epoch": 5.13, + "learning_rate": 4.368756878752839e-06, + "loss": 0.8244, + "step": 368100 + }, + { + "epoch": 5.13, + "learning_rate": 4.361791053093523e-06, + "loss": 0.8323, + "step": 368200 + }, + { + "epoch": 5.13, + "learning_rate": 4.354825227434208e-06, + "loss": 0.8592, + "step": 368300 + }, + { + "epoch": 5.13, + "learning_rate": 4.347859401774893e-06, + "loss": 0.8287, + "step": 368400 + }, + { + "epoch": 5.13, + "learning_rate": 4.3408935761155774e-06, + "loss": 0.8361, + "step": 368500 + }, + { + "epoch": 5.14, + "learning_rate": 4.333927750456261e-06, + "loss": 0.823, + "step": 368600 + }, + { + "epoch": 5.14, + "learning_rate": 4.326961924796946e-06, + "loss": 0.8323, + "step": 368700 + }, + { + "epoch": 5.14, + "learning_rate": 4.319996099137631e-06, + "loss": 0.8575, + "step": 368800 + }, + { + "epoch": 5.14, + "learning_rate": 4.313030273478315e-06, + "loss": 0.8333, + "step": 368900 + }, + { + "epoch": 5.14, + "learning_rate": 4.306064447819e-06, + "loss": 0.8535, + "step": 369000 + }, + { + "epoch": 5.14, + "learning_rate": 4.299098622159685e-06, + "loss": 0.8152, + "step": 369100 + }, + { + "epoch": 5.14, + "learning_rate": 4.292132796500369e-06, + "loss": 0.8207, + "step": 369200 + }, + { + "epoch": 5.14, + "learning_rate": 4.285166970841054e-06, + "loss": 0.8351, + "step": 369300 + }, + { + "epoch": 5.15, + "learning_rate": 4.278201145181739e-06, + "loss": 0.8194, + "step": 369400 + }, + { + "epoch": 5.15, + "learning_rate": 4.2712353195224234e-06, + "loss": 0.8532, + "step": 369500 + }, + { + "epoch": 5.15, + "learning_rate": 4.264339152119701e-06, + "loss": 0.8306, + "step": 369600 + }, + { + "epoch": 5.15, + "learning_rate": 4.257373326460385e-06, + "loss": 0.8295, + "step": 369700 + }, + { + "epoch": 5.15, + "learning_rate": 4.25040750080107e-06, + "loss": 0.8504, + "step": 369800 + }, + { + "epoch": 5.15, + "learning_rate": 4.243441675141754e-06, + "loss": 0.8283, + "step": 369900 + }, + { + "epoch": 5.15, + "learning_rate": 4.2364758494824394e-06, + "loss": 0.8485, + "step": 370000 + }, + { + "epoch": 5.16, + "learning_rate": 4.229510023823124e-06, + "loss": 0.81, + "step": 370100 + }, + { + "epoch": 5.16, + "learning_rate": 4.222544198163809e-06, + "loss": 0.8437, + "step": 370200 + }, + { + "epoch": 5.16, + "learning_rate": 4.2155783725044935e-06, + "loss": 0.8365, + "step": 370300 + }, + { + "epoch": 5.16, + "learning_rate": 4.208612546845178e-06, + "loss": 0.8538, + "step": 370400 + }, + { + "epoch": 5.16, + "learning_rate": 4.201646721185862e-06, + "loss": 0.8265, + "step": 370500 + }, + { + "epoch": 5.16, + "learning_rate": 4.194680895526547e-06, + "loss": 0.8176, + "step": 370600 + }, + { + "epoch": 5.16, + "learning_rate": 4.187715069867231e-06, + "loss": 0.8338, + "step": 370700 + }, + { + "epoch": 5.17, + "learning_rate": 4.180749244207916e-06, + "loss": 0.8237, + "step": 370800 + }, + { + "epoch": 5.17, + "learning_rate": 4.173783418548601e-06, + "loss": 0.8213, + "step": 370900 + }, + { + "epoch": 5.17, + "learning_rate": 4.166817592889285e-06, + "loss": 0.8419, + "step": 371000 + }, + { + "epoch": 5.17, + "learning_rate": 4.159851767229969e-06, + "loss": 0.8052, + "step": 371100 + }, + { + "epoch": 5.17, + "learning_rate": 4.152885941570655e-06, + "loss": 0.7705, + "step": 371200 + }, + { + "epoch": 5.17, + "learning_rate": 4.1459201159113395e-06, + "loss": 0.8226, + "step": 371300 + }, + { + "epoch": 5.17, + "learning_rate": 4.138954290252024e-06, + "loss": 0.8284, + "step": 371400 + }, + { + "epoch": 5.18, + "learning_rate": 4.131988464592709e-06, + "loss": 0.836, + "step": 371500 + }, + { + "epoch": 5.18, + "learning_rate": 4.125022638933393e-06, + "loss": 0.8067, + "step": 371600 + }, + { + "epoch": 5.18, + "learning_rate": 4.118056813274077e-06, + "loss": 0.8206, + "step": 371700 + }, + { + "epoch": 5.18, + "learning_rate": 4.111090987614762e-06, + "loss": 0.8185, + "step": 371800 + }, + { + "epoch": 5.18, + "learning_rate": 4.104125161955447e-06, + "loss": 0.8344, + "step": 371900 + }, + { + "epoch": 5.18, + "learning_rate": 4.097159336296131e-06, + "loss": 0.8392, + "step": 372000 + }, + { + "epoch": 5.18, + "learning_rate": 4.090193510636816e-06, + "loss": 0.8224, + "step": 372100 + }, + { + "epoch": 5.19, + "learning_rate": 4.0832276849775e-06, + "loss": 0.8247, + "step": 372200 + }, + { + "epoch": 5.19, + "learning_rate": 4.076261859318185e-06, + "loss": 0.8541, + "step": 372300 + }, + { + "epoch": 5.19, + "learning_rate": 4.069296033658869e-06, + "loss": 0.8669, + "step": 372400 + }, + { + "epoch": 5.19, + "learning_rate": 4.062330207999555e-06, + "loss": 0.8228, + "step": 372500 + }, + { + "epoch": 5.19, + "learning_rate": 4.0553643823402395e-06, + "loss": 0.8359, + "step": 372600 + }, + { + "epoch": 5.19, + "learning_rate": 4.048398556680923e-06, + "loss": 0.8489, + "step": 372700 + }, + { + "epoch": 5.19, + "learning_rate": 4.041432731021608e-06, + "loss": 0.8537, + "step": 372800 + }, + { + "epoch": 5.2, + "learning_rate": 4.034536563618885e-06, + "loss": 0.8625, + "step": 372900 + }, + { + "epoch": 5.2, + "learning_rate": 4.02757073795957e-06, + "loss": 0.8186, + "step": 373000 + }, + { + "epoch": 5.2, + "learning_rate": 4.020604912300255e-06, + "loss": 0.8274, + "step": 373100 + }, + { + "epoch": 5.2, + "learning_rate": 4.013708744897533e-06, + "loss": 0.8246, + "step": 373200 + }, + { + "epoch": 5.2, + "learning_rate": 4.0067429192382175e-06, + "loss": 0.8173, + "step": 373300 + }, + { + "epoch": 5.2, + "learning_rate": 3.999777093578902e-06, + "loss": 0.8404, + "step": 373400 + }, + { + "epoch": 5.2, + "learning_rate": 3.992811267919587e-06, + "loss": 0.8254, + "step": 373500 + }, + { + "epoch": 5.2, + "learning_rate": 3.985845442260271e-06, + "loss": 0.8032, + "step": 373600 + }, + { + "epoch": 5.21, + "learning_rate": 3.978879616600955e-06, + "loss": 0.81, + "step": 373700 + }, + { + "epoch": 5.21, + "learning_rate": 3.97191379094164e-06, + "loss": 0.8396, + "step": 373800 + }, + { + "epoch": 5.21, + "learning_rate": 3.964947965282325e-06, + "loss": 0.8314, + "step": 373900 + }, + { + "epoch": 5.21, + "learning_rate": 3.957982139623009e-06, + "loss": 0.8444, + "step": 374000 + }, + { + "epoch": 5.21, + "learning_rate": 3.951016313963695e-06, + "loss": 0.8479, + "step": 374100 + }, + { + "epoch": 5.21, + "learning_rate": 3.944050488304379e-06, + "loss": 0.8289, + "step": 374200 + }, + { + "epoch": 5.21, + "learning_rate": 3.9370846626450635e-06, + "loss": 0.8545, + "step": 374300 + }, + { + "epoch": 5.22, + "learning_rate": 3.930118836985748e-06, + "loss": 0.8515, + "step": 374400 + }, + { + "epoch": 5.22, + "learning_rate": 3.923153011326433e-06, + "loss": 0.8247, + "step": 374500 + }, + { + "epoch": 5.22, + "learning_rate": 3.9161871856671175e-06, + "loss": 0.8546, + "step": 374600 + }, + { + "epoch": 5.22, + "learning_rate": 3.909221360007802e-06, + "loss": 0.8536, + "step": 374700 + }, + { + "epoch": 5.22, + "learning_rate": 3.902255534348486e-06, + "loss": 0.8446, + "step": 374800 + }, + { + "epoch": 5.22, + "learning_rate": 3.895289708689171e-06, + "loss": 0.8714, + "step": 374900 + }, + { + "epoch": 5.22, + "learning_rate": 3.888323883029855e-06, + "loss": 0.8566, + "step": 375000 + }, + { + "epoch": 5.23, + "learning_rate": 3.88135805737054e-06, + "loss": 0.8568, + "step": 375100 + }, + { + "epoch": 5.23, + "learning_rate": 3.874392231711225e-06, + "loss": 0.8445, + "step": 375200 + }, + { + "epoch": 5.23, + "learning_rate": 3.8674264060519094e-06, + "loss": 0.8545, + "step": 375300 + }, + { + "epoch": 5.23, + "learning_rate": 3.860460580392594e-06, + "loss": 0.8354, + "step": 375400 + }, + { + "epoch": 5.23, + "learning_rate": 3.853494754733279e-06, + "loss": 0.8702, + "step": 375500 + }, + { + "epoch": 5.23, + "learning_rate": 3.8465289290739635e-06, + "loss": 0.83, + "step": 375600 + }, + { + "epoch": 5.23, + "learning_rate": 3.839563103414648e-06, + "loss": 0.8258, + "step": 375700 + }, + { + "epoch": 5.24, + "learning_rate": 3.832597277755333e-06, + "loss": 0.8523, + "step": 375800 + }, + { + "epoch": 5.24, + "learning_rate": 3.825631452096017e-06, + "loss": 0.8535, + "step": 375900 + }, + { + "epoch": 5.24, + "learning_rate": 3.818665626436701e-06, + "loss": 0.8125, + "step": 376000 + }, + { + "epoch": 5.24, + "learning_rate": 3.811699800777386e-06, + "loss": 0.8218, + "step": 376100 + }, + { + "epoch": 5.24, + "learning_rate": 3.804733975118071e-06, + "loss": 0.8384, + "step": 376200 + }, + { + "epoch": 5.24, + "learning_rate": 3.797768149458756e-06, + "loss": 0.8437, + "step": 376300 + }, + { + "epoch": 5.24, + "learning_rate": 3.7908023237994397e-06, + "loss": 0.8364, + "step": 376400 + }, + { + "epoch": 5.25, + "learning_rate": 3.7838364981401244e-06, + "loss": 0.8538, + "step": 376500 + }, + { + "epoch": 5.25, + "learning_rate": 3.776870672480809e-06, + "loss": 0.8263, + "step": 376600 + }, + { + "epoch": 5.25, + "learning_rate": 3.7699048468214937e-06, + "loss": 0.8514, + "step": 376700 + }, + { + "epoch": 5.25, + "learning_rate": 3.7629390211621784e-06, + "loss": 0.8167, + "step": 376800 + }, + { + "epoch": 5.25, + "learning_rate": 3.7559731955028635e-06, + "loss": 0.8557, + "step": 376900 + }, + { + "epoch": 5.25, + "learning_rate": 3.749007369843548e-06, + "loss": 0.8237, + "step": 377000 + }, + { + "epoch": 5.25, + "learning_rate": 3.742041544184232e-06, + "loss": 0.8251, + "step": 377100 + }, + { + "epoch": 5.26, + "learning_rate": 3.7350757185249167e-06, + "loss": 0.8271, + "step": 377200 + }, + { + "epoch": 5.26, + "learning_rate": 3.7281098928656014e-06, + "loss": 0.8176, + "step": 377300 + }, + { + "epoch": 5.26, + "learning_rate": 3.7211440672062857e-06, + "loss": 0.8048, + "step": 377400 + }, + { + "epoch": 5.26, + "learning_rate": 3.7141782415469708e-06, + "loss": 0.8213, + "step": 377500 + }, + { + "epoch": 5.26, + "learning_rate": 3.7072124158876555e-06, + "loss": 0.8204, + "step": 377600 + }, + { + "epoch": 5.26, + "learning_rate": 3.7002465902283397e-06, + "loss": 0.8593, + "step": 377700 + }, + { + "epoch": 5.26, + "learning_rate": 3.6932807645690244e-06, + "loss": 0.8453, + "step": 377800 + }, + { + "epoch": 5.26, + "learning_rate": 3.686314938909709e-06, + "loss": 0.852, + "step": 377900 + }, + { + "epoch": 5.27, + "learning_rate": 3.6793491132503934e-06, + "loss": 0.8347, + "step": 378000 + }, + { + "epoch": 5.27, + "learning_rate": 3.6723832875910785e-06, + "loss": 0.8226, + "step": 378100 + }, + { + "epoch": 5.27, + "learning_rate": 3.665417461931763e-06, + "loss": 0.8266, + "step": 378200 + }, + { + "epoch": 5.27, + "learning_rate": 3.6584516362724474e-06, + "loss": 0.8069, + "step": 378300 + }, + { + "epoch": 5.27, + "learning_rate": 3.651555468869725e-06, + "loss": 0.8233, + "step": 378400 + }, + { + "epoch": 5.27, + "learning_rate": 3.64458964321041e-06, + "loss": 0.8328, + "step": 378500 + }, + { + "epoch": 5.27, + "learning_rate": 3.6376238175510945e-06, + "loss": 0.8203, + "step": 378600 + }, + { + "epoch": 5.28, + "learning_rate": 3.6306579918917787e-06, + "loss": 0.8252, + "step": 378700 + }, + { + "epoch": 5.28, + "learning_rate": 3.6236921662324634e-06, + "loss": 0.831, + "step": 378800 + }, + { + "epoch": 5.28, + "learning_rate": 3.6167263405731485e-06, + "loss": 0.8212, + "step": 378900 + }, + { + "epoch": 5.28, + "learning_rate": 3.6097605149138328e-06, + "loss": 0.8431, + "step": 379000 + }, + { + "epoch": 5.28, + "learning_rate": 3.6027946892545175e-06, + "loss": 0.81, + "step": 379100 + }, + { + "epoch": 5.28, + "learning_rate": 3.595828863595202e-06, + "loss": 0.8325, + "step": 379200 + }, + { + "epoch": 5.28, + "learning_rate": 3.5888630379358864e-06, + "loss": 0.8471, + "step": 379300 + }, + { + "epoch": 5.29, + "learning_rate": 3.581897212276571e-06, + "loss": 0.8566, + "step": 379400 + }, + { + "epoch": 5.29, + "learning_rate": 3.574931386617256e-06, + "loss": 0.8169, + "step": 379500 + }, + { + "epoch": 5.29, + "learning_rate": 3.5679655609579405e-06, + "loss": 0.8231, + "step": 379600 + }, + { + "epoch": 5.29, + "learning_rate": 3.560999735298625e-06, + "loss": 0.8377, + "step": 379700 + }, + { + "epoch": 5.29, + "learning_rate": 3.5540339096393094e-06, + "loss": 0.8441, + "step": 379800 + }, + { + "epoch": 5.29, + "learning_rate": 3.547068083979994e-06, + "loss": 0.834, + "step": 379900 + }, + { + "epoch": 5.29, + "learning_rate": 3.5401022583206788e-06, + "loss": 0.8248, + "step": 380000 + }, + { + "epoch": 5.3, + "learning_rate": 3.5331364326613634e-06, + "loss": 0.8165, + "step": 380100 + }, + { + "epoch": 5.3, + "learning_rate": 3.526170607002048e-06, + "loss": 0.8424, + "step": 380200 + }, + { + "epoch": 5.3, + "learning_rate": 3.519204781342733e-06, + "loss": 0.8317, + "step": 380300 + }, + { + "epoch": 5.3, + "learning_rate": 3.512238955683417e-06, + "loss": 0.8206, + "step": 380400 + }, + { + "epoch": 5.3, + "learning_rate": 3.505342788280695e-06, + "loss": 0.8332, + "step": 380500 + }, + { + "epoch": 5.3, + "learning_rate": 3.4983769626213795e-06, + "loss": 0.8391, + "step": 380600 + }, + { + "epoch": 5.3, + "learning_rate": 3.491411136962064e-06, + "loss": 0.8209, + "step": 380700 + }, + { + "epoch": 5.31, + "learning_rate": 3.484445311302749e-06, + "loss": 0.8131, + "step": 380800 + }, + { + "epoch": 5.31, + "learning_rate": 3.4774794856434335e-06, + "loss": 0.8374, + "step": 380900 + }, + { + "epoch": 5.31, + "learning_rate": 3.470513659984118e-06, + "loss": 0.8362, + "step": 381000 + }, + { + "epoch": 5.31, + "learning_rate": 3.4635478343248025e-06, + "loss": 0.8236, + "step": 381100 + }, + { + "epoch": 5.31, + "learning_rate": 3.456582008665487e-06, + "loss": 0.8184, + "step": 381200 + }, + { + "epoch": 5.31, + "learning_rate": 3.449616183006172e-06, + "loss": 0.8451, + "step": 381300 + }, + { + "epoch": 5.31, + "learning_rate": 3.442650357346856e-06, + "loss": 0.821, + "step": 381400 + }, + { + "epoch": 5.31, + "learning_rate": 3.435684531687541e-06, + "loss": 0.8402, + "step": 381500 + }, + { + "epoch": 5.32, + "learning_rate": 3.428718706028226e-06, + "loss": 0.8643, + "step": 381600 + }, + { + "epoch": 5.32, + "learning_rate": 3.42175288036891e-06, + "loss": 0.8172, + "step": 381700 + }, + { + "epoch": 5.32, + "learning_rate": 3.414787054709595e-06, + "loss": 0.8328, + "step": 381800 + }, + { + "epoch": 5.32, + "learning_rate": 3.4078212290502795e-06, + "loss": 0.8191, + "step": 381900 + }, + { + "epoch": 5.32, + "learning_rate": 3.4008554033909638e-06, + "loss": 0.8503, + "step": 382000 + }, + { + "epoch": 5.32, + "learning_rate": 3.393889577731649e-06, + "loss": 0.8623, + "step": 382100 + }, + { + "epoch": 5.32, + "learning_rate": 3.3869237520723335e-06, + "loss": 0.8358, + "step": 382200 + }, + { + "epoch": 5.33, + "learning_rate": 3.379957926413018e-06, + "loss": 0.8478, + "step": 382300 + }, + { + "epoch": 5.33, + "learning_rate": 3.3729921007537025e-06, + "loss": 0.825, + "step": 382400 + }, + { + "epoch": 5.33, + "learning_rate": 3.3660262750943867e-06, + "loss": 0.8273, + "step": 382500 + }, + { + "epoch": 5.33, + "learning_rate": 3.3590604494350714e-06, + "loss": 0.8264, + "step": 382600 + }, + { + "epoch": 5.33, + "learning_rate": 3.352094623775756e-06, + "loss": 0.8397, + "step": 382700 + }, + { + "epoch": 5.33, + "learning_rate": 3.345128798116441e-06, + "loss": 0.8351, + "step": 382800 + }, + { + "epoch": 5.33, + "learning_rate": 3.3381629724571255e-06, + "loss": 0.8359, + "step": 382900 + }, + { + "epoch": 5.34, + "learning_rate": 3.33119714679781e-06, + "loss": 0.8243, + "step": 383000 + }, + { + "epoch": 5.34, + "learning_rate": 3.3242313211384944e-06, + "loss": 0.8176, + "step": 383100 + }, + { + "epoch": 5.34, + "learning_rate": 3.317265495479179e-06, + "loss": 0.8401, + "step": 383200 + }, + { + "epoch": 5.34, + "learning_rate": 3.310299669819864e-06, + "loss": 0.825, + "step": 383300 + }, + { + "epoch": 5.34, + "learning_rate": 3.3033338441605485e-06, + "loss": 0.8245, + "step": 383400 + }, + { + "epoch": 5.34, + "learning_rate": 3.296368018501233e-06, + "loss": 0.8646, + "step": 383500 + }, + { + "epoch": 5.34, + "learning_rate": 3.289402192841918e-06, + "loss": 0.8621, + "step": 383600 + }, + { + "epoch": 5.35, + "learning_rate": 3.282436367182602e-06, + "loss": 0.83, + "step": 383700 + }, + { + "epoch": 5.35, + "learning_rate": 3.2754705415232868e-06, + "loss": 0.8519, + "step": 383800 + }, + { + "epoch": 5.35, + "learning_rate": 3.268504715863971e-06, + "loss": 0.8233, + "step": 383900 + }, + { + "epoch": 5.35, + "learning_rate": 3.261538890204656e-06, + "loss": 0.8262, + "step": 384000 + }, + { + "epoch": 5.35, + "learning_rate": 3.254573064545341e-06, + "loss": 0.8491, + "step": 384100 + }, + { + "epoch": 5.35, + "learning_rate": 3.2476768971426185e-06, + "loss": 0.8227, + "step": 384200 + }, + { + "epoch": 5.35, + "learning_rate": 3.240780729739896e-06, + "loss": 0.8404, + "step": 384300 + }, + { + "epoch": 5.36, + "learning_rate": 3.233814904080581e-06, + "loss": 0.8342, + "step": 384400 + }, + { + "epoch": 5.36, + "learning_rate": 3.2268490784212656e-06, + "loss": 0.8264, + "step": 384500 + }, + { + "epoch": 5.36, + "learning_rate": 3.21988325276195e-06, + "loss": 0.8257, + "step": 384600 + }, + { + "epoch": 5.36, + "learning_rate": 3.2129174271026345e-06, + "loss": 0.8442, + "step": 384700 + }, + { + "epoch": 5.36, + "learning_rate": 3.2059516014433192e-06, + "loss": 0.8405, + "step": 384800 + }, + { + "epoch": 5.36, + "learning_rate": 3.1989857757840035e-06, + "loss": 0.8456, + "step": 384900 + }, + { + "epoch": 5.36, + "learning_rate": 3.1920199501246886e-06, + "loss": 0.8348, + "step": 385000 + }, + { + "epoch": 5.37, + "learning_rate": 3.1850541244653733e-06, + "loss": 0.8129, + "step": 385100 + }, + { + "epoch": 5.37, + "learning_rate": 3.1780882988060575e-06, + "loss": 0.847, + "step": 385200 + }, + { + "epoch": 5.37, + "learning_rate": 3.1711224731467422e-06, + "loss": 0.8226, + "step": 385300 + }, + { + "epoch": 5.37, + "learning_rate": 3.1641566474874265e-06, + "loss": 0.8348, + "step": 385400 + }, + { + "epoch": 5.37, + "learning_rate": 3.157190821828111e-06, + "loss": 0.821, + "step": 385500 + }, + { + "epoch": 5.37, + "learning_rate": 3.1502249961687963e-06, + "loss": 0.8228, + "step": 385600 + }, + { + "epoch": 5.37, + "learning_rate": 3.1432591705094805e-06, + "loss": 0.8163, + "step": 385700 + }, + { + "epoch": 5.37, + "learning_rate": 3.136293344850165e-06, + "loss": 0.8451, + "step": 385800 + }, + { + "epoch": 5.38, + "learning_rate": 3.12932751919085e-06, + "loss": 0.8191, + "step": 385900 + }, + { + "epoch": 5.38, + "learning_rate": 3.122361693531534e-06, + "loss": 0.8147, + "step": 386000 + }, + { + "epoch": 5.38, + "learning_rate": 3.115395867872219e-06, + "loss": 0.8197, + "step": 386100 + }, + { + "epoch": 5.38, + "learning_rate": 3.108430042212904e-06, + "loss": 0.8634, + "step": 386200 + }, + { + "epoch": 5.38, + "learning_rate": 3.101464216553588e-06, + "loss": 0.8023, + "step": 386300 + }, + { + "epoch": 5.38, + "learning_rate": 3.094498390894273e-06, + "loss": 0.8237, + "step": 386400 + }, + { + "epoch": 5.38, + "learning_rate": 3.0875325652349576e-06, + "loss": 0.8171, + "step": 386500 + }, + { + "epoch": 5.39, + "learning_rate": 3.080566739575642e-06, + "loss": 0.798, + "step": 386600 + }, + { + "epoch": 5.39, + "learning_rate": 3.0736009139163265e-06, + "loss": 0.8277, + "step": 386700 + }, + { + "epoch": 5.39, + "learning_rate": 3.066635088257011e-06, + "loss": 0.8288, + "step": 386800 + }, + { + "epoch": 5.39, + "learning_rate": 3.059669262597696e-06, + "loss": 0.8021, + "step": 386900 + }, + { + "epoch": 5.39, + "learning_rate": 3.0527034369383806e-06, + "loss": 0.8283, + "step": 387000 + }, + { + "epoch": 5.39, + "learning_rate": 3.045737611279065e-06, + "loss": 0.8425, + "step": 387100 + }, + { + "epoch": 5.39, + "learning_rate": 3.0387717856197495e-06, + "loss": 0.818, + "step": 387200 + }, + { + "epoch": 5.4, + "learning_rate": 3.031805959960434e-06, + "loss": 0.8403, + "step": 387300 + }, + { + "epoch": 5.4, + "learning_rate": 3.024840134301119e-06, + "loss": 0.8149, + "step": 387400 + }, + { + "epoch": 5.4, + "learning_rate": 3.0178743086418036e-06, + "loss": 0.8069, + "step": 387500 + }, + { + "epoch": 5.4, + "learning_rate": 3.0109084829824882e-06, + "loss": 0.8554, + "step": 387600 + }, + { + "epoch": 5.4, + "learning_rate": 3.0039426573231725e-06, + "loss": 0.8302, + "step": 387700 + }, + { + "epoch": 5.4, + "learning_rate": 2.996976831663857e-06, + "loss": 0.8193, + "step": 387800 + }, + { + "epoch": 5.4, + "learning_rate": 2.990011006004542e-06, + "loss": 0.8342, + "step": 387900 + }, + { + "epoch": 5.41, + "learning_rate": 2.983045180345226e-06, + "loss": 0.8531, + "step": 388000 + }, + { + "epoch": 5.41, + "learning_rate": 2.9760793546859112e-06, + "loss": 0.8356, + "step": 388100 + }, + { + "epoch": 5.41, + "learning_rate": 2.9691135290265955e-06, + "loss": 0.8063, + "step": 388200 + }, + { + "epoch": 5.41, + "learning_rate": 2.96214770336728e-06, + "loss": 0.8303, + "step": 388300 + }, + { + "epoch": 5.41, + "learning_rate": 2.955181877707965e-06, + "loss": 0.8202, + "step": 388400 + }, + { + "epoch": 5.41, + "learning_rate": 2.948216052048649e-06, + "loss": 0.8353, + "step": 388500 + }, + { + "epoch": 5.41, + "learning_rate": 2.941250226389334e-06, + "loss": 0.838, + "step": 388600 + }, + { + "epoch": 5.42, + "learning_rate": 2.934284400730019e-06, + "loss": 0.8324, + "step": 388700 + }, + { + "epoch": 5.42, + "learning_rate": 2.927318575070703e-06, + "loss": 0.8324, + "step": 388800 + }, + { + "epoch": 5.42, + "learning_rate": 2.920352749411388e-06, + "loss": 0.8674, + "step": 388900 + }, + { + "epoch": 5.42, + "learning_rate": 2.9133869237520725e-06, + "loss": 0.8197, + "step": 389000 + }, + { + "epoch": 5.42, + "learning_rate": 2.906421098092757e-06, + "loss": 0.8349, + "step": 389100 + }, + { + "epoch": 5.42, + "learning_rate": 2.8994552724334415e-06, + "loss": 0.8212, + "step": 389200 + }, + { + "epoch": 5.42, + "learning_rate": 2.8924894467741266e-06, + "loss": 0.8134, + "step": 389300 + }, + { + "epoch": 5.42, + "learning_rate": 2.885523621114811e-06, + "loss": 0.8445, + "step": 389400 + }, + { + "epoch": 5.43, + "learning_rate": 2.8785577954554955e-06, + "loss": 0.8518, + "step": 389500 + }, + { + "epoch": 5.43, + "learning_rate": 2.8716616280527732e-06, + "loss": 0.842, + "step": 389600 + }, + { + "epoch": 5.43, + "learning_rate": 2.864695802393458e-06, + "loss": 0.819, + "step": 389700 + }, + { + "epoch": 5.43, + "learning_rate": 2.857729976734142e-06, + "loss": 0.8206, + "step": 389800 + }, + { + "epoch": 5.43, + "learning_rate": 2.850764151074827e-06, + "loss": 0.8133, + "step": 389900 + }, + { + "epoch": 5.43, + "learning_rate": 2.8437983254155115e-06, + "loss": 0.8705, + "step": 390000 + }, + { + "epoch": 5.43, + "learning_rate": 2.8368324997561962e-06, + "loss": 0.7895, + "step": 390100 + }, + { + "epoch": 5.44, + "learning_rate": 2.829866674096881e-06, + "loss": 0.8369, + "step": 390200 + }, + { + "epoch": 5.44, + "learning_rate": 2.8229008484375656e-06, + "loss": 0.8482, + "step": 390300 + }, + { + "epoch": 5.44, + "learning_rate": 2.81593502277825e-06, + "loss": 0.839, + "step": 390400 + }, + { + "epoch": 5.44, + "learning_rate": 2.8089691971189345e-06, + "loss": 0.8442, + "step": 390500 + }, + { + "epoch": 5.44, + "learning_rate": 2.802003371459619e-06, + "loss": 0.8403, + "step": 390600 + }, + { + "epoch": 5.44, + "learning_rate": 2.795037545800304e-06, + "loss": 0.8377, + "step": 390700 + }, + { + "epoch": 5.44, + "learning_rate": 2.7880717201409886e-06, + "loss": 0.7882, + "step": 390800 + }, + { + "epoch": 5.45, + "learning_rate": 2.781105894481673e-06, + "loss": 0.8355, + "step": 390900 + }, + { + "epoch": 5.45, + "learning_rate": 2.7741400688223575e-06, + "loss": 0.8392, + "step": 391000 + }, + { + "epoch": 5.45, + "learning_rate": 2.767174243163042e-06, + "loss": 0.8438, + "step": 391100 + }, + { + "epoch": 5.45, + "learning_rate": 2.7602084175037265e-06, + "loss": 0.8294, + "step": 391200 + }, + { + "epoch": 5.45, + "learning_rate": 2.7532425918444116e-06, + "loss": 0.8563, + "step": 391300 + }, + { + "epoch": 5.45, + "learning_rate": 2.7462767661850963e-06, + "loss": 0.7958, + "step": 391400 + }, + { + "epoch": 5.45, + "learning_rate": 2.7393109405257805e-06, + "loss": 0.8421, + "step": 391500 + }, + { + "epoch": 5.46, + "learning_rate": 2.732345114866465e-06, + "loss": 0.8167, + "step": 391600 + }, + { + "epoch": 5.46, + "learning_rate": 2.72537928920715e-06, + "loss": 0.815, + "step": 391700 + }, + { + "epoch": 5.46, + "learning_rate": 2.718413463547834e-06, + "loss": 0.8459, + "step": 391800 + }, + { + "epoch": 5.46, + "learning_rate": 2.7114476378885192e-06, + "loss": 0.83, + "step": 391900 + }, + { + "epoch": 5.46, + "learning_rate": 2.704481812229204e-06, + "loss": 0.8363, + "step": 392000 + }, + { + "epoch": 5.46, + "learning_rate": 2.697515986569888e-06, + "loss": 0.8393, + "step": 392100 + }, + { + "epoch": 5.46, + "learning_rate": 2.690550160910573e-06, + "loss": 0.8446, + "step": 392200 + }, + { + "epoch": 5.47, + "learning_rate": 2.6836539935078506e-06, + "loss": 0.8503, + "step": 392300 + }, + { + "epoch": 5.47, + "learning_rate": 2.6766881678485353e-06, + "loss": 0.858, + "step": 392400 + }, + { + "epoch": 5.47, + "learning_rate": 2.6697223421892195e-06, + "loss": 0.8208, + "step": 392500 + }, + { + "epoch": 5.47, + "learning_rate": 2.662756516529904e-06, + "loss": 0.8277, + "step": 392600 + }, + { + "epoch": 5.47, + "learning_rate": 2.6557906908705893e-06, + "loss": 0.8217, + "step": 392700 + }, + { + "epoch": 5.47, + "learning_rate": 2.6488248652112736e-06, + "loss": 0.7853, + "step": 392800 + }, + { + "epoch": 5.47, + "learning_rate": 2.6418590395519582e-06, + "loss": 0.8538, + "step": 392900 + }, + { + "epoch": 5.48, + "learning_rate": 2.634893213892643e-06, + "loss": 0.8455, + "step": 393000 + }, + { + "epoch": 5.48, + "learning_rate": 2.627927388233327e-06, + "loss": 0.8299, + "step": 393100 + }, + { + "epoch": 5.48, + "learning_rate": 2.620961562574012e-06, + "loss": 0.8385, + "step": 393200 + }, + { + "epoch": 5.48, + "learning_rate": 2.6139957369146966e-06, + "loss": 0.8374, + "step": 393300 + }, + { + "epoch": 5.48, + "learning_rate": 2.6070299112553812e-06, + "loss": 0.818, + "step": 393400 + }, + { + "epoch": 5.48, + "learning_rate": 2.600064085596066e-06, + "loss": 0.8294, + "step": 393500 + }, + { + "epoch": 5.48, + "learning_rate": 2.59309825993675e-06, + "loss": 0.8212, + "step": 393600 + }, + { + "epoch": 5.48, + "learning_rate": 2.586132434277435e-06, + "loss": 0.8319, + "step": 393700 + }, + { + "epoch": 5.49, + "learning_rate": 2.5791666086181196e-06, + "loss": 0.8353, + "step": 393800 + }, + { + "epoch": 5.49, + "learning_rate": 2.572200782958804e-06, + "loss": 0.8453, + "step": 393900 + }, + { + "epoch": 5.49, + "learning_rate": 2.565234957299489e-06, + "loss": 0.8301, + "step": 394000 + }, + { + "epoch": 5.49, + "learning_rate": 2.5582691316401736e-06, + "loss": 0.8026, + "step": 394100 + }, + { + "epoch": 5.49, + "learning_rate": 2.551303305980858e-06, + "loss": 0.8317, + "step": 394200 + }, + { + "epoch": 5.49, + "learning_rate": 2.5443374803215425e-06, + "loss": 0.8567, + "step": 394300 + }, + { + "epoch": 5.49, + "learning_rate": 2.5373716546622272e-06, + "loss": 0.842, + "step": 394400 + }, + { + "epoch": 5.5, + "learning_rate": 2.5304058290029115e-06, + "loss": 0.8337, + "step": 394500 + }, + { + "epoch": 5.5, + "learning_rate": 2.5234400033435966e-06, + "loss": 0.8284, + "step": 394600 + }, + { + "epoch": 5.5, + "learning_rate": 2.5164741776842813e-06, + "loss": 0.8391, + "step": 394700 + }, + { + "epoch": 5.5, + "learning_rate": 2.5095083520249655e-06, + "loss": 0.8143, + "step": 394800 + }, + { + "epoch": 5.5, + "learning_rate": 2.5025425263656502e-06, + "loss": 0.8268, + "step": 394900 + }, + { + "epoch": 5.5, + "learning_rate": 2.4955767007063345e-06, + "loss": 0.8075, + "step": 395000 + }, + { + "epoch": 5.5, + "learning_rate": 2.488610875047019e-06, + "loss": 0.8218, + "step": 395100 + }, + { + "epoch": 5.51, + "learning_rate": 2.4816450493877043e-06, + "loss": 0.8452, + "step": 395200 + }, + { + "epoch": 5.51, + "learning_rate": 2.4746792237283885e-06, + "loss": 0.8349, + "step": 395300 + }, + { + "epoch": 5.51, + "learning_rate": 2.467713398069073e-06, + "loss": 0.8594, + "step": 395400 + }, + { + "epoch": 5.51, + "learning_rate": 2.460747572409758e-06, + "loss": 0.8357, + "step": 395500 + }, + { + "epoch": 5.51, + "learning_rate": 2.453781746750442e-06, + "loss": 0.8482, + "step": 395600 + }, + { + "epoch": 5.51, + "learning_rate": 2.446815921091127e-06, + "loss": 0.8132, + "step": 395700 + }, + { + "epoch": 5.51, + "learning_rate": 2.439850095431812e-06, + "loss": 0.8626, + "step": 395800 + }, + { + "epoch": 5.52, + "learning_rate": 2.432884269772496e-06, + "loss": 0.8244, + "step": 395900 + }, + { + "epoch": 5.52, + "learning_rate": 2.425918444113181e-06, + "loss": 0.8427, + "step": 396000 + }, + { + "epoch": 5.52, + "learning_rate": 2.4189526184538656e-06, + "loss": 0.8401, + "step": 396100 + }, + { + "epoch": 5.52, + "learning_rate": 2.4120564510511433e-06, + "loss": 0.8222, + "step": 396200 + }, + { + "epoch": 5.52, + "learning_rate": 2.4050906253918275e-06, + "loss": 0.845, + "step": 396300 + }, + { + "epoch": 5.52, + "learning_rate": 2.3981247997325122e-06, + "loss": 0.818, + "step": 396400 + }, + { + "epoch": 5.52, + "learning_rate": 2.391158974073197e-06, + "loss": 0.8338, + "step": 396500 + }, + { + "epoch": 5.53, + "learning_rate": 2.3841931484138816e-06, + "loss": 0.8357, + "step": 396600 + }, + { + "epoch": 5.53, + "learning_rate": 2.3772273227545663e-06, + "loss": 0.8255, + "step": 396700 + }, + { + "epoch": 5.53, + "learning_rate": 2.370261497095251e-06, + "loss": 0.8069, + "step": 396800 + }, + { + "epoch": 5.53, + "learning_rate": 2.363295671435935e-06, + "loss": 0.8002, + "step": 396900 + }, + { + "epoch": 5.53, + "learning_rate": 2.35632984577662e-06, + "loss": 0.8583, + "step": 397000 + }, + { + "epoch": 5.53, + "learning_rate": 2.3493640201173046e-06, + "loss": 0.8307, + "step": 397100 + }, + { + "epoch": 5.53, + "learning_rate": 2.3423981944579893e-06, + "loss": 0.8362, + "step": 397200 + }, + { + "epoch": 5.54, + "learning_rate": 2.335432368798674e-06, + "loss": 0.8156, + "step": 397300 + }, + { + "epoch": 5.54, + "learning_rate": 2.3284665431393586e-06, + "loss": 0.8103, + "step": 397400 + }, + { + "epoch": 5.54, + "learning_rate": 2.321500717480043e-06, + "loss": 0.8506, + "step": 397500 + }, + { + "epoch": 5.54, + "learning_rate": 2.3145348918207276e-06, + "loss": 0.8415, + "step": 397600 + }, + { + "epoch": 5.54, + "learning_rate": 2.307569066161412e-06, + "loss": 0.8264, + "step": 397700 + }, + { + "epoch": 5.54, + "learning_rate": 2.300603240502097e-06, + "loss": 0.8058, + "step": 397800 + }, + { + "epoch": 5.54, + "learning_rate": 2.2936374148427816e-06, + "loss": 0.8303, + "step": 397900 + }, + { + "epoch": 5.54, + "learning_rate": 2.286671589183466e-06, + "loss": 0.8256, + "step": 398000 + }, + { + "epoch": 5.55, + "learning_rate": 2.2797057635241506e-06, + "loss": 0.8057, + "step": 398100 + }, + { + "epoch": 5.55, + "learning_rate": 2.2727399378648352e-06, + "loss": 0.8375, + "step": 398200 + }, + { + "epoch": 5.55, + "learning_rate": 2.2657741122055195e-06, + "loss": 0.8279, + "step": 398300 + }, + { + "epoch": 5.55, + "learning_rate": 2.258808286546204e-06, + "loss": 0.8181, + "step": 398400 + }, + { + "epoch": 5.55, + "learning_rate": 2.2518424608868893e-06, + "loss": 0.8052, + "step": 398500 + }, + { + "epoch": 5.55, + "learning_rate": 2.2448766352275736e-06, + "loss": 0.8453, + "step": 398600 + }, + { + "epoch": 5.55, + "learning_rate": 2.2379108095682582e-06, + "loss": 0.8587, + "step": 398700 + }, + { + "epoch": 5.56, + "learning_rate": 2.230944983908943e-06, + "loss": 0.8429, + "step": 398800 + }, + { + "epoch": 5.56, + "learning_rate": 2.223979158249627e-06, + "loss": 0.8207, + "step": 398900 + }, + { + "epoch": 5.56, + "learning_rate": 2.217082990846905e-06, + "loss": 0.8515, + "step": 399000 + }, + { + "epoch": 5.56, + "learning_rate": 2.2101171651875896e-06, + "loss": 0.8403, + "step": 399100 + }, + { + "epoch": 5.56, + "learning_rate": 2.2031513395282742e-06, + "loss": 0.8369, + "step": 399200 + }, + { + "epoch": 5.56, + "learning_rate": 2.196185513868959e-06, + "loss": 0.841, + "step": 399300 + }, + { + "epoch": 5.56, + "learning_rate": 2.1892196882096436e-06, + "loss": 0.8074, + "step": 399400 + }, + { + "epoch": 5.57, + "learning_rate": 2.1822538625503283e-06, + "loss": 0.7971, + "step": 399500 + }, + { + "epoch": 5.57, + "learning_rate": 2.1752880368910126e-06, + "loss": 0.8242, + "step": 399600 + }, + { + "epoch": 5.57, + "learning_rate": 2.1683222112316972e-06, + "loss": 0.8233, + "step": 399700 + }, + { + "epoch": 5.57, + "learning_rate": 2.161356385572382e-06, + "loss": 0.8398, + "step": 399800 + }, + { + "epoch": 5.57, + "learning_rate": 2.1543905599130666e-06, + "loss": 0.8336, + "step": 399900 + }, + { + "epoch": 5.57, + "learning_rate": 2.1474247342537513e-06, + "loss": 0.8367, + "step": 400000 + }, + { + "epoch": 5.57, + "learning_rate": 2.140458908594436e-06, + "loss": 0.8471, + "step": 400100 + }, + { + "epoch": 5.58, + "learning_rate": 2.1334930829351202e-06, + "loss": 0.8388, + "step": 400200 + }, + { + "epoch": 5.58, + "learning_rate": 2.126527257275805e-06, + "loss": 0.8394, + "step": 400300 + }, + { + "epoch": 5.58, + "learning_rate": 2.119561431616489e-06, + "loss": 0.8561, + "step": 400400 + }, + { + "epoch": 5.58, + "learning_rate": 2.1125956059571743e-06, + "loss": 0.8279, + "step": 400500 + }, + { + "epoch": 5.58, + "learning_rate": 2.105629780297859e-06, + "loss": 0.818, + "step": 400600 + }, + { + "epoch": 5.58, + "learning_rate": 2.0986639546385432e-06, + "loss": 0.8218, + "step": 400700 + }, + { + "epoch": 5.58, + "learning_rate": 2.091698128979228e-06, + "loss": 0.831, + "step": 400800 + }, + { + "epoch": 5.59, + "learning_rate": 2.0847323033199126e-06, + "loss": 0.8279, + "step": 400900 + }, + { + "epoch": 5.59, + "learning_rate": 2.077766477660597e-06, + "loss": 0.8396, + "step": 401000 + }, + { + "epoch": 5.59, + "learning_rate": 2.070800652001282e-06, + "loss": 0.8516, + "step": 401100 + }, + { + "epoch": 5.59, + "learning_rate": 2.0638348263419666e-06, + "loss": 0.8415, + "step": 401200 + }, + { + "epoch": 5.59, + "learning_rate": 2.056869000682651e-06, + "loss": 0.8537, + "step": 401300 + }, + { + "epoch": 5.59, + "learning_rate": 2.0499031750233356e-06, + "loss": 0.8165, + "step": 401400 + }, + { + "epoch": 5.59, + "learning_rate": 2.0429373493640203e-06, + "loss": 0.8216, + "step": 401500 + }, + { + "epoch": 5.59, + "learning_rate": 2.0359715237047045e-06, + "loss": 0.8369, + "step": 401600 + }, + { + "epoch": 5.6, + "learning_rate": 2.0290056980453896e-06, + "loss": 0.8306, + "step": 401700 + }, + { + "epoch": 5.6, + "learning_rate": 2.022039872386074e-06, + "loss": 0.8369, + "step": 401800 + }, + { + "epoch": 5.6, + "learning_rate": 2.0150740467267586e-06, + "loss": 0.8312, + "step": 401900 + }, + { + "epoch": 5.6, + "learning_rate": 2.0081082210674433e-06, + "loss": 0.8204, + "step": 402000 + }, + { + "epoch": 5.6, + "learning_rate": 2.0011423954081275e-06, + "loss": 0.8472, + "step": 402100 + }, + { + "epoch": 5.6, + "learning_rate": 1.994176569748812e-06, + "loss": 0.8314, + "step": 402200 + }, + { + "epoch": 5.6, + "learning_rate": 1.9872107440894973e-06, + "loss": 0.8275, + "step": 402300 + }, + { + "epoch": 5.61, + "learning_rate": 1.9802449184301816e-06, + "loss": 0.8527, + "step": 402400 + }, + { + "epoch": 5.61, + "learning_rate": 1.9733487510274597e-06, + "loss": 0.8428, + "step": 402500 + }, + { + "epoch": 5.61, + "learning_rate": 1.966382925368144e-06, + "loss": 0.8339, + "step": 402600 + }, + { + "epoch": 5.61, + "learning_rate": 1.9594170997088286e-06, + "loss": 0.8574, + "step": 402700 + }, + { + "epoch": 5.61, + "learning_rate": 1.9524512740495133e-06, + "loss": 0.8299, + "step": 402800 + }, + { + "epoch": 5.61, + "learning_rate": 1.9454854483901976e-06, + "loss": 0.8344, + "step": 402900 + }, + { + "epoch": 5.61, + "learning_rate": 1.9385196227308823e-06, + "loss": 0.7946, + "step": 403000 + }, + { + "epoch": 5.62, + "learning_rate": 1.931553797071567e-06, + "loss": 0.806, + "step": 403100 + }, + { + "epoch": 5.62, + "learning_rate": 1.9245879714122516e-06, + "loss": 0.8634, + "step": 403200 + }, + { + "epoch": 5.62, + "learning_rate": 1.9176221457529363e-06, + "loss": 0.8452, + "step": 403300 + }, + { + "epoch": 5.62, + "learning_rate": 1.9106563200936206e-06, + "loss": 0.8458, + "step": 403400 + }, + { + "epoch": 5.62, + "learning_rate": 1.9036904944343053e-06, + "loss": 0.8422, + "step": 403500 + }, + { + "epoch": 5.62, + "learning_rate": 1.8967246687749901e-06, + "loss": 0.8609, + "step": 403600 + }, + { + "epoch": 5.62, + "learning_rate": 1.8897588431156744e-06, + "loss": 0.8105, + "step": 403700 + }, + { + "epoch": 5.63, + "learning_rate": 1.882793017456359e-06, + "loss": 0.8159, + "step": 403800 + }, + { + "epoch": 5.63, + "learning_rate": 1.875827191797044e-06, + "loss": 0.8604, + "step": 403900 + }, + { + "epoch": 5.63, + "learning_rate": 1.8688613661377285e-06, + "loss": 0.8309, + "step": 404000 + }, + { + "epoch": 5.63, + "learning_rate": 1.861895540478413e-06, + "loss": 0.8026, + "step": 404100 + }, + { + "epoch": 5.63, + "learning_rate": 1.8549297148190976e-06, + "loss": 0.8271, + "step": 404200 + }, + { + "epoch": 5.63, + "learning_rate": 1.847963889159782e-06, + "loss": 0.837, + "step": 404300 + }, + { + "epoch": 5.63, + "learning_rate": 1.8409980635004668e-06, + "loss": 0.824, + "step": 404400 + }, + { + "epoch": 5.64, + "learning_rate": 1.8340322378411514e-06, + "loss": 0.8557, + "step": 404500 + }, + { + "epoch": 5.64, + "learning_rate": 1.827066412181836e-06, + "loss": 0.8109, + "step": 404600 + }, + { + "epoch": 5.64, + "learning_rate": 1.8201005865225206e-06, + "loss": 0.8213, + "step": 404700 + }, + { + "epoch": 5.64, + "learning_rate": 1.8131347608632053e-06, + "loss": 0.8316, + "step": 404800 + }, + { + "epoch": 5.64, + "learning_rate": 1.8061689352038898e-06, + "loss": 0.8072, + "step": 404900 + }, + { + "epoch": 5.64, + "learning_rate": 1.7992031095445742e-06, + "loss": 0.8013, + "step": 405000 + }, + { + "epoch": 5.64, + "learning_rate": 1.7923069421418521e-06, + "loss": 0.8226, + "step": 405100 + }, + { + "epoch": 5.65, + "learning_rate": 1.7853411164825366e-06, + "loss": 0.8322, + "step": 405200 + }, + { + "epoch": 5.65, + "learning_rate": 1.7783752908232215e-06, + "loss": 0.8296, + "step": 405300 + }, + { + "epoch": 5.65, + "learning_rate": 1.771409465163906e-06, + "loss": 0.8525, + "step": 405400 + }, + { + "epoch": 5.65, + "learning_rate": 1.7644436395045905e-06, + "loss": 0.8294, + "step": 405500 + }, + { + "epoch": 5.65, + "learning_rate": 1.7574778138452751e-06, + "loss": 0.8088, + "step": 405600 + }, + { + "epoch": 5.65, + "learning_rate": 1.7505119881859598e-06, + "loss": 0.8109, + "step": 405700 + }, + { + "epoch": 5.65, + "learning_rate": 1.7435461625266443e-06, + "loss": 0.836, + "step": 405800 + }, + { + "epoch": 5.65, + "learning_rate": 1.7365803368673288e-06, + "loss": 0.8353, + "step": 405900 + }, + { + "epoch": 5.66, + "learning_rate": 1.7296145112080137e-06, + "loss": 0.8345, + "step": 406000 + }, + { + "epoch": 5.66, + "learning_rate": 1.7226486855486981e-06, + "loss": 0.8032, + "step": 406100 + }, + { + "epoch": 5.66, + "learning_rate": 1.7156828598893826e-06, + "loss": 0.8448, + "step": 406200 + }, + { + "epoch": 5.66, + "learning_rate": 1.7087170342300673e-06, + "loss": 0.8559, + "step": 406300 + }, + { + "epoch": 5.66, + "learning_rate": 1.701751208570752e-06, + "loss": 0.8094, + "step": 406400 + }, + { + "epoch": 5.66, + "learning_rate": 1.6947853829114364e-06, + "loss": 0.8585, + "step": 406500 + }, + { + "epoch": 5.66, + "learning_rate": 1.6878195572521211e-06, + "loss": 0.8252, + "step": 406600 + }, + { + "epoch": 5.67, + "learning_rate": 1.6808537315928058e-06, + "loss": 0.7963, + "step": 406700 + }, + { + "epoch": 5.67, + "learning_rate": 1.6738879059334903e-06, + "loss": 0.8484, + "step": 406800 + }, + { + "epoch": 5.67, + "learning_rate": 1.666922080274175e-06, + "loss": 0.8171, + "step": 406900 + }, + { + "epoch": 5.67, + "learning_rate": 1.6599562546148594e-06, + "loss": 0.8206, + "step": 407000 + }, + { + "epoch": 5.67, + "learning_rate": 1.6529904289555441e-06, + "loss": 0.8143, + "step": 407100 + }, + { + "epoch": 5.67, + "learning_rate": 1.6460942615528218e-06, + "loss": 0.8433, + "step": 407200 + }, + { + "epoch": 5.67, + "learning_rate": 1.6391284358935065e-06, + "loss": 0.8295, + "step": 407300 + }, + { + "epoch": 5.68, + "learning_rate": 1.6321626102341912e-06, + "loss": 0.8231, + "step": 407400 + }, + { + "epoch": 5.68, + "learning_rate": 1.6251967845748757e-06, + "loss": 0.8251, + "step": 407500 + }, + { + "epoch": 5.68, + "learning_rate": 1.6182309589155601e-06, + "loss": 0.8477, + "step": 407600 + }, + { + "epoch": 5.68, + "learning_rate": 1.611265133256245e-06, + "loss": 0.8329, + "step": 407700 + }, + { + "epoch": 5.68, + "learning_rate": 1.6042993075969295e-06, + "loss": 0.8176, + "step": 407800 + }, + { + "epoch": 5.68, + "learning_rate": 1.597333481937614e-06, + "loss": 0.8167, + "step": 407900 + }, + { + "epoch": 5.68, + "learning_rate": 1.5903676562782989e-06, + "loss": 0.8161, + "step": 408000 + }, + { + "epoch": 5.69, + "learning_rate": 1.5834018306189833e-06, + "loss": 0.805, + "step": 408100 + }, + { + "epoch": 5.69, + "learning_rate": 1.5764360049596678e-06, + "loss": 0.8254, + "step": 408200 + }, + { + "epoch": 5.69, + "learning_rate": 1.5694701793003525e-06, + "loss": 0.847, + "step": 408300 + }, + { + "epoch": 5.69, + "learning_rate": 1.5625043536410372e-06, + "loss": 0.7958, + "step": 408400 + }, + { + "epoch": 5.69, + "learning_rate": 1.5555385279817216e-06, + "loss": 0.8541, + "step": 408500 + }, + { + "epoch": 5.69, + "learning_rate": 1.5485727023224063e-06, + "loss": 0.8399, + "step": 408600 + }, + { + "epoch": 5.69, + "learning_rate": 1.541606876663091e-06, + "loss": 0.8413, + "step": 408700 + }, + { + "epoch": 5.7, + "learning_rate": 1.5346410510037755e-06, + "loss": 0.8071, + "step": 408800 + }, + { + "epoch": 5.7, + "learning_rate": 1.5276752253444602e-06, + "loss": 0.8483, + "step": 408900 + }, + { + "epoch": 5.7, + "learning_rate": 1.5207093996851446e-06, + "loss": 0.827, + "step": 409000 + }, + { + "epoch": 5.7, + "learning_rate": 1.5137435740258293e-06, + "loss": 0.8573, + "step": 409100 + }, + { + "epoch": 5.7, + "learning_rate": 1.506777748366514e-06, + "loss": 0.8162, + "step": 409200 + }, + { + "epoch": 5.7, + "learning_rate": 1.4998119227071985e-06, + "loss": 0.8183, + "step": 409300 + }, + { + "epoch": 5.7, + "learning_rate": 1.4928460970478832e-06, + "loss": 0.847, + "step": 409400 + }, + { + "epoch": 5.71, + "learning_rate": 1.4858802713885678e-06, + "loss": 0.8509, + "step": 409500 + }, + { + "epoch": 5.71, + "learning_rate": 1.4789144457292523e-06, + "loss": 0.8433, + "step": 409600 + }, + { + "epoch": 5.71, + "learning_rate": 1.4719486200699368e-06, + "loss": 0.8343, + "step": 409700 + }, + { + "epoch": 5.71, + "learning_rate": 1.4649827944106217e-06, + "loss": 0.8438, + "step": 409800 + }, + { + "epoch": 5.71, + "learning_rate": 1.4580169687513061e-06, + "loss": 0.8591, + "step": 409900 + }, + { + "epoch": 5.71, + "learning_rate": 1.4510511430919906e-06, + "loss": 0.8236, + "step": 410000 + }, + { + "epoch": 5.71, + "learning_rate": 1.4440853174326755e-06, + "loss": 0.8381, + "step": 410100 + }, + { + "epoch": 5.71, + "learning_rate": 1.43711949177336e-06, + "loss": 0.8261, + "step": 410200 + }, + { + "epoch": 5.72, + "learning_rate": 1.4301536661140445e-06, + "loss": 0.8465, + "step": 410300 + }, + { + "epoch": 5.72, + "learning_rate": 1.4231878404547291e-06, + "loss": 0.8374, + "step": 410400 + }, + { + "epoch": 5.72, + "learning_rate": 1.4162220147954138e-06, + "loss": 0.8262, + "step": 410500 + }, + { + "epoch": 5.72, + "learning_rate": 1.4092561891360983e-06, + "loss": 0.8437, + "step": 410600 + }, + { + "epoch": 5.72, + "learning_rate": 1.402290363476783e-06, + "loss": 0.801, + "step": 410700 + }, + { + "epoch": 5.72, + "learning_rate": 1.3953245378174677e-06, + "loss": 0.8187, + "step": 410800 + }, + { + "epoch": 5.72, + "learning_rate": 1.3884283704147454e-06, + "loss": 0.827, + "step": 410900 + }, + { + "epoch": 5.73, + "learning_rate": 1.3814625447554298e-06, + "loss": 0.816, + "step": 411000 + }, + { + "epoch": 5.73, + "learning_rate": 1.3744967190961145e-06, + "loss": 0.8482, + "step": 411100 + }, + { + "epoch": 5.73, + "learning_rate": 1.367530893436799e-06, + "loss": 0.8296, + "step": 411200 + }, + { + "epoch": 5.73, + "learning_rate": 1.3605650677774837e-06, + "loss": 0.8425, + "step": 411300 + }, + { + "epoch": 5.73, + "learning_rate": 1.3535992421181683e-06, + "loss": 0.8138, + "step": 411400 + }, + { + "epoch": 5.73, + "learning_rate": 1.3466334164588528e-06, + "loss": 0.8248, + "step": 411500 + }, + { + "epoch": 5.73, + "learning_rate": 1.3396675907995375e-06, + "loss": 0.8417, + "step": 411600 + }, + { + "epoch": 5.74, + "learning_rate": 1.332701765140222e-06, + "loss": 0.845, + "step": 411700 + }, + { + "epoch": 5.74, + "learning_rate": 1.3257359394809067e-06, + "loss": 0.8463, + "step": 411800 + }, + { + "epoch": 5.74, + "learning_rate": 1.3187701138215913e-06, + "loss": 0.8482, + "step": 411900 + }, + { + "epoch": 5.74, + "learning_rate": 1.3118042881622758e-06, + "loss": 0.8133, + "step": 412000 + }, + { + "epoch": 5.74, + "learning_rate": 1.3048384625029605e-06, + "loss": 0.8457, + "step": 412100 + }, + { + "epoch": 5.74, + "learning_rate": 1.2978726368436452e-06, + "loss": 0.7992, + "step": 412200 + }, + { + "epoch": 5.74, + "learning_rate": 1.2909068111843297e-06, + "loss": 0.8254, + "step": 412300 + }, + { + "epoch": 5.75, + "learning_rate": 1.2839409855250141e-06, + "loss": 0.8398, + "step": 412400 + }, + { + "epoch": 5.75, + "learning_rate": 1.276975159865699e-06, + "loss": 0.8129, + "step": 412500 + }, + { + "epoch": 5.75, + "learning_rate": 1.2700093342063835e-06, + "loss": 0.8211, + "step": 412600 + }, + { + "epoch": 5.75, + "learning_rate": 1.263043508547068e-06, + "loss": 0.8203, + "step": 412700 + }, + { + "epoch": 5.75, + "learning_rate": 1.2560776828877529e-06, + "loss": 0.8585, + "step": 412800 + }, + { + "epoch": 5.75, + "learning_rate": 1.2491815154850303e-06, + "loss": 0.8181, + "step": 412900 + }, + { + "epoch": 5.75, + "learning_rate": 1.242215689825715e-06, + "loss": 0.8559, + "step": 413000 + }, + { + "epoch": 5.76, + "learning_rate": 1.2352498641663997e-06, + "loss": 0.8108, + "step": 413100 + }, + { + "epoch": 5.76, + "learning_rate": 1.2282840385070842e-06, + "loss": 0.8364, + "step": 413200 + }, + { + "epoch": 5.76, + "learning_rate": 1.2213182128477689e-06, + "loss": 0.8198, + "step": 413300 + }, + { + "epoch": 5.76, + "learning_rate": 1.2143523871884535e-06, + "loss": 0.8501, + "step": 413400 + }, + { + "epoch": 5.76, + "learning_rate": 1.207386561529138e-06, + "loss": 0.8246, + "step": 413500 + }, + { + "epoch": 5.76, + "learning_rate": 1.2004207358698227e-06, + "loss": 0.79, + "step": 413600 + }, + { + "epoch": 5.76, + "learning_rate": 1.1934549102105074e-06, + "loss": 0.8225, + "step": 413700 + }, + { + "epoch": 5.76, + "learning_rate": 1.1864890845511919e-06, + "loss": 0.8366, + "step": 413800 + }, + { + "epoch": 5.77, + "learning_rate": 1.1795232588918765e-06, + "loss": 0.8177, + "step": 413900 + }, + { + "epoch": 5.77, + "learning_rate": 1.172557433232561e-06, + "loss": 0.8278, + "step": 414000 + }, + { + "epoch": 5.77, + "learning_rate": 1.1655916075732457e-06, + "loss": 0.8216, + "step": 414100 + }, + { + "epoch": 5.77, + "learning_rate": 1.1586257819139304e-06, + "loss": 0.7972, + "step": 414200 + }, + { + "epoch": 5.77, + "learning_rate": 1.1516599562546149e-06, + "loss": 0.8423, + "step": 414300 + }, + { + "epoch": 5.77, + "learning_rate": 1.1446941305952995e-06, + "loss": 0.8451, + "step": 414400 + }, + { + "epoch": 5.77, + "learning_rate": 1.1377283049359842e-06, + "loss": 0.8252, + "step": 414500 + }, + { + "epoch": 5.78, + "learning_rate": 1.1307624792766687e-06, + "loss": 0.8351, + "step": 414600 + }, + { + "epoch": 5.78, + "learning_rate": 1.1237966536173532e-06, + "loss": 0.854, + "step": 414700 + }, + { + "epoch": 5.78, + "learning_rate": 1.116830827958038e-06, + "loss": 0.8025, + "step": 414800 + }, + { + "epoch": 5.78, + "learning_rate": 1.1098650022987225e-06, + "loss": 0.8122, + "step": 414900 + }, + { + "epoch": 5.78, + "learning_rate": 1.102899176639407e-06, + "loss": 0.8385, + "step": 415000 + }, + { + "epoch": 5.78, + "learning_rate": 1.096003009236685e-06, + "loss": 0.8497, + "step": 415100 + }, + { + "epoch": 5.78, + "learning_rate": 1.0890371835773694e-06, + "loss": 0.8524, + "step": 415200 + }, + { + "epoch": 5.79, + "learning_rate": 1.082071357918054e-06, + "loss": 0.8413, + "step": 415300 + }, + { + "epoch": 5.79, + "learning_rate": 1.0751055322587387e-06, + "loss": 0.8408, + "step": 415400 + }, + { + "epoch": 5.79, + "learning_rate": 1.0681397065994232e-06, + "loss": 0.8346, + "step": 415500 + }, + { + "epoch": 5.79, + "learning_rate": 1.061173880940108e-06, + "loss": 0.8443, + "step": 415600 + }, + { + "epoch": 5.79, + "learning_rate": 1.0542080552807926e-06, + "loss": 0.8039, + "step": 415700 + }, + { + "epoch": 5.79, + "learning_rate": 1.047242229621477e-06, + "loss": 0.8309, + "step": 415800 + }, + { + "epoch": 5.79, + "learning_rate": 1.0402764039621617e-06, + "loss": 0.8151, + "step": 415900 + }, + { + "epoch": 5.8, + "learning_rate": 1.0333105783028462e-06, + "loss": 0.8235, + "step": 416000 + }, + { + "epoch": 5.8, + "learning_rate": 1.026344752643531e-06, + "loss": 0.8586, + "step": 416100 + }, + { + "epoch": 5.8, + "learning_rate": 1.0193789269842156e-06, + "loss": 0.8187, + "step": 416200 + }, + { + "epoch": 5.8, + "learning_rate": 1.0124131013249e-06, + "loss": 0.8205, + "step": 416300 + }, + { + "epoch": 5.8, + "learning_rate": 1.0054472756655847e-06, + "loss": 0.8414, + "step": 416400 + }, + { + "epoch": 5.8, + "learning_rate": 9.984814500062692e-07, + "loss": 0.8385, + "step": 416500 + }, + { + "epoch": 5.8, + "learning_rate": 9.915156243469539e-07, + "loss": 0.8484, + "step": 416600 + }, + { + "epoch": 5.81, + "learning_rate": 9.845497986876384e-07, + "loss": 0.8357, + "step": 416700 + }, + { + "epoch": 5.81, + "learning_rate": 9.77583973028323e-07, + "loss": 0.8065, + "step": 416800 + }, + { + "epoch": 5.81, + "learning_rate": 9.706181473690077e-07, + "loss": 0.8406, + "step": 416900 + }, + { + "epoch": 5.81, + "learning_rate": 9.636523217096922e-07, + "loss": 0.8033, + "step": 417000 + }, + { + "epoch": 5.81, + "learning_rate": 9.566864960503769e-07, + "loss": 0.7802, + "step": 417100 + }, + { + "epoch": 5.81, + "learning_rate": 9.497206703910616e-07, + "loss": 0.8161, + "step": 417200 + }, + { + "epoch": 5.81, + "learning_rate": 9.42754844731746e-07, + "loss": 0.8294, + "step": 417300 + }, + { + "epoch": 5.82, + "learning_rate": 9.357890190724307e-07, + "loss": 0.8428, + "step": 417400 + }, + { + "epoch": 5.82, + "learning_rate": 9.288231934131153e-07, + "loss": 0.8358, + "step": 417500 + }, + { + "epoch": 5.82, + "learning_rate": 9.218573677537999e-07, + "loss": 0.8485, + "step": 417600 + }, + { + "epoch": 5.82, + "learning_rate": 9.148915420944845e-07, + "loss": 0.8188, + "step": 417700 + }, + { + "epoch": 5.82, + "learning_rate": 9.07925716435169e-07, + "loss": 0.8022, + "step": 417800 + }, + { + "epoch": 5.82, + "learning_rate": 9.009598907758537e-07, + "loss": 0.8418, + "step": 417900 + }, + { + "epoch": 5.82, + "learning_rate": 8.939940651165383e-07, + "loss": 0.8339, + "step": 418000 + }, + { + "epoch": 5.82, + "learning_rate": 8.870282394572229e-07, + "loss": 0.8511, + "step": 418100 + }, + { + "epoch": 5.83, + "learning_rate": 8.800624137979074e-07, + "loss": 0.8283, + "step": 418200 + }, + { + "epoch": 5.83, + "learning_rate": 8.730965881385921e-07, + "loss": 0.7943, + "step": 418300 + }, + { + "epoch": 5.83, + "learning_rate": 8.661307624792766e-07, + "loss": 0.8262, + "step": 418400 + }, + { + "epoch": 5.83, + "learning_rate": 8.591649368199613e-07, + "loss": 0.8273, + "step": 418500 + }, + { + "epoch": 5.83, + "learning_rate": 8.52268769417239e-07, + "loss": 0.8275, + "step": 418600 + }, + { + "epoch": 5.83, + "learning_rate": 8.453029437579237e-07, + "loss": 0.8275, + "step": 418700 + }, + { + "epoch": 5.83, + "learning_rate": 8.383371180986082e-07, + "loss": 0.857, + "step": 418800 + }, + { + "epoch": 5.84, + "learning_rate": 8.313712924392928e-07, + "loss": 0.8198, + "step": 418900 + }, + { + "epoch": 5.84, + "learning_rate": 8.244054667799775e-07, + "loss": 0.8198, + "step": 419000 + }, + { + "epoch": 5.84, + "learning_rate": 8.174396411206621e-07, + "loss": 0.8533, + "step": 419100 + }, + { + "epoch": 5.84, + "learning_rate": 8.104738154613467e-07, + "loss": 0.8134, + "step": 419200 + }, + { + "epoch": 5.84, + "learning_rate": 8.035079898020312e-07, + "loss": 0.837, + "step": 419300 + }, + { + "epoch": 5.84, + "learning_rate": 7.965421641427159e-07, + "loss": 0.8556, + "step": 419400 + }, + { + "epoch": 5.84, + "learning_rate": 7.895763384834004e-07, + "loss": 0.8463, + "step": 419500 + }, + { + "epoch": 5.85, + "learning_rate": 7.826105128240851e-07, + "loss": 0.856, + "step": 419600 + }, + { + "epoch": 5.85, + "learning_rate": 7.756446871647698e-07, + "loss": 0.8344, + "step": 419700 + }, + { + "epoch": 5.85, + "learning_rate": 7.686788615054542e-07, + "loss": 0.8052, + "step": 419800 + }, + { + "epoch": 5.85, + "learning_rate": 7.617130358461389e-07, + "loss": 0.8433, + "step": 419900 + }, + { + "epoch": 5.85, + "learning_rate": 7.547472101868235e-07, + "loss": 0.8475, + "step": 420000 + }, + { + "epoch": 5.85, + "learning_rate": 7.477813845275081e-07, + "loss": 0.8058, + "step": 420100 + }, + { + "epoch": 5.85, + "learning_rate": 7.408155588681926e-07, + "loss": 0.8244, + "step": 420200 + }, + { + "epoch": 5.86, + "learning_rate": 7.338497332088772e-07, + "loss": 0.8162, + "step": 420300 + }, + { + "epoch": 5.86, + "learning_rate": 7.268839075495619e-07, + "loss": 0.8354, + "step": 420400 + }, + { + "epoch": 5.86, + "learning_rate": 7.199180818902465e-07, + "loss": 0.8327, + "step": 420500 + }, + { + "epoch": 5.86, + "learning_rate": 7.129522562309311e-07, + "loss": 0.8383, + "step": 420600 + }, + { + "epoch": 5.86, + "learning_rate": 7.059864305716156e-07, + "loss": 0.8278, + "step": 420700 + }, + { + "epoch": 5.86, + "learning_rate": 6.990206049123003e-07, + "loss": 0.8357, + "step": 420800 + }, + { + "epoch": 5.86, + "learning_rate": 6.920547792529848e-07, + "loss": 0.8503, + "step": 420900 + }, + { + "epoch": 5.87, + "learning_rate": 6.850889535936695e-07, + "loss": 0.8103, + "step": 421000 + }, + { + "epoch": 5.87, + "learning_rate": 6.781231279343542e-07, + "loss": 0.8206, + "step": 421100 + }, + { + "epoch": 5.87, + "learning_rate": 6.711573022750386e-07, + "loss": 0.8256, + "step": 421200 + }, + { + "epoch": 5.87, + "learning_rate": 6.641914766157233e-07, + "loss": 0.8429, + "step": 421300 + }, + { + "epoch": 5.87, + "learning_rate": 6.572256509564079e-07, + "loss": 0.8381, + "step": 421400 + }, + { + "epoch": 5.87, + "learning_rate": 6.502598252970925e-07, + "loss": 0.8227, + "step": 421500 + }, + { + "epoch": 5.87, + "learning_rate": 6.43293999637777e-07, + "loss": 0.8342, + "step": 421600 + }, + { + "epoch": 5.87, + "learning_rate": 6.363281739784617e-07, + "loss": 0.8107, + "step": 421700 + }, + { + "epoch": 5.88, + "learning_rate": 6.293623483191463e-07, + "loss": 0.8456, + "step": 421800 + }, + { + "epoch": 5.88, + "learning_rate": 6.223965226598309e-07, + "loss": 0.8257, + "step": 421900 + }, + { + "epoch": 5.88, + "learning_rate": 6.154306970005156e-07, + "loss": 0.8303, + "step": 422000 + }, + { + "epoch": 5.88, + "learning_rate": 6.084648713412e-07, + "loss": 0.8303, + "step": 422100 + }, + { + "epoch": 5.88, + "learning_rate": 6.014990456818847e-07, + "loss": 0.8193, + "step": 422200 + }, + { + "epoch": 5.88, + "learning_rate": 5.946028782791624e-07, + "loss": 0.8381, + "step": 422300 + }, + { + "epoch": 5.88, + "learning_rate": 5.876370526198471e-07, + "loss": 0.8263, + "step": 422400 + }, + { + "epoch": 5.89, + "learning_rate": 5.806712269605317e-07, + "loss": 0.8478, + "step": 422500 + }, + { + "epoch": 5.89, + "learning_rate": 5.737054013012163e-07, + "loss": 0.8227, + "step": 422600 + }, + { + "epoch": 5.89, + "learning_rate": 5.667395756419008e-07, + "loss": 0.8371, + "step": 422700 + }, + { + "epoch": 5.89, + "learning_rate": 5.597737499825854e-07, + "loss": 0.8067, + "step": 422800 + }, + { + "epoch": 5.89, + "learning_rate": 5.5280792432327e-07, + "loss": 0.8135, + "step": 422900 + }, + { + "epoch": 5.89, + "learning_rate": 5.458420986639547e-07, + "loss": 0.8414, + "step": 423000 + }, + { + "epoch": 5.89, + "learning_rate": 5.388762730046393e-07, + "loss": 0.8406, + "step": 423100 + }, + { + "epoch": 5.9, + "learning_rate": 5.319104473453238e-07, + "loss": 0.8134, + "step": 423200 + }, + { + "epoch": 5.9, + "learning_rate": 5.249446216860085e-07, + "loss": 0.8446, + "step": 423300 + }, + { + "epoch": 5.9, + "learning_rate": 5.17978796026693e-07, + "loss": 0.8345, + "step": 423400 + }, + { + "epoch": 5.9, + "learning_rate": 5.110129703673777e-07, + "loss": 0.8047, + "step": 423500 + }, + { + "epoch": 5.9, + "learning_rate": 5.040471447080622e-07, + "loss": 0.8492, + "step": 423600 + }, + { + "epoch": 5.9, + "learning_rate": 4.970813190487468e-07, + "loss": 0.8023, + "step": 423700 + }, + { + "epoch": 5.9, + "learning_rate": 4.901154933894315e-07, + "loss": 0.8161, + "step": 423800 + }, + { + "epoch": 5.91, + "learning_rate": 4.831496677301161e-07, + "loss": 0.8424, + "step": 423900 + }, + { + "epoch": 5.91, + "learning_rate": 4.761838420708007e-07, + "loss": 0.836, + "step": 424000 + }, + { + "epoch": 5.91, + "learning_rate": 4.6921801641148524e-07, + "loss": 0.8129, + "step": 424100 + }, + { + "epoch": 5.91, + "learning_rate": 4.6225219075216987e-07, + "loss": 0.8521, + "step": 424200 + }, + { + "epoch": 5.91, + "learning_rate": 4.552863650928545e-07, + "loss": 0.8312, + "step": 424300 + }, + { + "epoch": 5.91, + "learning_rate": 4.483205394335391e-07, + "loss": 0.8064, + "step": 424400 + }, + { + "epoch": 5.91, + "learning_rate": 4.4135471377422365e-07, + "loss": 0.8389, + "step": 424500 + }, + { + "epoch": 5.92, + "learning_rate": 4.3438888811490823e-07, + "loss": 0.8164, + "step": 424600 + }, + { + "epoch": 5.92, + "learning_rate": 4.2742306245559286e-07, + "loss": 0.8236, + "step": 424700 + }, + { + "epoch": 5.92, + "learning_rate": 4.204572367962775e-07, + "loss": 0.8056, + "step": 424800 + }, + { + "epoch": 5.92, + "learning_rate": 4.1349141113696207e-07, + "loss": 0.8357, + "step": 424900 + }, + { + "epoch": 5.92, + "learning_rate": 4.065255854776467e-07, + "loss": 0.8209, + "step": 425000 + }, + { + "epoch": 5.92, + "learning_rate": 3.995597598183313e-07, + "loss": 0.8222, + "step": 425100 + }, + { + "epoch": 5.92, + "learning_rate": 3.9259393415901585e-07, + "loss": 0.8028, + "step": 425200 + }, + { + "epoch": 5.93, + "learning_rate": 3.856281084997005e-07, + "loss": 0.8125, + "step": 425300 + }, + { + "epoch": 5.93, + "learning_rate": 3.7873194109697824e-07, + "loss": 0.8536, + "step": 425400 + }, + { + "epoch": 5.93, + "learning_rate": 3.7176611543766287e-07, + "loss": 0.8474, + "step": 425500 + }, + { + "epoch": 5.93, + "learning_rate": 3.6480028977834744e-07, + "loss": 0.8179, + "step": 425600 + }, + { + "epoch": 5.93, + "learning_rate": 3.57834464119032e-07, + "loss": 0.8267, + "step": 425700 + }, + { + "epoch": 5.93, + "learning_rate": 3.5086863845971665e-07, + "loss": 0.8247, + "step": 425800 + }, + { + "epoch": 5.93, + "learning_rate": 3.4390281280040123e-07, + "loss": 0.8374, + "step": 425900 + }, + { + "epoch": 5.93, + "learning_rate": 3.3693698714108586e-07, + "loss": 0.7893, + "step": 426000 + }, + { + "epoch": 5.94, + "learning_rate": 3.2997116148177044e-07, + "loss": 0.8325, + "step": 426100 + }, + { + "epoch": 5.94, + "learning_rate": 3.2300533582245507e-07, + "loss": 0.8137, + "step": 426200 + }, + { + "epoch": 5.94, + "learning_rate": 3.1603951016313964e-07, + "loss": 0.8398, + "step": 426300 + }, + { + "epoch": 5.94, + "learning_rate": 3.090736845038242e-07, + "loss": 0.8326, + "step": 426400 + }, + { + "epoch": 5.94, + "learning_rate": 3.0210785884450885e-07, + "loss": 0.8341, + "step": 426500 + }, + { + "epoch": 5.94, + "learning_rate": 2.9514203318519343e-07, + "loss": 0.8363, + "step": 426600 + }, + { + "epoch": 5.94, + "learning_rate": 2.8817620752587806e-07, + "loss": 0.825, + "step": 426700 + }, + { + "epoch": 5.95, + "learning_rate": 2.812103818665627e-07, + "loss": 0.8365, + "step": 426800 + }, + { + "epoch": 5.95, + "learning_rate": 2.7424455620724727e-07, + "loss": 0.8254, + "step": 426900 + }, + { + "epoch": 5.95, + "learning_rate": 2.6727873054793184e-07, + "loss": 0.8007, + "step": 427000 + }, + { + "epoch": 5.95, + "learning_rate": 2.603129048886165e-07, + "loss": 0.8189, + "step": 427100 + }, + { + "epoch": 5.95, + "learning_rate": 2.5334707922930105e-07, + "loss": 0.8243, + "step": 427200 + }, + { + "epoch": 5.95, + "learning_rate": 2.4638125356998563e-07, + "loss": 0.8188, + "step": 427300 + }, + { + "epoch": 5.95, + "learning_rate": 2.3941542791067026e-07, + "loss": 0.8351, + "step": 427400 + }, + { + "epoch": 5.96, + "learning_rate": 2.32519260507948e-07, + "loss": 0.8359, + "step": 427500 + }, + { + "epoch": 5.96, + "learning_rate": 2.2555343484863262e-07, + "loss": 0.8314, + "step": 427600 + }, + { + "epoch": 5.96, + "learning_rate": 2.1858760918931722e-07, + "loss": 0.802, + "step": 427700 + }, + { + "epoch": 5.96, + "learning_rate": 2.1162178353000182e-07, + "loss": 0.8333, + "step": 427800 + }, + { + "epoch": 5.96, + "learning_rate": 2.0465595787068643e-07, + "loss": 0.8329, + "step": 427900 + }, + { + "epoch": 5.96, + "learning_rate": 1.97690132211371e-07, + "loss": 0.805, + "step": 428000 + }, + { + "epoch": 5.96, + "learning_rate": 1.907243065520556e-07, + "loss": 0.8559, + "step": 428100 + }, + { + "epoch": 5.97, + "learning_rate": 1.8375848089274024e-07, + "loss": 0.836, + "step": 428200 + }, + { + "epoch": 5.97, + "learning_rate": 1.7679265523342482e-07, + "loss": 0.8504, + "step": 428300 + }, + { + "epoch": 5.97, + "learning_rate": 1.6982682957410942e-07, + "loss": 0.8513, + "step": 428400 + }, + { + "epoch": 5.97, + "learning_rate": 1.6286100391479402e-07, + "loss": 0.8228, + "step": 428500 + }, + { + "epoch": 5.97, + "learning_rate": 1.5589517825547863e-07, + "loss": 0.8337, + "step": 428600 + }, + { + "epoch": 5.97, + "learning_rate": 1.4892935259616323e-07, + "loss": 0.8333, + "step": 428700 + }, + { + "epoch": 5.97, + "learning_rate": 1.419635269368478e-07, + "loss": 0.816, + "step": 428800 + }, + { + "epoch": 5.98, + "learning_rate": 1.3499770127753244e-07, + "loss": 0.8424, + "step": 428900 + }, + { + "epoch": 5.98, + "learning_rate": 1.2803187561821702e-07, + "loss": 0.8313, + "step": 429000 + }, + { + "epoch": 5.98, + "learning_rate": 1.2106604995890162e-07, + "loss": 0.8185, + "step": 429100 + }, + { + "epoch": 5.98, + "learning_rate": 1.1410022429958622e-07, + "loss": 0.811, + "step": 429200 + }, + { + "epoch": 5.98, + "learning_rate": 1.0713439864027084e-07, + "loss": 0.8382, + "step": 429300 + }, + { + "epoch": 5.98, + "learning_rate": 1.0016857298095543e-07, + "loss": 0.8313, + "step": 429400 + }, + { + "epoch": 5.98, + "learning_rate": 9.320274732164004e-08, + "loss": 0.8064, + "step": 429500 + }, + { + "epoch": 5.99, + "learning_rate": 8.623692166232464e-08, + "loss": 0.8072, + "step": 429600 + }, + { + "epoch": 5.99, + "learning_rate": 7.927109600300923e-08, + "loss": 0.8101, + "step": 429700 + }, + { + "epoch": 5.99, + "learning_rate": 7.230527034369385e-08, + "loss": 0.8358, + "step": 429800 + }, + { + "epoch": 5.99, + "learning_rate": 6.533944468437844e-08, + "loss": 0.8318, + "step": 429900 + }, + { + "epoch": 5.99, + "learning_rate": 5.837361902506304e-08, + "loss": 0.8202, + "step": 430000 + }, + { + "epoch": 5.99, + "learning_rate": 5.1407793365747645e-08, + "loss": 0.8055, + "step": 430100 + }, + { + "epoch": 5.99, + "learning_rate": 4.444196770643224e-08, + "loss": 0.8096, + "step": 430200 + }, + { + "epoch": 5.99, + "learning_rate": 3.7476142047116846e-08, + "loss": 0.8211, + "step": 430300 + }, + { + "epoch": 6.0, + "learning_rate": 3.051031638780144e-08, + "loss": 0.8008, + "step": 430400 + }, + { + "epoch": 6.0, + "learning_rate": 2.3544490728486047e-08, + "loss": 0.8318, + "step": 430500 + }, + { + "epoch": 6.0, + "learning_rate": 1.657866506917065e-08, + "loss": 0.8358, + "step": 430600 + }, + { + "epoch": 6.0, + "eval_gen_len": 20.0, + "eval_loss": 1.199650764465332, + "eval_rouge1": 12.5153, + "eval_rouge2": 3.778, + "eval_rougeL": 12.0382, + "eval_rougeLsum": 12.1332, + "eval_runtime": 1506.4096, + "eval_samples_per_second": 8.874, + "eval_steps_per_second": 2.219, + "step": 430674 + }, + { + "epoch": 6.0, + "step": 430674, + "total_flos": 4.0669778872979374e+18, + "train_loss": 1.0701684290170863, + "train_runtime": 175298.0292, + "train_samples_per_second": 9.827, + "train_steps_per_second": 2.457 + } + ], + "max_steps": 430674, + "num_train_epochs": 6, + "total_flos": 4.0669778872979374e+18, + "trial_name": null, + "trial_params": null +}